赞
踩
将如下格式的参考文献转换为 bib 格式并存储到 txt 中,方便将 Word 参考文献转换为 LaTeX。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import selenium
import time


def read_txt(path):
    """Extract the paper titles from a reference-list text file.

    Each line is expected to look like ``[n]Authors.Title[J]...``: the title
    is taken as the text between the first ``.`` and the next ``[``.

    Lines that contain no ``.`` (blank lines, malformed entries) and entries
    whose extracted title is empty are skipped instead of raising.

    NOTE: an author list that itself contains ``.`` (e.g. initials) makes the
    "first period" rule pick the wrong segment; such entries need manual
    clean-up in the input file.

    Args:
        path: Path of the UTF-8 encoded reference list.

    Returns:
        A list of title strings, in file order.
    """
    papers = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            segments = line.split('.')
            if len(segments) < 2:
                continue  # nothing after the author part -> not a reference
            title = segments[1].split('[')[0].strip()
            if title:
                papers.append(title)
    return papers


def _new_driver():
    """Start a detached Chrome session with a 5 second implicit wait."""
    option = webdriver.ChromeOptions()
    option.add_experimental_option("detach", True)
    # NOTE(review): recent Selenium releases (4.10+) no longer accept
    # `executable_path`; if this raises TypeError, pass a
    # selenium.webdriver.chrome.service.Service object instead.
    wd = webdriver.Chrome(
        executable_path=r'D:\anaconda\envs\tf1\chromedriver.exe',
        options=option)
    wd.implicitly_wait(5)
    return wd


def notrobort(wd, File, index):
    """Fetch one BibTeX entry after the user has solved a captcha by hand.

    Called when the robot check appears: the function first sleeps so the
    user can pass the image verification in the browser window, then searches
    for ``File[index]`` and opens its BibTeX page.  If the BibTeX text is not
    there yet (a second captcha), it waits once more and retries.

    Args:
        wd: The Selenium driver showing the Google Scholar page.
        File: List of paper titles.
        index: Position in ``File`` of the paper to look up.

    Returns:
        The BibTeX text of the entry (it is also printed).

    Raises:
        NoSuchElementException: If the search box, the cite link, the BibTeX
            link, or (after the second wait) the BibTeX text cannot be found.
    """
    time.sleep(60)  # time for the manual image verification: 60 s
    search_box = wd.find_element(By.ID, 'gs_hdr_tsi')
    search_box.send_keys(File[index] + '\n')
    wd.find_element(By.LINK_TEXT, '引用').click()
    wd.find_element(By.LINK_TEXT, 'BibTeX').click()
    time.sleep(2)
    try:
        entry = wd.find_element(By.TAG_NAME, 'pre')
    except NoSuchElementException:
        print('将进行第二次人机验证\n')
        time.sleep(60)  # time for the second manual verification: 60 s
        entry = wd.find_element(By.TAG_NAME, 'pre')

    # Read the text before navigating away; the element is stale afterwards.
    text = entry.text
    print(text)
    time.sleep(2)
    wd.refresh()
    wd.back()
    wd.back()
    wd.back()
    wd.refresh()
    wd.back()
    return text


def bib(wd, index, l, File):
    """Look up every title on Google Scholar and save the BibTeX entries.

    Entries are written to ``bib.txt`` in the working directory (the file is
    overwritten), each preceded by a newline.  Papers whose cite/BibTeX links
    cannot be found are skipped.

    When the search box itself is missing, the page is assumed to be a robot
    check: the driver is closed and the user is asked to confirm with ``Y``.
    On confirmation a fresh browser is opened and `notrobort` retrieves the
    entry after manual verification.  Any other answer stops the run, because
    there is no live browser session left to continue with.

    Args:
        wd: A Selenium driver; it is closed by this function.
        index: Index of the first title to process.
        l: Stop index (exclusive), normally ``len(File)``.
        File: List of paper titles.
    """
    with open('bib.txt', 'w+', encoding='utf-8') as f:
        while index < l:
            try:
                wd.get('https://scholar.google.com/')
                wd.maximize_window()
                search_box = wd.find_element(By.ID, 'gs_hdr_tsi')
            except NoSuchElementException:
                wd.quit()
                key = input("请按 Y 进行人机验证\n")
                if key.strip().upper() != 'Y':
                    # The driver has already been closed; looping on would
                    # only crash on a dead session, so stop cleanly.
                    return
                print('将进行人机验证\n')
                wd = _new_driver()
                wd.get('https://scholar.google.com/')
                wd.maximize_window()
                try:
                    text = notrobort(wd, File, index)
                except NoSuchElementException:
                    # Verification did not get through; try this paper again
                    # (the user is prompted again if the check is still up).
                    continue
                f.write('\n' + text)
                index += 1
                continue

            search_box.send_keys(File[index] + '\n')
            try:
                wd.find_element(By.LINK_TEXT, '引用').click()
                wd.find_element(By.LINK_TEXT, 'BibTeX').click()
                text = wd.find_element(By.TAG_NAME, 'pre').text
            except NoSuchElementException:
                # No usable search result for this title: skip it.
                index += 1
                continue

            print(text)
            f.write('\n' + text)
            time.sleep(2)
            wd.refresh()
            wd.back()
            wd.back()
            wd.back()
            index += 1
    wd.quit()


def bibdownload(path):
    """Read the titles in *path* and download their BibTeX entries.

    Args:
        path: Path of the reference-list text file (see `read_txt`).
    """
    File = read_txt(path)
    wd = _new_driver()
    wd.get('https://scholar.google.com/')
    wd.maximize_window()
    bib(wd, 0, len(File), File)

    # After the scrape a second browser shows an unrelated page for 12 s and
    # is then closed.  Kept as in the original script; presumably a visual
    # "finished" signal (TODO confirm).
    wd = _new_driver()
    wd.get('https://www.apple.com.cn/')
    wd.maximize_window()
    time.sleep(12)
    wd.quit()


def paperrush(path):
    """Entry point: convert the reference list at *path* to ``bib.txt``."""
    bibdownload(path)


if __name__ == "__main__":
    path = r'D:\Documents\ref.txt'
    paperrush(path)
1.需要科学上网
2.path = r'D:\Documents\ref.txt'
存放所有参考文献的txt文件,要求每行以[序号]作者.文献名称[文献类型]这种格式开头(其实就是谷歌学术那种常用GBT的参考文献格式)
3.executable_path=r'D:\anaconda\envs\tf1\chromedriver.exe'
更换为自己环境的chromedriver.exe,而且要求和谷歌浏览器的版本一致,不然会报错
4.论文搜索多了会进行人机验证,这时需手动按“Y”进入,有一分钟的验证时间。可能会有两次,一次是在搜索关键词时,一次是在打开bib时
5.有时候直接搜索会无法找到文献,在代码中会自动跳过,需要在之后自行添加(不过这种情况还是比较少的,以中文文献居多,可能换成百度学术要好些)
6.最后结果保存在项目下的bib.txt中
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。