赞
踩
- from selenium import webdriver
- import time
- # from selenium.webdriver.common.action_chains import ActionChains
- from selenium.webdriver.common.by import By
-
- from selenium.webdriver.common.keys import Keys
- # from selenium.webdriver.common.keys import Keys PhantomJS浏览器创建器对象
这是脚本开头需要导入的库。我们首先要安装并配置 selenium,还需要下载浏览器对应的 WebDriver 驱动。
我直接把驱动放到了 python.exe 所在的目录里。
我们需要访问网页中的元素,并用代码代替人工操作。
# Log in to gscloud.cn, choose a dataset and run a longitude/latitude
# bounding-box search, leaving the browser on the result list.
# Relies on `webdriver`, `By` and `time` imported at the top of the script.
import getpass
import os

from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

LOGIN_URL = 'https://www.gscloud.cn/accounts/login_user'
ELEMENT_TIMEOUT = 30  # max seconds to wait for an element before failing
STEP_DELAY = 3        # seconds to let the page react after each action


def _wait_for(xpath, condition):
    """Return the element at *xpath* once *condition* holds for it.

    Raises selenium's TimeoutException if the condition is not met within
    ELEMENT_TIMEOUT seconds.
    """
    return WebDriverWait(driver, ELEMENT_TIMEOUT).until(
        condition((By.XPATH, xpath))
    )


def _click(xpath):
    """Click the element at *xpath*, then pause so the page can react."""
    _wait_for(xpath, EC.element_to_be_clickable).click()
    time.sleep(STEP_DELAY)


def _type(xpath, text):
    """Type *text* into the input element at *xpath*."""
    _wait_for(xpath, EC.visibility_of_element_located).send_keys(text)


# Credentials are taken from the environment (or asked for interactively)
# instead of being stored in the source file.
login_email = os.environ.get('GSCLOUD_EMAIL') or input('请输入登录邮箱:').strip()
login_password = os.environ.get('GSCLOUD_PASSWORD') or getpass.getpass('请输入密码:')

driver = webdriver.Chrome()
driver.get(LOGIN_URL)

_type('//*[@id="email"]', login_email)
_type('//*[@id="password"]', login_password)
# The captcha has to be read from the browser window and typed in by a human.
_type('//*[@id="id_captcha_1"]', input('请输入验证码:').strip())
time.sleep(STEP_DELAY)

# Clicks are performed strictly in this order; each one waits until its
# target is clickable and is followed by a short pause.
for click_target in (
    '//*[@id="btn-login"]',                       # submit the login form
    '/html/body/div[1]/div[1]/div[2]/ul/li[2]',   # NOTE(review): presumably a top-menu entry - confirm
    '//*[@id="dataset-btn"]/img',                 # open the dataset dialog
    '//*[@id="all-datasets-dlg"]/div/div[2]/div/div[2]/ul/li[1]/span[1]',
    '//*[@id="all-datasets-dlg"]/div/div[2]/div/div[2]/ul/li[4]/span[1]',
    '//*[@id="c421"]',                            # NOTE(review): presumably the dataset checkbox - confirm
    '//*[@id="all-datasets-dlg"]/div/div[3]/button[2]',
    '//*[@id="condition-panel"]/div[2]/div[2]/div',
    '//*[@id="condition-panel"]/div[2]/div[2]/div/label[2]',
):
    _click(click_target)

# Search bounding box: longitude 70.5 .. 134.5, latitude 15.5 .. 54.5.
for field_xpath, coordinate in (
    ('//*[@id="lng_input_1"]', '70.5'),
    ('//*[@id="lng_input_2"]', '134.5'),
    ('//*[@id="lat_input_1"]', '15.5'),
    ('//*[@id="lat_input_2"]', '54.5'),
):
    _type(field_xpath, coordinate)
    time.sleep(STEP_DELAY)

# The original script clicks the search button twice; kept unchanged.
# NOTE(review): presumably the first click does not always register - confirm.
_click('//*[@id="search-btn"]')
_click('//*[@id="search-btn"]')
脚本会一步一步点击元素,最后跳转到这个页面。
# Walk the paginated result list and click the download icon of every row.
# Relies on `driver`, `By`, `Keys` and `time` set up earlier in the script.
from selenium.common.exceptions import NoSuchElementException

page_num = 222        # last result page to download
page = 1              # result page to start from
ROWS_PER_PAGE = 10
PAGE_LOAD_DELAY = 15  # seconds to wait after jumping to a page
DOWNLOAD_DELAY = 60   # seconds allowed for each download
PAGE_INPUT_XPATH = '//*[@id="pager"]/div/table/tr/td[7]/input'
ROW_DOWNLOAD_XPATH = (
    '//*[@id="result-listview"]/div/table/tr[{}]/td[2]/div/div/a[2]/span/img'
)


def _go_to_page(number):
    """Enter *number* in the pager's input box, press Enter and wait."""
    page_box = driver.find_element(By.XPATH, PAGE_INPUT_XPATH)
    page_box.clear()
    page_box.send_keys(str(number))
    page_box.send_keys(Keys.RETURN)
    time.sleep(PAGE_LOAD_DELAY)


while page <= page_num:
    # Navigate at the start of each iteration so the loop never requests
    # a page beyond page_num once the last page has been handled.
    _go_to_page(page)
    print('当前下载第{}页'.format(page))
    # NOTE(review): an older note on this loop said only rows 3-12 could be
    # fetched, but the code iterates rows 1-10 - confirm which is right.
    for tr_num in range(1, ROWS_PER_PAGE + 1):
        d_everypage = ROW_DOWNLOAD_XPATH.format(tr_num)
        try:
            driver.find_element(By.XPATH, d_everypage).click()
        except NoSuchElementException:
            # A page with fewer rows should not abort the whole job;
            # report it and move on to the next page.
            print('第{}页第{}行没有找到下载按钮,跳过该页剩余行'.format(page, tr_num))
            break
        print(d_everypage)
        time.sleep(DOWNLOAD_DELAY)
    page += 1

# Block until the user presses Enter before the script ends.
input()
想要获得对应页面元素的 XPath,就用浏览器的“检查”功能:选中对应的元素代码,右键点击后选择 Copy,
再选择 Copy XPath 即可。这样配合 find_element(By.XPATH, ...) 使用特别方便。
一般每一步之后都要调用 time.sleep,因为需要等待网络响应之后再点击。特别是网络不好的时候,sleep 的时间要适当长一点,这样可以保证程序不出错。
有时由于网页响应慢,页面元素仍停留在上一页而没有更新,导致上一页的数据被重复下载,而当前页的数据被遗漏。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。