赞
踩
文件下载地址:https://pan.baidu.com/s/1scu7hvNFJxRSrt9oonZHlw?pwd=kxcc
提取码:kxcc
2020最新仿花生日记淘宝客双端原生APP网站源码
5vShop商城系统 v2.4
A3Mall APP商城系统 v1.0.3
A3Mall PHP商城系统源码 v1.7.2
A3Mallga开源商城系统 v1.7.6
API支付代理版自动发卡平台源码 v4.7.1
CareyShop PHP商城框架系统 v1.3.1
CmsEasy可视化编辑商城系统 v7.7.5.3
CRMEB Min开源商城 v4.3.2
CRMEB小程序公众号打通版商城系统 v4.1.0
DBShop php电子商务网店系统 v3.1 Release 211202
DSmall多商户B2B2C开源商城源码 v6.1.2
DSO2O外卖订餐开源系统源码 v4.0.6
DSShop TP5框架B2C开源商城源码(单店铺) v3.0.4
fanqie shop番茄社区多门店网店系统 php源码 v1.0
iWebShop 开源php电子商务软件 v5.10.210707
Jshop开源小程序商城系统 v2.6.0
LaiKe全场景电商系统(含APP与小程序) v3.5.0
leadshop开源商城 v1.4.6
Niushop一点牛社区团购源码 v1.0.0
Niushop开源商城Saas多开运营版 PHP源码 v1.0.5
oemshop商城系统SAAS平台版 v6.0.1
PHP云购源码附教程
PHP优惠卡发卡平台源码 v1.0
PHP公众号商家收银台微信扫码支付源码
PHP商城购物系统 v1.0 免费版
PHP实物卡售卡商城系统源码(商城源码 免登录)
php淘饭网在线要饭 v1.0
php竞价页订单管理系统(caozha-order ) v1.7.7
PHP自助下单系统彩虹商城源码V6.6免授权版
Q定制云印刷在线DIY定制系统 v5.0
SDCMS-B2C商城网站管理系统 V1.4
ShopXO企业级B2C免费开源电商系统 v2.2.1
StrongShop开源跨境商城 v1.4.2
Sylius 开源PHP电子商务网站框架 v1.10.5
Sylius 开源PHP电子商务网站框架 v1.9.5
Thinkphp5.1内核综合电子商务系统多用户B2B2C商城源码
thinkphp5仿阿里巴巴小说网站源码
Thinkphp内核开发京东淘宝唯品会自动抢单系统源码
Thinkphp高仿拼多多拼团源码(商城系统支持商家入驻)
WSTMart 商淘软件B2B2C多用户商城系统源码 v3.6.5
yershop开源网店系统 v3.9
youcms跨境电商多活动预告系统 v1.0
ZhiCms导购版(礼物) V4.0
一款可运营可二次开发淘宝客APP源码
一物一码数字化应用平台通用防伪追溯系统源码
乐得同城优惠券领取(发布)网站源码 v2.2.5
云划算试客系统旗舰安装版源码
五合一收款码在线生成网站源码(PayPal五合一收款+HTML单页版多模板免安装)
仿59store校园o2o系统 php源码 v7.1
企业级多用户发卡平台源码 v1.0
商擎MALL虚拟商品自动发货系统(含小程序) v1.1 build20211203
奇乐中介担保系统免费试用版 v1.2.7
币商智能量化机器人交易系统 v2.0.4
开源的PHP网店系统 PrestaShop v1.7.8.0
彼岸有客商城系统标准版 v1.0.5
得推B2B2C商城 v4.4
得推b2c商城系统 v5.0
得推php生鲜O2O连锁系统 v3.1
得推php社区团购系统 v3.1
得推跑腿O2O系统 v3.0
拼团返利电商系统(拼返系统) v2.6
易购cms企业商城系统(Ebuycms) v1.6
最新php淘宝客优惠券网站源码
最新小储云商城V1.782免授权PHP源码
最新逍遥B2C商城源码(PC+H5) v1.1.3
良精PHP商城网店购物系统 v1.21
荔枝个人发卡系统PHP源码 v2.0.0
萤火微信小程序商城源码 v1.1.05
金微手机商城系统 v0.3.9a
金微智能餐厅 v0.8.2E
魔众PHP在线B2C商城系统源码 v3.0.0
- import os
- import shutil
- import time
- from time import sleep
-
- import requests
- from bs4 import BeautifulSoup
- from docx import Document
- from docx.shared import Inches
-
- from framework.base.BaseFrame import BaseFrame
- from sprider.business.DownLoad import DownLoad
- from sprider.business.SeleniumTools import SeleniumTools
- from sprider.business.SpriderTools import SpriderTools
-
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- from sprider.model.SpriderEntity import SpriderEntity
- from sprider.access.SpriderAccess import SpriderAccess
-
-
class HuaJunCode:
    """Selenium crawler for the source-code listings of soft.onlinedown.net.

    It walks the paginated listing of one category, opens each resource's
    detail page, clicks its download button, moves the downloaded archive into
    ``save_path`` and finally writes a Word summary plus a password text file.
    """

    page_count = 1  # listing page currently being processed (1-based)
    base_url = "http://soft.onlinedown.net/"  # site being crawled
    save_path = "D:\\Sprider\\HuaJunCode\\"
    sprider_count = 118  # stop after this many successful downloads

    # 0-based sequence number of the first resource to collect. It is split by
    # max_pager into a start page and an index inside that page, so a crawl
    # can be resumed by changing only this number.
    sprider_start_count = 140

    max_pager = 25  # number of resources on one listing page
    haved_sprider_count = 0  # resources downloaded successfully so far
    word_content_list = []  # class-level fallback; __init__ gives each instance its own list
    folder_name = ""
    page_end_number = 0

    # Listing category id -> name of the output sub-folder.
    _FOLDER_NAMES = {
        176: "ASP源码",
        177: "PHP源码",
        178: "博客系统",
        179: "整站系统",
        180: "商务商城",
    }

    def __init__(self):
        # A per-instance list, so two crawlers never append to the same
        # class-level list object.
        self.word_content_list = []

    def _start_position(self):
        """Return ``(page, index)`` where collection starts.

        ``page`` is the 1-based listing page and ``index`` the 0-based
        position inside that page, both derived from ``sprider_start_count``
        and ``max_pager``.
        """
        full_pages, offset = divmod(int(self.sprider_start_count), int(self.max_pager))
        return full_pages + 1, offset

    def sprider(self, url_index=177):
        """Crawl one listing category, e.g. http://soft.onlinedown.net/sort/177/.

        Collection starts at the page computed from ``sprider_start_count``
        and stops when ``sprider_count`` resources have been downloaded or the
        last listing page has been processed. The browser is always closed
        when the crawl ends, whether it finished or failed.

        :param url_index: numeric category id used in the ``/sort/<id>/`` URL;
            ids not listed in ``_FOLDER_NAMES`` keep the current ``folder_name``
        :return: None
        """
        self.folder_name = self._FOLDER_NAMES.get(url_index, self.folder_name)
        start_page, _ = self._start_position()

        self.save_path = self.save_path + os.sep + self.folder_name

        BaseFrame().debug("开始采集HuaJunCodePHP...")
        sprider_url = (self.base_url + "/sort/{0}/1/".format(url_index))
        down_path = "D:\\Sprider\\HuaJunCode\\" + self.folder_name + "\\Temp\\"

        # Start from an empty temporary download directory.
        if os.path.exists(down_path):
            shutil.rmtree(down_path)
        os.makedirs(down_path, exist_ok=True)

        chrome_options = webdriver.ChromeOptions()
        diy_prefs = {'profile.default_content_settings.popups': 0,
                     'download.default_directory': down_path}
        # Point Chrome's downloads at the temporary directory.
        chrome_options.add_experimental_option('prefs', diy_prefs)
        chrome_options.add_argument('--headless')

        driver = webdriver.Chrome(options=chrome_options)
        try:
            driver.set_window_size(1280, 800)

            # The first listing page is loaded to read the total page count.
            driver.get(sprider_url)
            pager_content = driver.find_element(By.CLASS_NAME, "page").find_element(By.TAG_NAME, 'span').text
            page_end_number = int(pager_content.replace("共", "").replace("页", ""))
            li_list = driver.find_element(By.ID, "soft_list").find_elements(By.TAG_NAME, 'li')

            self.page_count = start_page

            while self.page_count <= page_end_number:  # stop after the last page
                try:
                    # Page 1 is already loaded above; every other page has to
                    # be fetched before its entries can be read.
                    if self.page_count != 1:
                        if self.haved_sprider_count == self.sprider_count:
                            BaseFrame().debug("采集到达数量采集停止...")
                            BaseFrame().debug("开始写文章...")
                            self.builder_word(self.folder_name, self.save_path, self.word_content_list)
                            BaseFrame().debug("文件编写完毕,请到对应的磁盘查看word文件和下载文件!")
                            break

                        next_url = (self.base_url + "/sort/{0}/{1}/".format(url_index, self.page_count))
                        driver.get(next_url)
                        li_list = driver.find_element(By.ID, "soft_list").find_elements(By.TAG_NAME, 'li')

                    self.sprider_detail(driver, li_list, self.page_count, page_end_number, down_path)
                except Exception as e:
                    print("sprider()执行过程出现错误:" + str(e))
                # Advance even after a failure so that a page which keeps
                # raising cannot make the loop retry it forever.
                self.page_count = self.page_count + 1
        finally:
            # Release the browser process instead of leaving it running.
            driver.quit()

    @staticmethod
    def _find_cover_image(driver):
        """Return the cover ``<img>`` element of the current detail page, or None.

        A ``<center>`` tag is preferred; otherwise the element with class
        ``sortPic`` is used. ``judeg_element_isexist`` returning 3 is treated
        as "element not present", which is how the rest of this class uses it.
        """
        if SeleniumTools.judeg_element_isexist(driver, "TAG_NAME", "center") != 3:
            return driver.find_element(By.TAG_NAME, "center").find_element(By.TAG_NAME, "img")
        if SeleniumTools.judeg_element_isexist(driver, "CLASS_NAME", "sortPic") != 3:
            return driver.find_element(By.CLASS_NAME, "sortPic").find_element(By.TAG_NAME, "img")
        return None

    def sprider_detail(self, driver, element_list, page_count, max_page, down_path):
        """Download the resources listed on one listing page.

        :param driver: Selenium WebDriver currently showing the listing page
        :param element_list: the ``<li>`` elements of the listing
        :param page_count: 1-based number of the listing page
        :param max_page: number of the last listing page; the Word summary is
            written once that page has been processed
        :param down_path: temporary directory Chrome downloads into
        :return: None
        """
        # Read title and URL of every entry before navigating away: the
        # elements belong to the listing page and cannot be queried once the
        # driver has opened a detail page.
        resources = []
        for element in element_list:
            next_url = element.find_element(By.TAG_NAME, 'a').get_attribute("href")
            coder_title = element.find_element(By.TAG_NAME, 'img').get_attribute("title")
            resources.append((coder_title, next_url))
        element_length = len(element_list)

        # The resume offset only applies to the page collection starts on;
        # every later page is processed from its first entry.
        start_page, start_index = self._start_position()
        self.sprider_start_index = start_index
        index = start_index if int(page_count) == start_page else 0

        while index < element_length:
            os.makedirs(down_path, exist_ok=True)

            if self.haved_sprider_count == self.sprider_count:
                BaseFrame().debug("采集到达数量采集停止...")
                break

            coder_title, next_url = resources[index]
            time.sleep(1)

            index = index + 1
            sprider_info = "正在采集第" + str(page_count) + "页的第" + str(index) + "个资源"
            BaseFrame().debug(sprider_info)
            driver.get(next_url)  # open the detail page
            try:
                # Record the resource; anything already recorded is skipped.
                codeEntity = SpriderEntity()
                codeEntity.sprider_base_url = self.base_url
                codeEntity.create_datetime = SpriderTools.get_current_datetime()
                codeEntity.sprider_url = next_url
                codeEntity.sprider_pic_title = coder_title
                codeEntity.sprider_pic_index = str(index)
                codeEntity.sprider_pager_index = page_count
                codeEntity.sprider_type = "code"
                if SpriderAccess().query_sprider_entity_by_urlandindex(next_url, str(index)) is None:
                    SpriderAccess().save_sprider(codeEntity)
                else:
                    BaseFrame().debug(coder_title + next_url + "数据采集过因此跳过")
                    continue

                # No download button on the page: nothing to fetch.
                if SeleniumTools.judeg_element_isexist(driver, "CLASS_NAME", "bendown") == 3:
                    driver.back()
                    BaseFrame().debug(coder_title + "不存在源码是soft因此跳过哦....")
                    continue
                print("准备点击下载按钮...")
                driver.find_element(By.CLASS_NAME, "bendown").click()
                # NOTE(review): presumably waits for a zip/rar file to appear
                # in down_path (90 / 1 look like timeout / interval) - confirm
                # against SpriderTools.
                result, message = SpriderTools.judge_file_exist(True, 90, 1, down_path, "zip|rar")

                if result is True:
                    cover = self._find_cover_image(driver)
                    if cover is not None:
                        image_src = cover.get_attribute("src")
                        DownLoad(self.save_path).down_cover_image__(image_src, coder_title)  # cover image

                    # Title plus cover-image path, consumed by builder_word.
                    sprider_content = [coder_title, self.save_path + os.sep + "image" + os.sep + coder_title + ".jpg"]
                    self.word_content_list.append(sprider_content)
                    self.haved_sprider_count = self.haved_sprider_count + 1
                    BaseFrame().debug("已经采集完成第" + str(self.haved_sprider_count) + "个")
                    time.sleep(1)
                    driver.back()

                    # Move the first file found in the temporary directory to
                    # the output folder, named after the resource title.
                    files = os.listdir(down_path)
                    src_file = os.path.join(down_path, files[0])
                    file_ext = os.path.splitext(src_file)[-1]
                    dst_file = os.path.join(self.save_path, coder_title + file_ext)
                    shutil.move(src_file, dst_file)
                else:
                    BaseFrame().error("检测下载文件出错可能原因是等待时间不够已经超时,再等待60秒...")
                    time.sleep(60)
                    shutil.rmtree(down_path, ignore_errors=True)
            except Exception as e:
                # Log first so the cause is reported even if cleanup fails.
                BaseFrame().error("sprider_detail()执行过程出现错误:" + str(e))
                time.sleep(60)
                shutil.rmtree(down_path, ignore_errors=True)

        if int(page_count) == int(max_page):
            self.builder_word(self.folder_name, self.save_path, self.word_content_list)
            BaseFrame().debug("文件编写完毕,请到对应的磁盘查看word文件和下载文件!")

    def builder_word(self, word_title, save_path, list_files):
        """Write the Word summary ``<save_path>/<word_title>.docx``.

        Also copies the companion files and writes the password text file.
        Any failure is printed rather than raised.

        :param word_title: document title, also used as the file name
        :param save_path: directory the .docx is saved to
        :param list_files: ``[title, cover_image_path]`` pairs, one per resource
        :return: None
        """
        try:
            self.copy_file(self.save_path)

            print("Create Word" + word_title)
            file_count = len(list_files)

            self.gen_passandtxt(file_count, word_title)

            document = Document()
            document.add_heading("PHP" + word_title + "源码", level=2)
            document.add_paragraph("分享" + str(file_count) + "个" + word_title + "PHP源码,总有一款适合你\r\n"
                                   "下面是文件的名字,我放了一些图片,文章里不是所有的图主要是放不下...,大家下载后可以看到。")

            # One paragraph listing every title, followed by title + cover.
            all_titles = "".join(str(files[0]) + "\r" for files in list_files)
            document.add_paragraph(all_titles)
            for files in list_files:
                try:
                    document.add_paragraph(files[0])
                    document.add_picture(files[1], width=Inches(3))
                except Exception as e:
                    # A missing or unreadable cover must not abort the
                    # document, but it should not vanish silently either.
                    print("Skip cover image for " + str(files[0]) + ": " + str(e))

            document.add_paragraph("最后送大家一首诗:")
            paragraph = document.add_paragraph()
            # Bold is a property of a run, so it is set on each run.
            for poem_line in ("山高路远坑深,\r",
                              "大军纵横驰奔,\r",
                              "谁敢横刀立马?\r",
                              "惟有点赞加关注大军。\r"):
                paragraph.add_run(poem_line).bold = True

            file_full_path = save_path + os.sep + word_title + ".docx"
            document.save(file_full_path)
        except Exception as e:
            print("Create Word Fail reason:" + str(e))

    def copy_file(self, target_path):
        """Copy the companion files from the working directory to ``target_path``.

        The target directory is created when it does not exist yet.

        :param target_path: directory that receives the copies
        :return: None
        :raises OSError: when a source file is missing or cannot be copied
        """
        print("copy files")
        os.makedirs(target_path, exist_ok=True)
        for file_name in ("薅羊毛专业版.apk", "薅羊毛专业版.pdf", "readme.docx"):
            shutil.copyfile(file_name, os.path.join(target_path, file_name))

    def gen_passandtxt(self, file_count, word_title):
        """Append the promotional text and a generated password to a text file.

        The file is ``<save_path>/<file_count>sell_pass.txt``; it is opened in
        append mode, so repeated calls add to it.

        :param file_count: number of collected resources, used in the text
            and in the file name
        :param word_title: category title used in the text
        :return: None
        """
        print("Create PassWord and Pass.txt")
        password = "".join(SpriderTools.gen_password())

        pieces = [
            "\n分享" + str(file_count) + "个" + word_title + "PHP源码",
            "\n\r",
            "\n都到这里了您就支持一下呗!谢谢老铁~~",
            "\n\r",
            "\n文件我就不一一列举了,送老铁一首打油诗",
            "\n学习知识费力气,",
            "\n收集整理更不易。",
            "\n知识付费甚欢喜,",
            "\n为咱码农谋福利。",
            "\n\r",
            "\n\r",
            "\n感谢您的支持",
            "\n\r",
            "\n-------------------------------------------华丽分割线-------------------------------------------------------",
            "\n友情提醒解压密码:" + password + "",
        ]
        content = "".join(pieces)

        full_path = self.save_path + os.sep + "" + str(file_count) + "sell_pass.txt"
        with open(full_path, 'a', encoding='utf-8') as f:
            f.write(content)
-
-
if __name__ == "__main__":
    # Script entry point: crawl listing category 177, which sprider() maps
    # to the "PHP源码" output folder.
    crawler = HuaJunCode()
    crawler.sprider(177)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。