赞
踩
爬虫模拟浏览器访问:仅安装 selenium 已经不能直接使用,使用 webdriver 还需要下载谷歌浏览器驱动 chromedriver.exe(版本需与本机 Chrome 匹配),并放到 Python 环境的 Scripts 文件夹下。
from selenium import webdriver

# Configure a headless Chrome session (no visible browser window).
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
# Point the browser's download directory at the ERP file directory.
chrome_options.add_experimental_option(
    "prefs",
    {"download.default_directory": self.ERP_FILE_DIR},
)
# Start the browser and open the login page.
self.driver = webdriver.Chrome(options=chrome_options)
self.driver.get(self.login_url)
from openpyxl import load_workbook

# Copy model.xlsx to new.xlsx by loading the template and saving it under the new name.
load_workbook("model.xlsx").save("new.xlsx")

wb = load_workbook(excel_filename)
sheet = wb[self.sheet_file]

# After a merge, data can only be written to the top-left cell of the range,
# i.e. the left-hand coordinate (sheet.merge_cells('A1:C3') merges a rectangle).
for merged_range in ("D1:K1", "D3:E3"):
    sheet.merge_cells(merged_range)

column_dims = sheet.column_dimensions
# Set the column width.
column_dims['A'].width = 23
# Hide columns G through J, and column W.
column_dims.group('G', 'J', hidden=True)
column_dims.group('W', hidden=True)

# Zoom the sheet to 80%.
sheet.views.sheetView[0].zoomScale = 80
# Freeze the first column and the first 5 rows: everything above and to the
# left of the given cell stays fixed.
sheet.freeze_panes = 'B6'
# 写实际内容,with解决了excel表每次打开都要修复的问题
try:
df = pd.DataFrame(content)
df1 = pd.DataFrame(pd.read_excel(self.GENERATE_FILE_DIR + '%s_%s.xlsx' % (self.prefix, self.department),
sheet_name=self.sheet_file))
# 写入内容时mode='a'以追加方式写入,不存在sheet表会自动创建,存在则直接写,不会覆盖原来内容
with pd.ExcelWriter(self.GENERATE_FILE_DIR + '%s_%s.xlsx' % (self.prefix, self.department),
engine='openpyxl', mode='a') as writer:
book = load_workbook(self.GENERATE_FILE_DIR + '%s_%s.xlsx' % (self.prefix, self.department))
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df_rows = df1.shape[0] # 获取原数据的行数 df1.shape[1]为列数
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, sheet_name=self.sheet_file, startrow=df_rows + 1, index=False, header=False)
except Exception as e:
traceback.print_exc()
logger.error("往excel文件%s中sheet表%s写入数据失败,%s" % (self.department, self.sheet_file, e))
return {"result": False, "value": None, "info": {"en": e, "cn": e}}
scheduler = BlockingScheduler()
# Run the report job at 06:00 on the 10th day of every month.
scheduler.add_job(
    AutoSendStockAgingReport().main,
    trigger='cron', month='1-12', day='10', hour='6', minute='0'
)
# Logged before start(): a blocking scheduler's start() does not return until
# the scheduler is shut down, so a message placed after it would only be
# written once the scheduler had already stopped.
logger.info("statistic scheduler start success")
try:
    scheduler.start()
except (KeyboardInterrupt, SystemExit):
    # Ctrl+C or interpreter exit: stop the scheduler before leaving.
    scheduler.shutdown()
    # NOTE(review): this branch is also reached on a deliberate interrupt, so
    # the "start-up fail" wording may be misleading - confirm before rewording.
    logger.info("statistic scheduler start-up fail")
pandas 的具体用法请自行查阅官方文档或搜索(如百度)。
# Read the first sheet, skipping the 10 leading rows and applying the column
# names supplied in `name`.
data = pd.read_excel(excel_file, sheet_name=0, skiprows=10, names=name)
# NOTE: despite its name, `df` is a GroupBy object keyed on the "Group" column.
df = data.groupby(by=["Group"])
# Per-group sums of Qty / Amount.
# TODO: confirm later that the grouping used for these sums is correct.
# The built-in GroupBy sum replaces aggregate(np.sum): it gives the same result,
# avoids the pandas FutureWarning for NumPy callables, and matches group2.
group1 = df["Total Amount"].sum()
group2 = df["Total Amount.1"].sum()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。