赞
踩
基于我目前开发了几个影刀RPA的感受及 python性能提升的研究,及对比同样功能java实现
感觉提升RPA途径:
1、可以使用Excel的公式,这个效率还行,能充分利用计算机的CPU的线程、进程。
2、java或c#开发的应用程序, java稍微麻烦一点要安装java JDK 并且要配置好路径,设定一个批处理文件即可,
3、影刀的for 或 python的其他循环的方法,也效果不好,我测试是要 5个小时的,用java实现 90秒即可实现。速度是python常规方法的200倍,是使用python加速方法(apply())的128倍,我测试时数据是这样子的,网上大咖说python某些场景下,比其他开发语言慢200-300倍,是一致的,毕竟它是解释型语言,不是编译的,每一次都要解释之后才能运行。你有多少次循环,就有多少次解释,然后再执行。
说说我 python 提速的途径:
a、我试过taichi,不支持字符串,仅支持纯数值
b、pypy3安装有问题,没安装上,
c、其他的加速工具,要么重新定义新的数据类型,要么改写之前写好的python代码。
附上我python 测试的几段代码,写得比较乱,但是主要方法都有的:
- # import pandas as pd
- # import numpy as np
- import pandas as pd # modin.
- import time # 引入time模块
- # import taichi as ti
-
- # ti.init()
- # from distributed import Client
-
- # client = Client()
- # df = pd.DataFrame({'month': [1, 4, 7, 10],
- # 'year': [2012, 2014, 2013, 2014],
- # 'sale': [55, 40, 84, 31]})
- # print(df)
- # df.set_index('year') #如果索引列有数据重复也没关系
- # print(df)
- # aa = np.random.randn(6)
- # print('aa:{0}',aa)
- # --------------------------------------
- # df = pd.DataFrame({'a': np.random.randn(6),
- # 'b': ['foo', 'bar'] * 3,
- # 'c': np.random.randn(6)})
- # print('----------------')
- # def my_test(a, b):
- # return a + b
- #
- # print(df)
- # df['Value'] = df.apply(lambda row: my_test(row['a'], row['c']), axis=1,result_type=None) # 方法1
- # print(df)
- #
- # df['Value2'] = df['a'] + df['c'] # 方法2
- # print(df)
- # ------------------------------------
# Load the two CSV inputs used by the loop benchmarks in this script.
#
# Raw strings keep the Windows backslashes literal: '\l' and '\e' are not
# valid escape sequences, so the non-raw spelling only worked by accident and
# produces a warning on recent Python versions.
#
# Neither file is read with a header row, so columns are labelled 0, 1, 2, ...
# NOTE(review): a commented-out `names=` argument in the original suggested the
# listFromDb columns are OrderId, BelongTime, ShopCode, ShopCname - unconfirmed.
dfListFromDb = pd.read_csv(
    r'D:\listFromDb.csv',
    header=None,
    usecols=[0, 1, 2, 3],
)

# The first line of this file is skipped and only its first two columns are kept.
dfexcelData2Abstract = pd.read_csv(
    r'D:\excelData2Abstract.csv',
    header=None,
    skiprows=[0],
    usecols=[0, 1],
)

print('xxxxxxxxxxx')

# Deep copy, so the benchmark functions receive a frame independent of the one
# loaded above.
dfexcelData2Abstract2 = dfexcelData2Abstract.copy(True)
-
- # @ti.func
def ForloopDoubleLIstRunTime(dfListFromDb, dfexcelData2Abstract2):
    """Time a nested positional-index (``iloc``) scan over two DataFrames.

    For every row of ``dfexcelData2Abstract2`` the first column is converted
    to a string and compared against the first column of each
    ``dfListFromDb`` row, stopping at the first row whose value is a
    substring of it. A match is not recorded anywhere; the function only
    prints the elapsed wall-clock time in seconds.

    Args:
        dfListFromDb: DataFrame whose first column holds the keys to look for.
        dfexcelData2Abstract2: DataFrame whose first column holds the text
            that is searched.

    Returns:
        None.
    """
    ticks = time.time()
    # Iterate the frame that was passed in rather than a module-level global,
    # so the function measures the data the caller supplied.
    for x in range(len(dfexcelData2Abstract2)):
        # The searched text is constant for the whole inner loop.
        haystack = str(dfexcelData2Abstract2.iloc[x, 0])
        for y in range(len(dfListFromDb)):
            if str(dfListFromDb.iloc[y, 0]) in haystack:
                break
    print('ForloopDoubleLIstRunTime 耗时:{}'.format(time.time() - ticks))
# Placeholder list kept from the original script; no active line in the
# visible script reads it.
list24result = list()

# Run the iloc-based benchmark, then print the CPU time the process has used so far.
ForloopDoubleLIstRunTime(dfListFromDb, dfexcelData2Abstract2)
# Author's figures noted from earlier runs: 120second 263, 208 132 116 99 98
print(time.process_time())
-
- # @ti.func
def DoubleLIstRunTime_iterrows(dfListFromDb, dfexcelData2Abstract2):
    """Time a nested ``iterrows`` scan over two DataFrames.

    Each row of ``dfexcelData2Abstract2`` is compared with the rows of
    ``dfListFromDb`` in order. The value under label ``0`` of both rows is
    converted to a string; at the first ``dfListFromDb`` value that is a
    substring of the outer value, one line describing the match is printed
    and the inner scan stops. The elapsed wall-clock seconds are printed at
    the end.

    Args:
        dfListFromDb: DataFrame providing the keys (label ``0``).
        dfexcelData2Abstract2: DataFrame providing the searched text (label ``0``).

    Returns:
        None.
    """
    started = time.time()
    for outer_label, outer_row in dfexcelData2Abstract2.iterrows():
        for _, db_row in dfListFromDb.iterrows():
            if str(db_row[0]) not in str(outer_row[0]):
                continue
            message = ''.join([
                'index:', str(outer_label),
                'str(dfexcelData2Abstract2_row[0]):', str(outer_row[0]),
                '#',
                'str(dfListFromDb_rowdb[0]):', str(db_row[0]),
            ])
            print(message)
            break
    elapsed = time.time() - started
    print('DoubleLIstRunTime_iterrows 耗时:{}'.format(elapsed))
-
- # DoubleLIstRunTime_iterrows(dfListFromDb, dfexcelData2Abstract2) #123secend
- # print(str(time.process_time())) #377 95 117 168 160
-
- # @ti.kernel
def DoubleLIstRunTime_itertuples(dfListFromDb, dfexcelData2Abstract2):
    """Time an ``iterrows`` outer loop combined with an ``itertuples`` inner loop.

    Every row of ``dfexcelData2Abstract2`` is printed, then its first value
    is converted to a string and compared with the first column of each
    ``dfListFromDb`` tuple, stopping at the first one that is a substring of
    it. A match is not recorded; only the elapsed wall-clock seconds are
    printed at the end.

    Args:
        dfListFromDb: DataFrame whose first column holds the keys to look for.
        dfexcelData2Abstract2: DataFrame whose first column holds the text
            that is searched.

    Returns:
        None.
    """
    ticks = time.time()
    for index, dfexcelData2Abstract2_row in dfexcelData2Abstract2.iterrows():
        print('外层循环dfexcelData2Abstract2Index:' + str(index)
              + '#dfexcelData2Abstract2_row:' + str(dfexcelData2Abstract2_row))
        haystack = str(dfexcelData2Abstract2_row.iloc[0])
        for rowdb in dfListFromDb.itertuples():
            # Position 0 of the tuple is the index and position 1 the first
            # column. Positional access works whatever the column labels are,
            # unlike the auto-generated field name '_1'.
            if str(rowdb[1]) in haystack:
                break
    # Label the timing with this function's own name so the output cannot be
    # confused with the iterrows benchmark.
    print('DoubleLIstRunTime_itertuples 耗时:{}'.format(time.time() - ticks))
-
-
- # DoubleLIstRunTime_itertuples(dfListFromDb, dfexcelData2Abstract2) #123secend
- # print(str(time.process_time())) #377 95 117 168 160
-
- def testfindInApply(tempRow):
- i =0
- # print('外层循环tempRow:' + str(i) + '#str(tempRow):' + str(tempRow) + '#' )
- for rowdb in dfListFromDb.itertuples():
- i = i + 1
- if str(tempRow[0]).__contains__(str(getattr(rowdb, '_1'))):
- # print('内层循环TuplesIndex:'+str(i)+'#str(tempRow[0]):'+str(tempRow[0])+'#'+"str(getattr(rowdb['_1'])):"+str(getattr(rowdb,'_1')))
- tempRow[2]=str(getattr(rowdb, '_1'))
- return True
- # else:
- # return False
-
- # print(dfexcelData2Abstract2)
- # dfexcelData2Abstract2[2]='-----------------------------------'
- # print(dfexcelData2Abstract2)
- # ticks = time.time()
- # dfexcelData2Abstract2.apply(testfindInApply,axis=1)
- # ticks2 = (time.time() - ticks) / 60
- # print('dfexcelData2Abstract2.apply 耗时:{}'.format(time.time() - ticks)) # +str(ticks2))
- # print(dfexcelData2Abstract2)
- # dfexcelData2Abstract2.to_csv("d:\dfexcelData2Abstract2Output.csv")
-
java代码:
- package com.company;
-
- import javax.swing.*;
- import java.io.*;
- import java.nio.charset.StandardCharsets;
- import java.text.SimpleDateFormat;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.Iterator;
- import java.util.List;
- import java.io.BufferedReader;
- import java.io.FileReader;
- import java.io.IOException;
- import java.io.BufferedReader;
- import java.io.FileReader;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
-
- import static sun.misc.Version.print;
- import static sun.misc.Version.println;
-
/**
 * Command-line benchmark: for each row of excelData2Abstract.csv, finds the
 * first row of listFromDb.csv whose first field is contained in the abstract
 * row's first field, and stores that row's second field in the abstract
 * row's third column. The result is written to DbListModify.csv and progress
 * timestamps are written to a timestamped log file.
 *
 * NOTE(review): the last two lines of excelData2Abstract.csv are never loaded
 * or processed (see the "length - 2" bounds). This is kept as in the
 * original - confirm it is intentional.
 */
public class DbListModify {
    // Rows of excelData2Abstract.csv: columns 0 and 1 come from the file,
    // column 2 starts empty and receives the matched value.
    static String[][] arrexcelData2Abstract;
    // Rows of listFromDb.csv; only columns 0 and 1 are populated.
    static String[][] arrlistFromDb;
    // Progress log file.
    static File file;
    // Result CSV file.
    static File fileCsv;
    // Timestamp format used for the log file name and the log lines.
    static SimpleDateFormat df;

    /**
     * Runs the whole load / match / write pipeline. Any exception is printed
     * to stderr; both output writers are closed even when a step fails.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS");

        try {
            file = new File("D:\\test_appendfile31" + now().replace(":", "").replace("/", "") + ".txt");
            fileCsv = new File("D:\\DbListModify.csv");
            if (!file.exists()) {
                file.createNewFile();
            }
            if (!fileCsv.exists()) {
                // Create the CSV file itself, not the log file a second time.
                fileCsv.createNewFile();
            }

            // try-with-resources flushes and closes both writers on every path.
            try (FileWriter fileWritter = new FileWriter(file.getAbsoluteFile(), false);
                 FileWriter CsvfileWritter = new FileWriter(fileCsv.getAbsoluteFile(), false)) {

                int IntCsvexcelData2AbstractLength = countLines("d:\\excelData2Abstract.csv", "gb2312");
                fileWritter.write("IntCsvexcelData2AbstractLength:" + IntCsvexcelData2AbstractLength + "\r\n");
                readexcelData2AbstractCSV("d:\\excelData2Abstract.csv", IntCsvexcelData2AbstractLength, 3);

                System.out.println("itemexcelData2Abstract" + now());
                fileWritter.write("itemexcelData2Abstract" + now() + "\r\n");

                int IntCsvlistFromDbLength = countLines("d:\\listFromDb.csv", "utf-8");
                fileWritter.write("IntCsvlistFromDbLength:" + IntCsvlistFromDbLength + "\r\n");

                // Size the lookup table by its own line count, not the abstract file's.
                readlistFromDbCSV("d:\\listFromDb.csv", IntCsvlistFromDbLength, 3);
                System.out.println("arrlistFromDb" + now());
                fileWritter.write("CsvexcelData2Abstract赋值开始:" + now() + "\r\n");

                int rowsToProcess = IntCsvexcelData2AbstractLength - 2;
                for (int inta = 0; inta < rowsToProcess; inta++) {
                    // The searched text is constant for the whole inner loop.
                    String haystack = arrexcelData2Abstract[inta][0].trim();
                    for (int intb = 0; intb < IntCsvlistFromDbLength; intb++) {
                        if (haystack.contains(arrlistFromDb[intb][0].trim())) {
                            arrexcelData2Abstract[inta][2] = arrlistFromDb[intb][1].trim();
                            break;
                        }
                    }
                }

                fileWritter.write("CsvexcelData2Abstract赋值结束:写入CSV开始" + now() + "\r\n");
                StringBuilder stringBuilder = new StringBuilder();
                for (int inta = 0; inta < rowsToProcess; inta++) {
                    stringBuilder.append(arrexcelData2Abstract[inta][0]).append(",")
                            .append(arrexcelData2Abstract[inta][1]).append(",")
                            .append(arrexcelData2Abstract[inta][2]).append(",")
                            .append("\r\n");
                    System.out.println(inta);
                }
                CsvfileWritter.write(stringBuilder.toString());
                fileWritter.write("写入CSV完成" + now() + "\r\n");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Returns the current time formatted with {@link #df}. */
    private static String now() {
        return df.format(System.currentTimeMillis());
    }

    /**
     * Counts the lines of a text file.
     *
     * @param pFilename   path of the file to read
     * @param charsetName charset used to decode the file
     * @return number of lines returned by {@code readLine()}
     * @throws IOException if the file cannot be opened or read
     */
    private static int countLines(String pFilename, String charsetName) throws IOException {
        int count = 0;
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(pFilename), charsetName))) {
            while (reader.readLine() != null) {
                count++;
            }
        }
        return count;
    }

    /**
     * Loads excelData2Abstract.csv into {@link #arrexcelData2Abstract}.
     * Columns 0 and 1 are taken from each comma-split line (empty string if
     * the line has fewer fields) and column 2 is initialised to "". Loading
     * stops once {@code IntCsvexcelData2AbstractLength - 2} rows are stored.
     *
     * NOTE(review): the file is decoded with the platform default charset,
     * while the line count in main uses gb2312 - confirm which is intended.
     *
     * @param pFilename                      path of the CSV file
     * @param IntCsvexcelData2AbstractLength number of rows to allocate
     * @param intcollen                      number of columns to allocate
     */
    private static void readexcelData2AbstractCSV(String pFilename, int IntCsvexcelData2AbstractLength, int intcollen) {
        int i = 0;
        try (BufferedReader br = new BufferedReader(new FileReader(pFilename))) {
            arrexcelData2Abstract = new String[IntCsvexcelData2AbstractLength][intcollen];
            String line;
            while ((line = br.readLine()) != null) {
                String[] temp = line.split(",");
                for (int j = 0; j < 3; j++) {
                    if (j == 2) {
                        arrexcelData2Abstract[i][j] = "";
                    } else {
                        // A short line yields an empty field instead of an exception.
                        arrexcelData2Abstract[i][j] = j < temp.length ? temp[j] : "";
                    }
                }
                System.out.println("i:" + i);
                i = i + 1;
                if (i == IntCsvexcelData2AbstractLength - 2) {
                    return;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads listFromDb.csv into {@link #arrlistFromDb}, keeping the first two
     * comma-separated fields of every line (empty string if a field is
     * missing) and echoing each stored field to stdout.
     *
     * NOTE(review): the file is decoded with the platform default charset,
     * while the line count in main uses utf-8 - confirm which is intended.
     *
     * @param pFilename              path of the CSV file
     * @param IntCsvlistFromDbLength number of rows to allocate
     * @param intcollen              number of columns to allocate
     */
    private static void readlistFromDbCSV(String pFilename, int IntCsvlistFromDbLength, int intcollen) {
        int i = 0;
        try (BufferedReader br = new BufferedReader(new FileReader(pFilename))) {
            arrlistFromDb = new String[IntCsvlistFromDbLength][intcollen];
            String line;
            while ((line = br.readLine()) != null) {
                String[] temp = line.split(",");
                for (int j = 0; j < 2; j++) {
                    String value = j < temp.length ? temp[j] : "";
                    arrlistFromDb[i][j] = value;
                    System.out.println("readlistFromDbCSVtemp[" + j + "]:" + value);
                }
                System.out.println("i:" + i);
                i = i + 1;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。