当前位置:   article > 正文

java VS Excel公式 VS 影刀RPA 之 python的 提速 方法_影刀引用python基础

影刀引用python基础

基于我目前开发了几个影刀RPA的感受及 python性能提升的研究,及对比同样功能java实现

感觉提升RPA途径:

1、可以使用Excel的公式,这个效率还行,能充分利用计算机的CPU的线程、进程。

2、java或c#开发的应用程序,java稍微麻烦一点,要安装java JDK并且要配置好路径,再设定一个批处理文件即可。

3、影刀的for 或 python的其他循环方法,效果也不好:我测试的场景用python要跑5个小时,用java实现90秒即可完成。java的速度是python常规方法的200倍,是使用python加速方法(apply())的128倍。我测试时数据是这样子的,这与网上大咖说的“python在某些场景下比其他开发语言慢200-300倍”是一致的,毕竟它是解释型语言,不是编译型的,每一次都要解释之后才能运行:你有多少次循环,就有多少次解释,然后再执行。

说说我 python 提速的途径:

a、我试过taichi,不支持字符串,仅支持纯数值

b、pypy3安装有问题,没安装上,

c、其他的加速工具,要么重新定义新的数据类型,要么改写之前写好的python代码。

附上我python 测试的几段代码,写得比较乱,但是主要方法都有的:

  1. # import pandas as pd
  2. # import numpy as np
  3. import pandas as pd # modin.
  4. import time # 引入time模块
  5. # import taichi as ti
  6. # ti.init()
  7. # from distributed import Client
  8. # client = Client()
  9. # df = pd.DataFrame({'month': [1, 4, 7, 10],
  10. # 'year': [2012, 2014, 2013, 2014],
  11. # 'sale': [55, 40, 84, 31]})
  12. # print(df)
  13. # df.set_index('year') #如果索引列有数据重复也没关系
  14. # print(df)
  15. # aa = np.random.randn(6)
  16. # print('aa:{0}',aa)
  17. # --------------------------------------
  18. # df = pd.DataFrame({'a': np.random.randn(6),
  19. # 'b': ['foo', 'bar'] * 3,
  20. # 'c': np.random.randn(6)})
  21. # print('----------------')
  22. # def my_test(a, b):
  23. # return a + b
  24. #
  25. # print(df)
  26. # df['Value'] = df.apply(lambda row: my_test(row['a'], row['c']), axis=1,result_type=None) # 方法1
  27. # print(df)
  28. #
  29. # df['Value2'] = df['a'] + df['c'] # 方法2
  30. # print(df)
  31. # ------------------------------------
  # Lookup table: no header row, only the first four columns are read.
  # Raw strings keep the Windows backslashes literal; '\l' and '\e' are not
  # valid escape sequences and make newer Python versions emit a warning.
  # The author also experimented with
  # names=['OrderId', 'BelongTime', 'ShopCode', 'ShopCname'] and index_col=0.
  dfListFromDb = pd.read_csv(r'D:\listFromDb.csv', header=None, usecols=[0, 1, 2, 3])

  # Data to scan: the first line of the file is skipped and only the first two
  # columns are read.
  dfexcelData2Abstract = pd.read_csv(
      r'D:\excelData2Abstract.csv', header=None, skiprows=[0], usecols=[0, 1]
  )
  print('xxxxxxxxxxx')

  # Deep copy, so the benchmarks below work on their own frame.
  dfexcelData2Abstract2 = dfexcelData2Abstract.copy(True)
  49. # @ti.func
  def ForloopDoubleLIstRunTime(dfListFromDb, dfexcelData2Abstract2):
      """Time a nested index-based scan over two DataFrames.

      For every row of ``dfexcelData2Abstract2`` the first column is converted
      to ``str`` and compared against the first column of each ``dfListFromDb``
      row; the inner scan stops at the first lookup value that is a substring
      of it. Nothing is stored -- only the elapsed wall-clock time is printed.

      Args:
          dfListFromDb: DataFrame whose first column holds the values to look for.
          dfexcelData2Abstract2: DataFrame whose first column holds the text to
              search in.

      Returns:
          None.
      """
      ticks = time.time()
      # Iterate the frame that was passed in. The previous version read the
      # module-level ``dfexcelData2Abstract`` here, silently ignoring the
      # argument.
      data_row_count = len(dfexcelData2Abstract2)
      db_row_count = len(dfListFromDb)
      for x in range(data_row_count):
          # The outer cell does not change during the inner scan, so read it once.
          text = str(dfexcelData2Abstract2.iloc[x, 0])
          for y in range(db_row_count):
              # Per-cell .iloc access is kept on purpose: this function exists
              # to measure the plain nested-loop approach.
              if str(dfListFromDb.iloc[y, 0]) in text:
                  break
      print('ForloopDoubleLIstRunTime 耗时:{}'.format(time.time() - ticks))
  # Scratch list declared by the script; nothing visible here fills it.
  list24result = list()
  # Run the index-based benchmark, then print the CPU time consumed so far.
  ForloopDoubleLIstRunTime(dfListFromDb, dfexcelData2Abstract2)
  print(time.process_time())  # author's recorded readings: 120second 263, 208 132 116 99 98
  73. # @ti.func
  def DoubleLIstRunTime_iterrows(dfListFromDb, dfexcelData2Abstract2):
      """Time the nested scan using ``DataFrame.iterrows`` on both frames.

      Each row of ``dfexcelData2Abstract2`` is checked against the rows of
      ``dfListFromDb``: when the lookup row's item ``0`` (as ``str``) is a
      substring of the data row's item ``0`` (as ``str``), the pair is printed
      and the inner scan stops. The elapsed wall-clock time is printed last.

      Args:
          dfListFromDb: DataFrame providing the values to look for (item ``0``).
          dfexcelData2Abstract2: DataFrame providing the text to search (item ``0``).

      Returns:
          None.
      """
      started = time.time()
      for index, data_row in dfexcelData2Abstract2.iterrows():
          haystack = str(data_row[0])
          for _, db_row in dfListFromDb.iterrows():
              needle = str(db_row[0])
              if needle not in haystack:
                  continue
              # First hit wins: report it and move on to the next data row.
              print(''.join([
                  'index:', str(index),
                  'str(dfexcelData2Abstract2_row[0]):', haystack,
                  '#', 'str(dfListFromDb_rowdb[0]):', needle,
              ]))
              break
      print('DoubleLIstRunTime_iterrows 耗时:{}'.format(time.time() - started))
  94. # DoubleLIstRunTime_iterrows(dfListFromDb, dfexcelData2Abstract2) #123secend
  95. # print(str(time.process_time())) #377 95 117 168 160
  96. # @ti.kernel
  def DoubleLIstRunTime_itertuples(dfListFromDb , dfexcelData2Abstract2):
      """Time the nested scan with ``itertuples`` for the inner loop.

      The outer loop walks ``dfexcelData2Abstract2`` with ``iterrows`` and
      prints every row; the inner loop walks ``dfListFromDb`` with
      ``itertuples`` and stops at the first row whose first column (as
      ``str``) is a substring of the outer row's item ``0`` (as ``str``).
      The elapsed wall-clock time is printed last.

      Args:
          dfListFromDb: DataFrame whose first column holds the values to look for.
          dfexcelData2Abstract2: DataFrame providing the text to search (item ``0``).

      Returns:
          None.
      """
      ticks = time.time()
      for index, dfexcelData2Abstract2_row in dfexcelData2Abstract2.iterrows():
          print('外层循环dfexcelData2Abstract2Index:'+str(index)+'#dfexcelData2Abstract2_row:'+str(dfexcelData2Abstract2_row))
          text = str(dfexcelData2Abstract2_row[0])
          for rowdb in dfListFromDb.itertuples():
              # Position 0 of the tuple is the index, position 1 the first
              # column. Positional access works for any column labels, whereas
              # the attribute name '_1' only exists when the label is not a
              # valid Python identifier.
              if str(rowdb[1]) in text:
                  break
      # Report under this function's own name (it used to print the
      # 'DoubleLIstRunTime_iterrows' label, making the two timings
      # indistinguishable in the output).
      print('DoubleLIstRunTime_itertuples 耗时:{}'.format(time.time() - ticks))
  119. # DoubleLIstRunTime_itertuples(dfListFromDb, dfexcelData2Abstract2) #123secend
  120. # print(str(time.process_time())) #377 95 117 168 160
  121. def testfindInApply(tempRow):
  122. i =0
  123. # print('外层循环tempRow:' + str(i) + '#str(tempRow):' + str(tempRow) + '#' )
  124. for rowdb in dfListFromDb.itertuples():
  125. i = i + 1
  126. if str(tempRow[0]).__contains__(str(getattr(rowdb, '_1'))):
  127. # print('内层循环TuplesIndex:'+str(i)+'#str(tempRow[0]):'+str(tempRow[0])+'#'+"str(getattr(rowdb['_1'])):"+str(getattr(rowdb,'_1')))
  128. tempRow[2]=str(getattr(rowdb, '_1'))
  129. return True
  130. # else:
  131. # return False
  132. # print(dfexcelData2Abstract2)
  133. # dfexcelData2Abstract2[2]='-----------------------------------'
  134. # print(dfexcelData2Abstract2)
  135. # ticks = time.time()
  136. # dfexcelData2Abstract2.apply(testfindInApply,axis=1)
  137. # ticks2 = (time.time() - ticks) / 60
  138. # print('dfexcelData2Abstract2.apply 耗时:{}'.format(time.time() - ticks)) # +str(ticks2))
  139. # print(dfexcelData2Abstract2)
  140. # dfexcelData2Abstract2.to_csv("d:\dfexcelData2Abstract2Output.csv")

java代码:

  1. package com.company;
  2. import javax.swing.*;
  3. import java.io.*;
  4. import java.nio.charset.StandardCharsets;
  5. import java.text.SimpleDateFormat;
  6. import java.util.ArrayList;
  7. import java.util.Arrays;
  8. import java.util.Iterator;
  9. import java.util.List;
  10. import java.io.BufferedReader;
  11. import java.io.FileReader;
  12. import java.io.IOException;
  13. import java.io.BufferedReader;
  14. import java.io.FileReader;
  15. import java.io.IOException;
  16. import java.util.ArrayList;
  17. import java.util.HashMap;
  18. import java.util.List;
  19. import java.util.Map;
  20. import static sun.misc.Version.print;
  21. import static sun.misc.Version.println;
  /**
   * Command-line tool that joins two CSV files by substring match.
   *
   * <p>It loads {@code d:\excelData2Abstract.csv} (all lines except the last
   * two) and {@code d:\listFromDb.csv}. For every data row it searches the
   * lookup rows in file order and, on the first lookup row whose first column
   * is contained in the data row's first column, copies that lookup row's
   * second column into the data row's third column. The result is written to
   * {@code D:\DbListModify.csv}; progress timestamps go to a log file named
   * {@code D:\test_appendfile31<timestamp>.txt}.
   */
  public class DbListModify {
      // Encodings main() has always declared for the two input files. They are
      // now used for loading the data as well, not only for counting lines
      // (the loaders previously fell back to the platform default charset).
      private static final String EXCEL_DATA_CHARSET = "gb2312";
      private static final String LIST_FROM_DB_CHARSET = "utf-8";

      // Rows of excelData2Abstract.csv: [0] and [1] come from the file,
      // [2] starts empty and receives the matched lookup value.
      static String[][] arrexcelData2Abstract;
      // Rows of listFromDb.csv: [0] is the key searched for, [1] the value copied on a match.
      static String[][] arrlistFromDb;
      // Log file and result file.
      static File file;
      static File fileCsv;
      // Formatter for the timestamps in the log and in the log file name.
      static SimpleDateFormat df;

      /**
       * Runs the whole load / match / write pipeline.
       *
       * <p>Any exception is printed to stderr; both output files are closed in
       * every case.
       *
       * @param args unused
       */
      public static void main(String[] args) {
          df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS");
          file = new File("D:\\test_appendfile31" + timestamp().replace(":", "").replace("/", "") + ".txt");
          fileCsv = new File("D:\\DbListModify.csv");
          // FileWriter creates missing files itself, so the explicit
          // exists()/createNewFile() dance is gone (its second branch created
          // the log file again instead of the CSV file). try-with-resources
          // closes both writers even when an exception interrupts the run.
          try (FileWriter fileWritter = new FileWriter(file.getAbsoluteFile(), false);
               FileWriter CsvfileWritter = new FileWriter(fileCsv.getAbsoluteFile(), false)) {

              int IntCsvexcelData2AbstractLength = countLines("d:\\excelData2Abstract.csv", EXCEL_DATA_CHARSET);
              fileWritter.write("IntCsvexcelData2AbstractLength:" + IntCsvexcelData2AbstractLength + "\r\n");
              readexcelData2AbstractCSV("d:\\excelData2Abstract.csv", IntCsvexcelData2AbstractLength, 3);
              System.out.println("itemexcelData2Abstract" + timestamp());
              fileWritter.write("itemexcelData2Abstract" + timestamp() + "\r\n");

              int IntCsvlistFromDbLength = countLines("d:\\listFromDb.csv", LIST_FROM_DB_CHARSET);
              fileWritter.write("IntCsvlistFromDbLength:" + IntCsvlistFromDbLength + "\r\n");
              // Size the lookup table by its OWN line count. It used to be
              // sized by the data file's count, which overflowed the array
              // whenever listFromDb.csv was the longer file.
              readlistFromDbCSV("d:\\listFromDb.csv", IntCsvlistFromDbLength, 3);
              System.out.println("arrlistFromDb" + timestamp());

              fileWritter.write("CsvexcelData2Abstract赋值开始:" + timestamp() + "\r\n");
              // The last two lines of the data file are never loaded, so they
              // are excluded from matching and from the output as well.
              int dataRowCount = IntCsvexcelData2AbstractLength - 2;

              // Trim every lookup key once instead of once per comparison.
              String[] keys = new String[IntCsvlistFromDbLength];
              for (int intb = 0; intb < IntCsvlistFromDbLength; intb++) {
                  keys[intb] = arrlistFromDb[intb][0].trim();
              }

              for (int inta = 0; inta < dataRowCount; inta++) {
                  String text = arrexcelData2Abstract[inta][0].trim();
                  for (int intb = 0; intb < IntCsvlistFromDbLength; intb++) {
                      // contains("") is true for every string; a blank key
                      // would otherwise "match" each remaining data row.
                      if (keys[intb].isEmpty()) {
                          continue;
                      }
                      if (text.contains(keys[intb])) {
                          arrexcelData2Abstract[inta][2] = arrlistFromDb[intb][1].trim();
                          break;
                      }
                  }
              }
              fileWritter.write("CsvexcelData2Abstract赋值结束:写入CSV开始" + timestamp() + "\r\n");

              // Build the whole result in memory and write it in one call.
              StringBuilder stringBuilder = new StringBuilder();
              for (int inta = 0; inta < dataRowCount; inta++) {
                  stringBuilder.append(arrexcelData2Abstract[inta][0]).append(",")
                          .append(arrexcelData2Abstract[inta][1]).append(",")
                          .append(arrexcelData2Abstract[inta][2]).append(",")
                          .append("\r\n");
                  System.out.println(inta);
              }
              CsvfileWritter.write(stringBuilder.toString());
              fileWritter.write("写入CSV完成" + timestamp() + "\r\n");
          } catch (Exception e) {
              e.printStackTrace();
          }
      }

      /** Formats the current time with {@link #df}. */
      private static String timestamp() {
          return df.format(System.currentTimeMillis());
      }

      /** Opens a buffered reader on {@code pFilename} decoding with {@code charsetName}. */
      private static BufferedReader openReader(String pFilename, String charsetName) throws IOException {
          return new BufferedReader(new InputStreamReader(new FileInputStream(pFilename), charsetName));
      }

      /**
       * Counts the lines of a text file.
       *
       * <p>Lines are only counted, not split: the previous counting loop also
       * indexed the last field of every line, which threw on a line that
       * consists of commas only.
       */
      private static int countLines(String pFilename, String charsetName) throws IOException {
          int count = 0;
          try (BufferedReader reader = openReader(pFilename, charsetName)) {
              while (reader.readLine() != null) {
                  count = count + 1;
              }
          }
          return count;
      }

      /** Returns {@code parts[index]}, or an empty string when the line has fewer fields. */
      private static String column(String[] parts, int index) {
          return index < parts.length ? parts[index] : "";
      }

      /**
       * Loads the data file into {@link #arrexcelData2Abstract}.
       *
       * <p>The first two fields of each line are stored; remaining columns are
       * initialised to the empty string. Loading stops once
       * {@code IntCsvexcelData2AbstractLength - 2} rows have been stored, i.e.
       * the last two lines of the file are skipped.
       *
       * @param pFilename path of the CSV file
       * @param IntCsvexcelData2AbstractLength number of lines in the file (array height)
       * @param intcollen number of columns to allocate per row
       * @throws IOException if the file cannot be opened or read
       */
      private static void readexcelData2AbstractCSV(String pFilename, int IntCsvexcelData2AbstractLength, int intcollen) throws IOException {
          try (BufferedReader br = openReader(pFilename, EXCEL_DATA_CHARSET)) {
              arrexcelData2Abstract = new String[IntCsvexcelData2AbstractLength][intcollen];
              String line;
              int i = 0;
              // The bound check guards against the file growing after it was counted.
              while (i < IntCsvexcelData2AbstractLength && (line = br.readLine()) != null) {
                  String[] temp = line.split(",");
                  for (int j = 0; j < intcollen; j++) {
                      arrexcelData2Abstract[i][j] = (j < 2) ? column(temp, j) : "";
                  }
                  System.out.println("i:" + i);
                  i = i + 1;
                  if (i == IntCsvexcelData2AbstractLength - 2) {
                      return;
                  }
              }
          }
      }

      /**
       * Loads the lookup file into {@link #arrlistFromDb}.
       *
       * <p>The first two fields of each line are stored (and echoed to stdout);
       * any further allocated columns are left {@code null}.
       *
       * @param pFilename path of the CSV file
       * @param IntCsvlistFromDbLength number of lines in the file (array height)
       * @param intcollen number of columns to allocate per row
       * @throws IOException if the file cannot be opened or read
       */
      private static void readlistFromDbCSV(String pFilename, int IntCsvlistFromDbLength, int intcollen) throws IOException {
          try (BufferedReader br = openReader(pFilename, LIST_FROM_DB_CHARSET)) {
              arrlistFromDb = new String[IntCsvlistFromDbLength][intcollen];
              int storedColumns = Math.min(2, intcollen);
              String line;
              int i = 0;
              // The bound check guards against the file growing after it was counted.
              while (i < IntCsvlistFromDbLength && (line = br.readLine()) != null) {
                  String[] temp = line.split(",");
                  for (int j = 0; j < storedColumns; j++) {
                      arrlistFromDb[i][j] = column(temp, j);
                      System.out.println("readlistFromDbCSVtemp[" + j + "]:" + arrlistFromDb[i][j]);
                  }
                  System.out.println("i:" + i);
                  i = i + 1;
              }
          }
      }
  }

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/笔触狂放9/article/detail/282037
推荐阅读
相关标签
  

闽ICP备14008679号