当前位置:   article > 正文

python处理大数据文件,set、enumerate使用,python传参,获取文件路径_python set可以enumerate

python set可以enumerate
 
###   enumerate(ad_set, start=1):遍历集合 ad_set,同时得到每个元素的序号
###   start=1:指定序号(索引)的起始值为 1,不指定时默认从 0 开始
  1. #-*- coding:utf-8 -*-
  2. #唯一号码转家庭、非家庭
  3. #编写者:zqm
  4. #日期:20170810
  5. #功能:分别找出家庭,非家庭用户,家庭的编号转jz编号,非家庭重新编号且相同编号相同
  6. ######################################################################
  7. # 备注:
  8. # File_target_zhengq_temp(临时文件)是处理的文件中的政企数据
  9. #
  10. # 调用方式:python Jzad_jiat_zhengq.py /zqm/处理的文件.txt
  11. #
  12. # 注意:
  13. # 此程序只适用于ad在File_deal文件的第一列,如果不在第一列,请酌情修改程序
  14. #######################################################################
  15. from datetime import datetime
  16. from sys import argv
  17. import os
  18. #配置文件
  19. File_source = "/JZ_lzy.txt"
  20. #处理文件所在的目录
  21. dir = os.getcwd()+"/"
  22. #要处理的文件
  23. File_deal = argv[1]
  24. if File_deal.find("/") != -1:
  25. File_deal = argv[1].split("/")[-1]
  26. #临时文件即处理文件里的政企数据
  27. File_target_zhengq_temp = dir+"政企原始数据.txt"
  28. #政企结果文件
  29. File_target_zhengq = dir+"zq_"+File_deal
  30. #家庭结果文件
  31. File_target_jiating = dir+"jt_"+File_deal
  32. print ("程序已开始执行!!!")
  33. print (datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
  34. dist_source = {}
  35. list = []
  36. ad_set = set()
  37. dist2 = {}
  38. def Read_File_source():
  39. with open(File_source, "r") as ff:
  40. for line in ff:
  41. line = line.strip().split("\t")
  42. dist_source[line[0]] = line[1]
  43. def Main_zhengqi():
  44. with open(File_target_zhengq_temp, "w") as f_write:
  45. with open(File_deal, "r") as ff:
  46. for line in ff:
  47. line = line.strip().split("\t")
  48. if line[0] not in dist_source:
  49. f_write.write("\t".join(line) + "\n")
  50. ad_set.add(line[0])
  51. with open(File_target_zhengq_temp,"r") as ff:
  52. for line in ff:
  53. list.append(line)
  54. with open(File_target_zhengq, "w") as f_write:
  55. for num,ad in enumerate(ad_set,start=1):
  56. dist2[ad] = num
  57. for line in list:
  58. line = line.strip().split("\t")
  59. if line[0] in dist2:
  60. f_write.write(str(dist2[line[0]])+"\t"+"\t".join(line[1:])+"\n")
  61. print("政企已完成!!!")
  62. print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
  63. def Main_jiating():
  64. with open(File_target_jiating, "w") as f_write:
  65. with open(File_deal, "r") as ff:
  66. for line in ff:
  67. line = line.strip().split("\t")
  68. if line[0] not in dist_source:
  69. continue
  70. line[0] = dist_source[line[0]]
  71. f_write.write("\t".join(line) + "\n")
  72. print("家庭已完成!!!")
  73. print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
  74. if __name__ == "__main__":
  75. Read_File_source()
  76. Main_jiating()
  77. Main_zhengqi()
  78. print("程序已完成!!!")
  79. print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))


版权声明:原创文章,未经允许不得转载,谢谢。

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/盐析白兔/article/detail/202021
推荐阅读
相关标签
  

闽ICP备14008679号