当前位置:   article > 正文

Python 多线程分块读取文件_python 分块读取数组

python 分块读取数组

什么也不说,直接上代码,绝对看得懂

  1. # _*_coding:utf-8_*_
  2. import time, threading, ConfigParser
  3. '''
  4. Reader类,继承threading.Thread
  5. @__init__方法初始化
  6. @run方法实现了读文件的操作
  7. '''
  8. class Reader(threading.Thread):
  9. def __init__(self, file_name, start_pos, end_pos):
  10. super(Reader, self).__init__()
  11. self.file_name = file_name
  12. self.start_pos = start_pos
  13. self.end_pos = end_pos
  14. def run(self):
  15. fd = open(self.file_name, 'r')
  16. '''
  17. 该if块主要判断分块后的文件块的首位置是不是行首,
  18. 是行首的话,不做处理
  19. 否则,将文件块的首位置定位到下一行的行首
  20. '''
  21. if self.start_pos != 0:
  22. fd.seek(self.start_pos-1)
  23. if fd.read(1) != '\n':
  24. line = fd.readline()
  25. self.start_pos = fd.tell()
  26. fd.seek(self.start_pos)
  27. '''
  28. 对该文件块进行处理
  29. '''
  30. while (self.start_pos <= self.end_pos):
  31. line = fd.readline()
  32. '''
  33. do somthing
  34. '''
  35. self.start_pos = fd.tell()
  36. '''
  37. 对文件进行分块,文件块的数量和线程数量一致
  38. '''
  39. class Partition(object):
  40. def __init__(self, file_name, thread_num):
  41. self.file_name = file_name
  42. self.block_num = thread_num
  43. def part(self):
  44. fd = open(self.file_name, 'r')
  45. fd.seek(0, 2)
  46. pos_list = []
  47. file_size = fd.tell()
  48. block_size = file_size/self.block_num
  49. start_pos = 0
  50. for i in range(self.block_num):
  51. if i == self.block_num-1:
  52. end_pos = file_size-1
  53. pos_list.append((start_pos, end_pos))
  54. break
  55. end_pos = start_pos+block_size-1
  56. if end_pos >= file_size:
  57. end_pos = file_size-1
  58. if start_pos >= file_size:
  59. break
  60. pos_list.append((start_pos, end_pos))
  61. start_pos = end_pos+1
  62. fd.close()
  63. return pos_list
  64. if __name__ == '__main__':
  65. '''
  66. 读取配置文件
  67. '''
  68. config = ConfigParser.ConfigParser()
  69. config.readfp(open('conf.ini'))
  70. #文件名
  71. file_name = config.get('info', 'fileName')
  72. #线程数量
  73. thread_num = int(config.get('info', 'threadNum'))
  74. #起始时间
  75. start_time = time.clock()
  76. p = Partition(file_name, thread_num)
  77. t = []
  78. pos = p.part()
  79. #生成线程
  80. for i in range(thread_num):
  81. t.append(Reader(file_name, *pos[i]))
  82. #开启线程
  83. for i in range(thread_num):
  84. t[i].start()
  85. for i in range(thread_num):
  86. t[i].join()
  87. #结束时间
  88. end_time = time.clock()
  89. print "Cost time is %f" % (end_time - start_time)


声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/2023面试高手/article/detail/625839
推荐阅读
相关标签
  

闽ICP备14008679号