赞
踩
下面给大家分享一个脚本,关于大文件分割,如果能够给大家提供一点点帮助,小编不胜欢喜了。
大文件分割
用法:
1.python split_big_file.py
2.输入文件全路径名
3.输入期望的分割后每个小文件的行数
4.Just wait.
# -*- coding:utf-8 -*- import os,re,shutil import platform sys_name = platform.system().lower() SPLIT_CHAR = '\\' if sys_name.find('windows') != -1 else '/' print('input big files`s path:') _path = raw_input() names = [] pathes = [] if os.path.isfile(_path): print('is file') names.append(_path) else: print('is nothing') ''' elif os.path.isdir(_path): print('This is dir') pathes = os.listdir(_path) print('pathes='+str(pathes)) for i in range(len(pathes)): fullpath = _path+SPLIT_CHAR+pathes[i] print('fullpath='+fullpath) if os.path.isfile(fullpath): names.append(fullpath) files.append(open(fullpath).read().split('\n')) ''' print(len(names)) line_num = int(raw_input('every file`line num = ')) print('line number='+str(line_num)) for i in range(len(names)): _name = names[i] ori_name = _name.split(SPLIT_CHAR)[len(_name.split(SPLIT_CHAR))-1] dir_name = _name.replace(ori_name,'DIR_'+ori_name) dir_name = dir_name.replace('.','_') print ori_name print dir_name os.system('mkdir '+dir_name) count = 1 print '已处理:'+str(count)+'行' part_file = open(dir_name+SPLIT_CHAR+str(0)+'.part.txt','w') with open(_name, 'rb') as f: for line in f: if count%line_num == 0: part_file.close() part_file = open(dir_name+SPLIT_CHAR+str(int(count/line_num))+'.part.txt','w') part_file.write(line+'\n') count+=1 if count%100000 == 0: print '已处理:'+str(count)+'行' print '已处理:'+str(count)+'行' os.system('python ./get_name_logfile.py '+dir_name)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。