当前位置:   article > 正文

python 使用pandas或xlrd、xlwt实现对Excel的读取、添加、追加、删除、重复值处理、缺失值处理、获取行数、列数、获取一行、一列数据等一系列封装_python excel插入列

python excel插入列

不说了,又是造轮子的一天。在此我要严重批评CSDN或百度一堆浑水摸鱼的,某些人明明代码明显报错也来上传发博客,要么就是标题党,代码没报错但压根就不是实现那个功能的,简直是浪费时间。

废话不多说直接贴代码:

先新建一个data文件夹,文件路径我填的是 ./data/

使用xlrd、xlwt:

  1. import xlrd
  2. import xlwt
  3. import xlutils.copy
  4. class XLSX:
  5. __workbook = xlwt.Workbook(encoding='utf-8')
  6. __sheet = __workbook.add_sheet('Sheet1', cell_overwrite_ok=True)
  7. # 保存
  8. def save(self, name, head_list, data_list):
  9. path = './data/' + name + '.xlsx'
  10. head = head_list
  11. data = data_list
  12. __row = 0
  13. __col = 0
  14. for head in head:
  15. self.__sheet.write(__row, __col, head)
  16. __col = __col + 1
  17. for i in range(0, len(data)):
  18. for j in range(1, len(data[i]) + 1):
  19. self.__sheet.write(j, i, data[i][j - 1])
  20. self.__workbook.save(path)
  21. # 在原来的基础上增加 n 列,包含表头
  22. @staticmethod
  23. def save_append_col(name, data_list, head):
  24. path = './data/' + name + '.xlsx'
  25. file = xlrd.open_workbook(path)
  26. table = file.sheet_by_index(0)
  27. col_len = table.ncols
  28. ws = xlutils.copy.copy(file)
  29. ws_table = ws.get_sheet(0)
  30. ws_table.write(0, col_len, head)
  31. for i in range(1, len(data_list) + 1):
  32. ws_table.write(i, col_len, data_list[i - 1])
  33. ws.save(path)
  34. # 在原来的基础上增加 n 列,包含表头,如果存在就在该列下面继续添加
  35. @staticmethod
  36. def save_append_col_append(name, data_list, head, line_index):
  37. path = './data/' + name + '.xlsx'
  38. file = xlrd.open_workbook(path)
  39. table = file.sheet_by_index(0)
  40. cols = table.row_values(0)
  41. if head in cols:
  42. col_len = cols.index(head)
  43. ws = xlutils.copy.copy(file)
  44. ws_table = ws.get_sheet(0)
  45. for i in range(0, len(data_list)):
  46. ws_table.write(line_index + i - 1, col_len, data_list[i])
  47. ws.save(path)
  48. else:
  49. col_len = table.ncols
  50. ws = xlutils.copy.copy(file)
  51. ws_table = ws.get_sheet(0)
  52. ws_table.write(0, col_len, head)
  53. for i in range(1, len(data_list) + 1):
  54. ws_table.write(i, col_len, data_list[i - 1])
  55. ws.save(path)
  56. # 在原来的基础上增加 n 行,这里是按照每一行的方式追加
  57. @staticmethod
  58. def save_append_row(name, data_list):
  59. path = './data/' + name + '.xlsx'
  60. file = xlrd.open_workbook(path)
  61. table = file.sheet_by_index(0)
  62. row_len = table.nrows
  63. col_len = table.ncols
  64. ws = xlutils.copy.copy(file)
  65. ws_table = ws.get_sheet(0)
  66. for i in range(0, len(data_list)):
  67. for j in range(0, col_len):
  68. ws_table.write(row_len + i, j, data_list[i][j])
  69. ws.save(path)
  70. # 获取文件所有数据
  71. @staticmethod
  72. def get_all_line(file_name):
  73. file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
  74. table = file.sheets()[0]
  75. a = []
  76. for i in range(1, table.nrows):
  77. a.append(table.row_values(i))
  78. return a
  79. # 获取文件所有数据,以行的方式
  80. @staticmethod
  81. def get_all_line_col(file_name):
  82. file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
  83. sheet = file.sheet_by_index(0) # 获取第一个表格
  84. result = []
  85. rows = sheet.nrows
  86. for row_idx in range(rows):
  87. row_values = sheet.row_values(row_idx)
  88. result.append(row_values)
  89. return result
  90. # 获取文件指定一列的所有数据
  91. @staticmethod
  92. def get_all_col(file_name, col_index):
  93. file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
  94. table = file.sheets()[0]
  95. a = []
  96. for i in range(1, table.nrows):
  97. a.append(table.row_values(i)[col_index])
  98. return a
  99. # 获取文件指定一行的所有数据
  100. @staticmethod
  101. def get_one_line(file_name, line_count):
  102. file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
  103. t1 = file.sheets()[0]
  104. return t1.row_values(line_count)
  105. # 获取文件指定一行的指定一列的数据
  106. @staticmethod
  107. def get_one_line_col(file_name, line_count, col_count):
  108. file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
  109. t1 = file.sheets()[0]
  110. return t1.row_values(line_count)[col_count]
  111. # 获取文件总行数
  112. @staticmethod
  113. def get_row_length(file_name):
  114. file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
  115. table = file.sheet_by_index(0)
  116. return table.nrows
  117. # 获取文件指定一列的长度,不包含空值
  118. @staticmethod
  119. def get_one_col_length(file_name, col_index):
  120. file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
  121. table = file.sheets()[0]
  122. col_len = 0
  123. for row_idx in range(table.nrows):
  124. cell_value = table.cell_value(row_idx, col_index)
  125. if cell_value != '':
  126. col_len += 1
  127. return col_len
  128. # 获取总列数
  129. @staticmethod
  130. def get_col_length(file_name):
  131. file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
  132. table = file.sheet_by_index(0)
  133. return table.ncols
  134. # 获取重复列表的下标,假如第1/6/8行相同,5/9行相同,则返回[[1,6,8],[5,9]]
  135. def get_repeat_list(self, file_name, col_index):
  136. return self.find_duplicates(self.get_all_col(file_name, col_index))
  137. # 删除行,[行数1,[行数2]]
  138. @staticmethod
  139. def delete_line(file_name, delete_rows):
  140. path = './data/' + file_name + '.xlsx'
  141. workbook = xlrd.open_workbook(path)
  142. sheet = workbook.sheet_by_index(0)
  143. rows_num = sheet.nrows
  144. rows_data = []
  145. for r in range(rows_num):
  146. if r not in delete_rows:
  147. rows_data.append(sheet.row_values(r))
  148. book = xlwt.Workbook()
  149. sheet1 = book.add_sheet('Sheet1', cell_overwrite_ok=True)
  150. # 将未删除行数据写入sheet
  151. for r, row in enumerate(rows_data):
  152. for c, val in enumerate(row):
  153. sheet1.write(r, c, val)
  154. book.save(path)
  155. # 对所有列所有行进行缺失值处理,使用“缺失补充”来替代
  156. @staticmethod
  157. def deletion(file_name):
  158. path = './data/' + file_name + '.xlsx'
  159. workbook = xlrd.open_workbook(path)
  160. sheet = workbook.sheet_by_index(0)
  161. rows_num = sheet.nrows
  162. cols_num = sheet.ncols
  163. data = []
  164. for r in range(rows_num):
  165. row = []
  166. for c in range(cols_num):
  167. value = sheet.cell_value(r, c)
  168. if value == '': # 如果数据为空,认为是缺失值
  169. value = '缺失补充' # 替换为'缺失'字符串
  170. row.append(value)
  171. data.append(row)
  172. book = xlwt.Workbook()
  173. sheet1 = book.add_sheet('Sheet1', cell_overwrite_ok=True)
  174. for r in range(rows_num):
  175. for c in range(cols_num):
  176. sheet1.write(r, c, data[r][c])
  177. book.save(path)
  178. @staticmethod
  179. def find_duplicates(strs):
  180. result = []
  181. strs_dic = {}
  182. for index, s in enumerate(strs):
  183. if s not in strs_dic:
  184. strs_dic[s] = [index]
  185. else:
  186. strs_dic[s].append(index)
  187. for key in strs_dic:
  188. if len(strs_dic[key]) > 1:
  189. result.append(strs_dic[key])
  190. return result
  191. # 测试
  192. # xx = XLSX()
  193. # xx.save('ddd', ['表头1', '表头2', '表头3'],
  194. # [['列1-第1行', '列1-第2行', '列1-第3行'], ['列2-第1行', '列2-第2行', '列2-第3行'],
  195. # ['列3-第1行', '列3-第2行', '列3-第3行']])
  196. # xx.save_append_row('ddd', [['列1-第4行', '列2-第4行', '列3-第4行'], ['列1-第5行', '列2-第5行', '列3-第5行'],
  197. # ['列1-第6行', '列2-第6行', '列3-第6行']])
  198. # xx.save_append_col('ddd', ['列4-第1行', '列4-第2行', '列4-第3行'], '表头4')
  199. # xx.save_append_col_append('ddd', ['列4-第1行', '列4-第2行', '列4-第3行'], '表头4', 5)
  200. # xx.delete_line('ddd', [5, 6])
  201. # xx.deletion('ddd')
  202. # print(xx.get_all_line('ddd'))
  203. # print(xx.get_one_line('ddd', 2))
  204. # print(xx.get_all_col('ddd', 3))
  205. # print(xx.get_one_line_col('ddd', 1, 0))
  206. # print(xx.get_row_length('ddd'))
  207. # print(xx.get_col_length('ddd'))
  208. # print(ee.get_repeat_list('ddd',1))
  209. # print(xx.get_one_col_length('ddd', 3))

使用pandas:

  1. import os
  2. import numpy as np
  3. import pandas as pd
  4. class EXCEL:
  5. # 保存
  6. @staticmethod
  7. def save(name, head_list, data_list):
  8. path = r'./data/' + name + '.xlsx'
  9. temp = {}
  10. for i in range(len(head_list)):
  11. temp[str(head_list[i])] = data_list[i]
  12. data = pd.DataFrame(temp)
  13. data.to_excel(path, index=False)
  14. # 在原来的基础上增加一个sheet表
  15. @staticmethod
  16. def append_sheet(name, sheet_name, head_list):
  17. write = pd.DataFrame(columns=head_list)
  18. path = r'./data/' + name + '.xlsx'
  19. if not os.path.exists(path):
  20. write = pd.ExcelWriter(path, engine='openpyxl')
  21. with pd.ExcelWriter(path, mode='a', engine='openpyxl') as writer:
  22. write.to_excel(writer, sheet_name=sheet_name, index=False)
  23. # 在原来的基础上增加 n 行,包含表头,这里是按照每一列的方式追加
  24. @staticmethod
  25. def append_row(name, head_list, data_list):
  26. path = r'./data/' + name + '.xlsx'
  27. temp = {}
  28. for i in range(len(head_list)):
  29. temp[str(head_list[i])] = data_list[i]
  30. old_data = pd.read_excel(path)
  31. new_data = pd.DataFrame(temp)
  32. result = pd.concat([old_data, new_data])
  33. result.to_excel(path, index=False)
  34. # 在原来的基础上增加 n 列
  35. @staticmethod
  36. def append_col(name, head_name, data_list):
  37. path = r'./data/' + name + '.xlsx'
  38. df = pd.read_excel(path)
  39. new_col = pd.Series(data_list)
  40. new_col.name = head_name
  41. result = pd.concat([df, new_col], axis=1)
  42. result.to_excel(path, header=True, index=False)
  43. # 获取文件所有数据,以行的方式
  44. @staticmethod
  45. def get_all_line(file_name):
  46. path = r'./data/' + file_name + '.xlsx'
  47. df = pd.read_excel(path)
  48. return df.values.tolist()
  49. # 获取文件所有数据,以列的方式
  50. @staticmethod
  51. def get_all_line_col(file_name):
  52. path = r'./data/' + file_name + '.xlsx'
  53. df = pd.read_excel(path)
  54. cols = df.columns.tolist()
  55. result = []
  56. for col in cols:
  57. result.append(df[col].values.tolist())
  58. return result
  59. # 获取文件指定一列的所有数据
  60. @staticmethod
  61. def get_all_col(file_name, col_index):
  62. path = r'./data/' + file_name + '.xlsx'
  63. df = pd.read_excel(path)
  64. col = df.iloc[:, col_index]
  65. return col.values.tolist()
  66. # 获取文件指定一行的所有数据
  67. @staticmethod
  68. def get_one_line(file_name, line_count):
  69. path = r'./data/' + file_name + '.xlsx'
  70. df = pd.read_excel(path)
  71. row = df.iloc[line_count]
  72. return row.values.tolist()
  73. # 获取文件指定一行的指定一列的数据
  74. @staticmethod
  75. def get_one_line_col(file_name, line_count, col_count):
  76. path = r'./data/' + file_name + '.xlsx'
  77. df = pd.read_excel(path)
  78. row = df.iloc[line_count]
  79. col = row[col_count]
  80. if isinstance(col, np.float64):
  81. col = pd.Series(col)
  82. return col
  83. # 获取文件总行数
  84. @staticmethod
  85. def get_row_length(file_name):
  86. path = r'./data/' + file_name + '.xlsx'
  87. df = pd.read_excel(path)
  88. total_rows = df.shape[0]
  89. return total_rows
  90. # 获取文件总列数
  91. @staticmethod
  92. def get_col_length(file_name):
  93. path = r'./data/' + file_name + '.xlsx'
  94. df = pd.read_excel(path)
  95. total_cols = df.shape[1]
  96. return total_cols
  97. # 删除行,[行数1,[行数2]]
  98. @staticmethod
  99. def delete_line(file_name, delete_rows):
  100. path = r'./data/' + file_name + '.xlsx'
  101. df = pd.read_excel(path)
  102. df.drop(df.index[delete_rows], inplace=True)
  103. df.to_excel(path, index=False)
  104. # 获取重复列表的下标,假如第1/6/8行相同,5/9行相同,则返回[[1,6,8],[5,9]]
  105. def get_repeat_list(self, file_name, col_index):
  106. return self.find_duplicates(self.get_all_col(file_name, col_index))
  107. # 对指定列 进行缺失值处理
  108. @staticmethod
  109. def deletion(file_name, col_head_name):
  110. path = r'./data/' + file_name + '.xlsx'
  111. df = pd.read_excel(path)
  112. # num_col_type = df[col_head_name].dtype
  113. # if num_col_type == 'int64' or num_col_type == 'float64':
  114. # df[col_head_name] = df[col_head_name].fillna(df[col_head_name].median())
  115. # else:
  116. # df[col_head_name] = df[col_head_name].fillna('缺失')
  117. # 不使用np.nan,也不判断类型了,统一用 “缺失补充”来替代
  118. df[col_head_name] = df[col_head_name].fillna('缺失补充')
  119. df.to_excel(path, index=False)
  120. @staticmethod
  121. def find_duplicates(strs):
  122. result = []
  123. strs_dic = {}
  124. for index, s in enumerate(strs):
  125. if s not in strs_dic:
  126. strs_dic[s] = [index]
  127. else:
  128. strs_dic[s].append(index)
  129. for key in strs_dic:
  130. if len(strs_dic[key]) > 1:
  131. result.append(strs_dic[key])
  132. return result
  133. # 测试
  134. # ee = EXCEL()
  135. # ee.save('ggg', ['表头1', '表头2', '表头3'],
  136. # [['列1-第1行', '列1-第2行', '列1-第3行'], ['列2-第1行', '列2-第2行', '列2-第3行'],
  137. # ['列3-第1行', '列3-第2行', '列3-第3行']])
  138. #
  139. # ee.append_sheet('ggg', 'Sheet2', ['表头1', '表头2', '表头3'])
  140. #
  141. # ee.append_row('ggg', ['表头1', '表头2', '表头3'],
  142. # [['列1-第1行', '列1-第2行'], ['列2-第1行', '列2-第2行'], ['列3-第1行', '列3-第2行']])
  143. #
  144. # ee.append_col('ggg', '表头4', ['列4-第1行', '列4-第2行', '列4-第3行'])
  145. # ee.delete_line('ggg', [2, 3])
  146. # ee.deletion('ggg', '表头4')
  147. #
  148. # print(ee.get_all_line('ggg'))
  149. # print(ee.get_one_line('ggg', 2))
  150. # print(ee.get_all_col('ggg', 3))
  151. # print(ee.get_one_line_col('ggg', 1, 0))
  152. # print(ee.get_row_length('ggg'))
  153. # print(ee.get_col_length('ggg'))
  154. # print(ee.get_repeat_list('ggg',1))
  155. # ee.repeat_count('ggg')

选一个就行了,不要把两份不同的写法混在一起用,两份写法有些是不一样的

最后我想说:


声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Monodyee/article/detail/69578
推荐阅读
相关标签
  

闽ICP备14008679号