Another day of reinventing the wheel. I have to call out the crowd of low-effort posts on CSDN and Baidu: some people publish blog posts whose code obviously throws errors, and others are pure clickbait where the code runs but doesn't implement the feature in the title at all. It's a complete waste of time.
Enough preamble, here is the code.
First create a data folder; the file path I use is ./data/.
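If you would rather have the script create that folder for you, a minimal sketch (assuming the same ./data/ layout used below) is:

```python
import os

# Create the data directory used by the helper classes below;
# exist_ok keeps this from failing if the folder already exists.
os.makedirs('./data', exist_ok=True)
```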
Using xlrd and xlwt (note: these helpers open .xlsx files with xlrd, so they assume xlrd 1.2.x or earlier, since xlrd 2.x dropped .xlsx support):
```python
import xlrd
import xlwt
import xlutils.copy


class XLSX:
    # Note: xlwt always writes legacy .xls content, even though these paths
    # use the .xlsx extension.
    __workbook = xlwt.Workbook(encoding='utf-8')
    __sheet = __workbook.add_sheet('Sheet1', cell_overwrite_ok=True)

    # Save: head_list is the header row, data_list is a list of columns
    def save(self, name, head_list, data_list):
        path = './data/' + name + '.xlsx'
        head = head_list
        data = data_list
        row = 0
        col = 0
        for item in head:
            self.__sheet.write(row, col, item)
            col = col + 1
        for i in range(0, len(data)):
            for j in range(1, len(data[i]) + 1):
                self.__sheet.write(j, i, data[i][j - 1])
        self.__workbook.save(path)

    # Append a new column (with its header) to an existing file
    @staticmethod
    def save_append_col(name, data_list, head):
        path = './data/' + name + '.xlsx'
        file = xlrd.open_workbook(path)
        table = file.sheet_by_index(0)
        col_len = table.ncols
        ws = xlutils.copy.copy(file)
        ws_table = ws.get_sheet(0)
        ws_table.write(0, col_len, head)
        for i in range(1, len(data_list) + 1):
            ws_table.write(i, col_len, data_list[i - 1])
        ws.save(path)

    # Append a column (with its header); if the header already exists,
    # keep appending below that column starting at line_index
    @staticmethod
    def save_append_col_append(name, data_list, head, line_index):
        path = './data/' + name + '.xlsx'
        file = xlrd.open_workbook(path)
        table = file.sheet_by_index(0)
        cols = table.row_values(0)
        if head in cols:
            col_len = cols.index(head)
            ws = xlutils.copy.copy(file)
            ws_table = ws.get_sheet(0)
            for i in range(0, len(data_list)):
                ws_table.write(line_index + i - 1, col_len, data_list[i])
            ws.save(path)
        else:
            col_len = table.ncols
            ws = xlutils.copy.copy(file)
            ws_table = ws.get_sheet(0)
            ws_table.write(0, col_len, head)
            for i in range(1, len(data_list) + 1):
                ws_table.write(i, col_len, data_list[i - 1])
            ws.save(path)

    # Append n rows to an existing file, row by row
    @staticmethod
    def save_append_row(name, data_list):
        path = './data/' + name + '.xlsx'
        file = xlrd.open_workbook(path)
        table = file.sheet_by_index(0)
        row_len = table.nrows
        col_len = table.ncols
        ws = xlutils.copy.copy(file)
        ws_table = ws.get_sheet(0)
        for i in range(0, len(data_list)):
            for j in range(0, col_len):
                ws_table.write(row_len + i, j, data_list[i][j])
        ws.save(path)

    # Get all data in the file (skipping the header row)
    @staticmethod
    def get_all_line(file_name):
        file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
        table = file.sheets()[0]
        a = []
        for i in range(1, table.nrows):
            a.append(table.row_values(i))
        return a

    # Get all data in the file, row by row (including the header row)
    @staticmethod
    def get_all_line_col(file_name):
        file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
        sheet = file.sheet_by_index(0)  # first sheet
        result = []
        rows = sheet.nrows
        for row_idx in range(rows):
            row_values = sheet.row_values(row_idx)
            result.append(row_values)
        return result

    # Get all data in a given column (skipping the header row)
    @staticmethod
    def get_all_col(file_name, col_index):
        file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
        table = file.sheets()[0]
        a = []
        for i in range(1, table.nrows):
            a.append(table.row_values(i)[col_index])
        return a

    # Get all data in a given row
    @staticmethod
    def get_one_line(file_name, line_count):
        file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
        t1 = file.sheets()[0]
        return t1.row_values(line_count)

    # Get the value at a given row and column
    @staticmethod
    def get_one_line_col(file_name, line_count, col_count):
        file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
        t1 = file.sheets()[0]
        return t1.row_values(line_count)[col_count]

    # Get the total number of rows (including the header row)
    @staticmethod
    def get_row_length(file_name):
        file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
        table = file.sheet_by_index(0)
        return table.nrows

    # Get the length of a given column, not counting empty cells
    @staticmethod
    def get_one_col_length(file_name, col_index):
        file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
        table = file.sheets()[0]
        col_len = 0
        for row_idx in range(table.nrows):
            cell_value = table.cell_value(row_idx, col_index)
            if cell_value != '':
                col_len += 1
        return col_len

    # Get the total number of columns
    @staticmethod
    def get_col_length(file_name):
        file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
        table = file.sheet_by_index(0)
        return table.ncols

    # Get the index groups of duplicate values: if rows 1/6/8 are equal and
    # rows 5/9 are equal, returns [[1, 6, 8], [5, 9]]
    def get_repeat_list(self, file_name, col_index):
        return self.find_duplicates(self.get_all_col(file_name, col_index))

    # Delete rows, given as a list like [row1, row2]
    @staticmethod
    def delete_line(file_name, delete_rows):
        path = './data/' + file_name + '.xlsx'
        workbook = xlrd.open_workbook(path)
        sheet = workbook.sheet_by_index(0)
        rows_num = sheet.nrows
        rows_data = []
        for r in range(rows_num):
            if r not in delete_rows:
                rows_data.append(sheet.row_values(r))
        book = xlwt.Workbook()
        sheet1 = book.add_sheet('Sheet1', cell_overwrite_ok=True)
        # Write the rows that were kept back into the new sheet
        for r, row in enumerate(rows_data):
            for c, val in enumerate(row):
                sheet1.write(r, c, val)
        book.save(path)

    # Fill missing values in every row and column with the placeholder
    # string '缺失补充'
    @staticmethod
    def deletion(file_name):
        path = './data/' + file_name + '.xlsx'
        workbook = xlrd.open_workbook(path)
        sheet = workbook.sheet_by_index(0)
        rows_num = sheet.nrows
        cols_num = sheet.ncols
        data = []
        for r in range(rows_num):
            row = []
            for c in range(cols_num):
                value = sheet.cell_value(r, c)
                if value == '':  # an empty cell is treated as a missing value
                    value = '缺失补充'  # replace it with the placeholder string
                row.append(value)
            data.append(row)
        book = xlwt.Workbook()
        sheet1 = book.add_sheet('Sheet1', cell_overwrite_ok=True)
        for r in range(rows_num):
            for c in range(cols_num):
                sheet1.write(r, c, data[r][c])
        book.save(path)

    @staticmethod
    def find_duplicates(strs):
        # Group the indices of each value; keep only groups with duplicates
        result = []
        strs_dic = {}
        for index, s in enumerate(strs):
            if s not in strs_dic:
                strs_dic[s] = [index]
            else:
                strs_dic[s].append(index)
        for key in strs_dic:
            if len(strs_dic[key]) > 1:
                result.append(strs_dic[key])
        return result


# Tests
# xx = XLSX()
# xx.save('ddd', ['表头1', '表头2', '表头3'],
#         [['列1-第1行', '列1-第2行', '列1-第3行'], ['列2-第1行', '列2-第2行', '列2-第3行'],
#          ['列3-第1行', '列3-第2行', '列3-第3行']])
# xx.save_append_row('ddd', [['列1-第4行', '列2-第4行', '列3-第4行'], ['列1-第5行', '列2-第5行', '列3-第5行'],
#                            ['列1-第6行', '列2-第6行', '列3-第6行']])
# xx.save_append_col('ddd', ['列4-第1行', '列4-第2行', '列4-第3行'], '表头4')
# xx.save_append_col_append('ddd', ['列4-第1行', '列4-第2行', '列4-第3行'], '表头4', 5)
# xx.delete_line('ddd', [5, 6])
# xx.deletion('ddd')
# print(xx.get_all_line('ddd'))
# print(xx.get_one_line('ddd', 2))
# print(xx.get_all_col('ddd', 3))
# print(xx.get_one_line_col('ddd', 1, 0))
# print(xx.get_row_length('ddd'))
# print(xx.get_col_length('ddd'))
# print(xx.get_repeat_list('ddd', 1))
# print(xx.get_one_col_length('ddd', 3))
```
Using pandas (the sheet-appending method assumes the openpyxl engine is installed):
```python
import os
import numpy as np
import pandas as pd


class EXCEL:

    # Save: head_list is the header row, data_list is a list of columns
    @staticmethod
    def save(name, head_list, data_list):
        path = r'./data/' + name + '.xlsx'
        temp = {}
        for i in range(len(head_list)):
            temp[str(head_list[i])] = data_list[i]
        data = pd.DataFrame(temp)
        data.to_excel(path, index=False)

    # Add a new sheet to an existing file (create the file if needed)
    @staticmethod
    def append_sheet(name, sheet_name, head_list):
        path = r'./data/' + name + '.xlsx'
        df = pd.DataFrame(columns=head_list)
        # mode='a' requires an existing file, so fall back to 'w' otherwise
        mode = 'a' if os.path.exists(path) else 'w'
        with pd.ExcelWriter(path, mode=mode, engine='openpyxl') as writer:
            df.to_excel(writer, sheet_name=sheet_name, index=False)

    # Append n rows (data passed column by column, with headers)
    @staticmethod
    def append_row(name, head_list, data_list):
        path = r'./data/' + name + '.xlsx'
        temp = {}
        for i in range(len(head_list)):
            temp[str(head_list[i])] = data_list[i]
        old_data = pd.read_excel(path)
        new_data = pd.DataFrame(temp)
        result = pd.concat([old_data, new_data])
        result.to_excel(path, index=False)

    # Append a new column to an existing file
    @staticmethod
    def append_col(name, head_name, data_list):
        path = r'./data/' + name + '.xlsx'
        df = pd.read_excel(path)
        new_col = pd.Series(data_list)
        new_col.name = head_name
        result = pd.concat([df, new_col], axis=1)
        result.to_excel(path, header=True, index=False)

    # Get all data in the file, row by row
    @staticmethod
    def get_all_line(file_name):
        path = r'./data/' + file_name + '.xlsx'
        df = pd.read_excel(path)
        return df.values.tolist()

    # Get all data in the file, column by column
    @staticmethod
    def get_all_line_col(file_name):
        path = r'./data/' + file_name + '.xlsx'
        df = pd.read_excel(path)
        cols = df.columns.tolist()
        result = []
        for col in cols:
            result.append(df[col].values.tolist())
        return result

    # Get all data in a given column
    @staticmethod
    def get_all_col(file_name, col_index):
        path = r'./data/' + file_name + '.xlsx'
        df = pd.read_excel(path)
        col = df.iloc[:, col_index]
        return col.values.tolist()

    # Get all data in a given row
    @staticmethod
    def get_one_line(file_name, line_count):
        path = r'./data/' + file_name + '.xlsx'
        df = pd.read_excel(path)
        row = df.iloc[line_count]
        return row.values.tolist()

    # Get the value at a given row and column
    @staticmethod
    def get_one_line_col(file_name, line_count, col_count):
        path = r'./data/' + file_name + '.xlsx'
        df = pd.read_excel(path)
        row = df.iloc[line_count]
        col = row.iloc[col_count]
        if isinstance(col, np.float64):
            col = pd.Series(col)
        return col

    # Get the total number of rows (header excluded)
    @staticmethod
    def get_row_length(file_name):
        path = r'./data/' + file_name + '.xlsx'
        df = pd.read_excel(path)
        total_rows = df.shape[0]
        return total_rows

    # Get the total number of columns
    @staticmethod
    def get_col_length(file_name):
        path = r'./data/' + file_name + '.xlsx'
        df = pd.read_excel(path)
        total_cols = df.shape[1]
        return total_cols

    # Delete rows, given as a list like [row1, row2]
    @staticmethod
    def delete_line(file_name, delete_rows):
        path = r'./data/' + file_name + '.xlsx'
        df = pd.read_excel(path)
        df.drop(df.index[delete_rows], inplace=True)
        df.to_excel(path, index=False)

    # Get the index groups of duplicate values: if rows 1/6/8 are equal and
    # rows 5/9 are equal, returns [[1, 6, 8], [5, 9]]
    def get_repeat_list(self, file_name, col_index):
        return self.find_duplicates(self.get_all_col(file_name, col_index))

    # Fill missing values in a given column
    @staticmethod
    def deletion(file_name, col_head_name):
        path = r'./data/' + file_name + '.xlsx'
        df = pd.read_excel(path)
        # An alternative would be to fill numeric columns with the median:
        # num_col_type = df[col_head_name].dtype
        # if num_col_type == 'int64' or num_col_type == 'float64':
        #     df[col_head_name] = df[col_head_name].fillna(df[col_head_name].median())
        # else:
        #     df[col_head_name] = df[col_head_name].fillna('缺失')
        # Here we skip np.nan and type checks and fill everything with the
        # placeholder string '缺失补充'
        df[col_head_name] = df[col_head_name].fillna('缺失补充')
        df.to_excel(path, index=False)

    @staticmethod
    def find_duplicates(strs):
        # Group the indices of each value; keep only groups with duplicates
        result = []
        strs_dic = {}
        for index, s in enumerate(strs):
            if s not in strs_dic:
                strs_dic[s] = [index]
            else:
                strs_dic[s].append(index)
        for key in strs_dic:
            if len(strs_dic[key]) > 1:
                result.append(strs_dic[key])
        return result


# Tests
# ee = EXCEL()
# ee.save('ggg', ['表头1', '表头2', '表头3'],
#         [['列1-第1行', '列1-第2行', '列1-第3行'], ['列2-第1行', '列2-第2行', '列2-第3行'],
#          ['列3-第1行', '列3-第2行', '列3-第3行']])
#
# ee.append_sheet('ggg', 'Sheet2', ['表头1', '表头2', '表头3'])
#
# ee.append_row('ggg', ['表头1', '表头2', '表头3'],
#               [['列1-第1行', '列1-第2行'], ['列2-第1行', '列2-第2行'], ['列3-第1行', '列3-第2行']])
#
# ee.append_col('ggg', '表头4', ['列4-第1行', '列4-第2行', '列4-第3行'])
# ee.delete_line('ggg', [2, 3])
# ee.deletion('ggg', '表头4')
#
# print(ee.get_all_line('ggg'))
# print(ee.get_one_line('ggg', 2))
# print(ee.get_all_col('ggg', 3))
# print(ee.get_one_line_col('ggg', 1, 0))
# print(ee.get_row_length('ggg'))
# print(ee.get_col_length('ggg'))
# print(ee.get_repeat_list('ggg', 1))
```
Pick one of the two and stick with it; don't mix the two implementations in the same project, because some of their behaviors differ, as the sketch below shows.
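For example, the two row-count helpers already disagree (a minimal sketch, assuming both classes are importable and a hypothetical ./data/ddd.xlsx holds a header row plus three data rows):

```python
# xlrd's nrows counts the header row, pandas' df.shape[0] counts data rows only.
print(XLSX.get_row_length('ddd'))   # -> 4 (header + 3 data rows)
print(EXCEL.get_row_length('ddd'))  # -> 3 (data rows only)
```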