当前位置:   article > 正文

不同版本的pandas往excel追加数据的解决方案(亲测)_pandas支持append 版本

pandas支持append 版本

最近工作中需要往同一个excel里写好几个不同来源且格式不一样的数据,因为pandas直接写入比较困难,所以要么是每次写到一个文件合并,要么就通过往excel追加数据的方式解决这个问题,通过一顿谷歌,找到了以下的解决方案,并亲测有效,希望对以后遇到的同学有帮助。

pandas1.4.0 已经支持追加操作,通过配置if_sheet_exixts='overlay'参数就可以

pandas1.3.0+  

  1. from pathlib import Path
  2. from copy import copy
  3. from typing import Union, Optional
  4. import numpy as np
  5. import pandas as pd
  6. import openpyxl
  7. from openpyxl import load_workbook
  8. from openpyxl.utils import get_column_letter
  9. def copy_excel_cell_range(
  10. src_ws: openpyxl.worksheet.worksheet.Worksheet,
  11. min_row: int = None,
  12. max_row: int = None,
  13. min_col: int = None,
  14. max_col: int = None,
  15. tgt_ws: openpyxl.worksheet.worksheet.Worksheet = None,
  16. tgt_min_row: int = 1,
  17. tgt_min_col: int = 1,
  18. with_style: bool = True
  19. ) -> openpyxl.worksheet.worksheet.Worksheet:
  20. """
  21. copies all cells from the source worksheet [src_ws] starting from [min_row] row
  22. and [min_col] column up to [max_row] row and [max_col] column
  23. to target worksheet [tgt_ws] starting from [tgt_min_row] row
  24. and [tgt_min_col] column.
  25. @param src_ws: source worksheet
  26. @param min_row: smallest row index in the source worksheet (1-based index)
  27. @param max_row: largest row index in the source worksheet (1-based index)
  28. @param min_col: smallest column index in the source worksheet (1-based index)
  29. @param max_col: largest column index in the source worksheet (1-based index)
  30. @param tgt_ws: target worksheet.
  31. If None, then the copy will be done to the same (source) worksheet.
  32. @param tgt_min_row: target row index (1-based index)
  33. @param tgt_min_col: target column index (1-based index)
  34. @param with_style: whether to copy cell style. Default: True
  35. @return: target worksheet object
  36. """
  37. if tgt_ws is None:
  38. tgt_ws = src_ws
  39. # https://stackoverflow.com/a/34838233/5741205
  40. for row in src_ws.iter_rows(min_row=min_row, max_row=max_row,
  41. min_col=min_col, max_col=max_col):
  42. for cell in row:
  43. tgt_cell = tgt_ws.cell(
  44. row=cell.row + tgt_min_row - 1,
  45. column=cell.col_idx + tgt_min_col - 1,
  46. value=cell.value
  47. )
  48. if with_style and cell.has_style:
  49. # tgt_cell._style = copy(cell._style)
  50. tgt_cell.font = copy(cell.font)
  51. tgt_cell.border = copy(cell.border)
  52. tgt_cell.fill = copy(cell.fill)
  53. tgt_cell.number_format = copy(cell.number_format)
  54. tgt_cell.protection = copy(cell.protection)
  55. tgt_cell.alignment = copy(cell.alignment)
  56. return tgt_ws
  57. def append_df_to_excel(
  58. filename: Union[str, Path],
  59. df: pd.DataFrame,
  60. sheet_name: str = 'Sheet1',
  61. startrow: Optional[int] = None,
  62. max_col_width: int = 30,
  63. autofilter: bool = False,
  64. fmt_int: str = "#,##0",
  65. fmt_float: str = "#,##0.00",
  66. fmt_date: str = "yyyy-mm-dd",
  67. fmt_datetime: str = "yyyy-mm-dd hh:mm",
  68. truncate_sheet: bool = False,
  69. storage_options: Optional[dict] = None,
  70. **to_excel_kwargs
  71. ) -> None:
  72. """
  73. Append a DataFrame [df] to existing Excel file [filename]
  74. into [sheet_name] Sheet.
  75. If [filename] doesn't exist, then this function will create it.
  76. @param filename: File path or existing ExcelWriter
  77. (Example: '/path/to/file.xlsx')
  78. @param df: DataFrame to save to workbook
  79. @param sheet_name: Name of sheet which will contain DataFrame.
  80. (default: 'Sheet1')
  81. @param startrow: upper left cell row to dump data frame.
  82. Per default (startrow=None) calculate the last row
  83. in the existing DF and write to the next row...
  84. @param max_col_width: maximum column width in Excel. Default: 40
  85. @param autofilter: boolean - whether add Excel autofilter or not. Default: False
  86. @param fmt_int: Excel format for integer numbers
  87. @param fmt_float: Excel format for float numbers
  88. @param fmt_date: Excel format for dates
  89. @param fmt_datetime: Excel format for datetime's
  90. @param truncate_sheet: truncate (remove and recreate) [sheet_name]
  91. before writing DataFrame to Excel file
  92. @param storage_options: dict, optional
  93. Extra options that make sense for a particular storage connection, e.g. host, port,
  94. username, password, etc., if using a URL that will be parsed by fsspec, e.g.,
  95. starting “s3://”, “gcs://”.
  96. @param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
  97. [can be a dictionary]
  98. @return: None
  99. Usage examples:
  100. >>> append_df_to_excel('/tmp/test.xlsx', df, autofilter=True,
  101. freeze_panes=(1,0))
  102. >>> append_df_to_excel('/tmp/test.xlsx', df, header=None, index=False)
  103. >>> append_df_to_excel('/tmp/test.xlsx', df, sheet_name='Sheet2',
  104. index=False)
  105. >>> append_df_to_excel('/tmp/test.xlsx', df, sheet_name='Sheet2',
  106. index=False, startrow=25)
  107. >>> append_df_to_excel('/tmp/test.xlsx', df, index=False,
  108. fmt_datetime="dd.mm.yyyy hh:mm")
  109. (c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
  110. """
  111. def set_column_format(ws, column_letter, fmt):
  112. for cell in ws[column_letter]:
  113. cell.number_format = fmt
  114. filename = Path(filename)
  115. file_exists = filename.is_file()
  116. # process parameters
  117. # calculate first column number
  118. # if the DF will be written using `index=True`, then `first_col = 2`, else `first_col = 1`
  119. first_col = int(to_excel_kwargs.get("index", True)) + 1
  120. # ignore [engine] parameter if it was passed
  121. if 'engine' in to_excel_kwargs:
  122. to_excel_kwargs.pop('engine')
  123. # save content of existing sheets
  124. if file_exists:
  125. wb = load_workbook(filename)
  126. sheet_names = wb.sheetnames
  127. sheet_exists = sheet_name in sheet_names
  128. sheets = {ws.title: ws for ws in wb.worksheets}
  129. with pd.ExcelWriter(
  130. filename.with_suffix(".xlsx"),
  131. engine="openpyxl",
  132. mode="a" if file_exists else "w",
  133. if_sheet_exists="new" if file_exists else None,
  134. date_format=fmt_date,
  135. datetime_format=fmt_datetime,
  136. storage_options=storage_options
  137. ) as writer:
  138. if file_exists:
  139. # try to open an existing workbook
  140. writer.book = wb
  141. # get the last row in the existing Excel sheet
  142. # if it was not specified explicitly
  143. if startrow is None and sheet_name in writer.book.sheetnames:
  144. startrow = writer.book[sheet_name].max_row
  145. # truncate sheet
  146. if truncate_sheet and sheet_name in writer.book.sheetnames:
  147. # index of [sheet_name] sheet
  148. idx = writer.book.sheetnames.index(sheet_name)
  149. # remove [sheet_name]
  150. writer.book.remove(writer.book.worksheets[idx])
  151. # create an empty sheet [sheet_name] using old index
  152. writer.book.create_sheet(sheet_name, idx)
  153. # copy existing sheets
  154. writer.sheets = sheets
  155. else:
  156. # file doesn't exist, we are creating a new one
  157. startrow = 0
  158. # write out the DataFrame to an ExcelWriter
  159. df.to_excel(writer, sheet_name=sheet_name, **to_excel_kwargs)
  160. worksheet = writer.sheets[sheet_name]
  161. if autofilter:
  162. worksheet.auto_filter.ref = worksheet.dimensions
  163. for xl_col_no, dtyp in enumerate(df.dtypes, first_col):
  164. col_no = xl_col_no - first_col
  165. width = max(df.iloc[:, col_no].astype(str).str.len().max(),
  166. len(df.columns[col_no]) + 6)
  167. width = min(max_col_width, width)
  168. column_letter = get_column_letter(xl_col_no)
  169. worksheet.column_dimensions[column_letter].width = width
  170. if np.issubdtype(dtyp, np.integer):
  171. set_column_format(worksheet, column_letter, fmt_int)
  172. if np.issubdtype(dtyp, np.floating):
  173. set_column_format(worksheet, column_letter, fmt_float)
  174. if file_exists and sheet_exists:
  175. # move (append) rows from new worksheet to the `sheet_name` worksheet
  176. wb = load_workbook(filename)
  177. # retrieve generated worksheet name
  178. new_sheet_name = set(wb.sheetnames) - set(sheet_names)
  179. if new_sheet_name:
  180. new_sheet_name = list(new_sheet_name)[0]
  181. # copy rows written by `df.to_excel(...)` to
  182. copy_excel_cell_range(
  183. src_ws=wb[new_sheet_name],
  184. tgt_ws=wb[sheet_name],
  185. tgt_min_row=startrow + 1,
  186. with_style=True
  187. )
  188. # remove new (generated by Pandas) worksheet
  189. del wb[new_sheet_name]
  190. wb.save(filename)
  191. wb.close()

pandas1.2.3 && openpyxl3.0.5

  1. import os
  2. from openpyxl import load_workbook
  3. def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
  4. truncate_sheet=False,
  5. **to_excel_kwargs):
  6. """
  7. Append a DataFrame [df] to existing Excel file [filename]
  8. into [sheet_name] Sheet.
  9. If [filename] doesn't exist, then this function will create it.
  10. @param filename: File path or existing ExcelWriter
  11. (Example: '/path/to/file.xlsx')
  12. @param df: DataFrame to save to workbook
  13. @param sheet_name: Name of sheet which will contain DataFrame.
  14. (default: 'Sheet1')
  15. @param startrow: upper left cell row to dump data frame.
  16. Per default (startrow=None) calculate the last row
  17. in the existing DF and write to the next row...
  18. @param truncate_sheet: truncate (remove and recreate) [sheet_name]
  19. before writing DataFrame to Excel file
  20. @param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
  21. [can be a dictionary]
  22. @return: None
  23. Usage examples:
  24. >>> append_df_to_excel('d:/temp/test.xlsx', df)
  25. >>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
  26. >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
  27. index=False)
  28. >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
  29. index=False, startrow=25)
  30. (c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
  31. """
  32. # Excel file doesn't exist - saving and exiting
  33. if not os.path.isfile(filename):
  34. df.to_excel(
  35. filename,
  36. sheet_name=sheet_name,
  37. startrow=startrow if startrow is not None else 0,
  38. **to_excel_kwargs)
  39. return
  40. # ignore [engine] parameter if it was passed
  41. if 'engine' in to_excel_kwargs:
  42. to_excel_kwargs.pop('engine')
  43. writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
  44. # try to open an existing workbook
  45. writer.book = load_workbook(filename)
  46. # get the last row in the existing Excel sheet
  47. # if it was not specified explicitly
  48. if startrow is None and sheet_name in writer.book.sheetnames:
  49. startrow = writer.book[sheet_name].max_row
  50. # truncate sheet
  51. if truncate_sheet and sheet_name in writer.book.sheetnames:
  52. # index of [sheet_name] sheet
  53. idx = writer.book.sheetnames.index(sheet_name)
  54. # remove [sheet_name]
  55. writer.book.remove(writer.book.worksheets[idx])
  56. # create an empty sheet [sheet_name] using old index
  57. writer.book.create_sheet(sheet_name, idx)
  58. # copy existing sheets
  59. writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
  60. if startrow is None:
  61. startrow = 0
  62. # write out the new sheet
  63. df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
  64. # save the workbook
  65. writer.save()

使用示例:

  1. filename = r'C:\OCC.xlsx'
  2. append_df_to_excel(filename, df)
  3. append_df_to_excel(filename, df, header=None, index=False)
  4. append_df_to_excel(filename, df, sheet_name='Sheet2', index=False)
  5. append_df_to_excel(filename, df, sheet_name='Sheet2', index=False, startrow=25)

ref:Append existing excel sheet with new dataframe using python pandas - Stack Overflow

本文内容由网友自发贡献,转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号