赞
踩
源数据如下:含脏数据(price列)
下列源码用到如下四个包:
import pandas as pd
import glob
import os
import csv
def base_read_and_write(input_file: str = 'csv_python.csv',
                        output_file: str = 'csv_python_write.csv') -> None:
    """Copy a CSV file by loading it into a DataFrame and writing it back out.

    Args:
        input_file: Path of the CSV file to read. Defaults to the file name
            the original script hard-coded.
        output_file: Path of the CSV file to write. Defaults to the file name
            the original script hard-coded.
    """
    data_frame = pd.read_csv(input_file)
    # index=False keeps pandas' row index out of the written file.
    data_frame.to_csv(output_file, index=False)
def write_row_in_col(input_file: str = 'csv_python.csv',
                     output_file: str = 'csv_python_write.csv') -> None:
    """Clean the ``price`` column and export the rows that pass the filter.

    Steps:
      1. Read ``input_file`` into a DataFrame.
      2. Turn ``price`` text such as ``'¥3,500,000'`` into floats by removing
         the currency sign and the thousands separators.
      3. Normalise the unit: any value above 10000 is divided by 10000, every
         other value is kept as is (the source data mixes prices in yuan with
         prices in units of 10,000 yuan).
      4. Keep rows whose ``name`` contains '世茂' and whose normalised
         ``price`` is greater than 200, and write them to ``output_file``
         without the index column.

    Args:
        input_file: Path of the CSV file to read; it must provide ``name``
            and ``price`` columns.
        output_file: Path of the CSV file that receives the filtered rows.
    """
    data_frame = pd.read_csv(input_file)

    # Remove the currency sign and thousands separators so the text parses
    # as a number.
    price = (
        data_frame['price']
        .str.strip('¥')
        .str.replace(',', '', regex=False)
        .astype(float)
    )

    # Vectorised unit normalisation. The column stays float, so the numeric
    # comparison below is valid (writing formatted strings back row by row
    # would turn the column into text).
    data_frame['price'] = price.mask(price > 10000, price / 10000)

    # na=False treats rows with a missing name as non-matching instead of
    # letting NaN leak into the boolean mask.
    name_matches = data_frame['name'].str.contains('世茂', regex=False, na=False)
    price_matches = data_frame['price'] > 200

    # .loc[rows, columns]: ':' selects every column.
    data_frame_value_meets_condition = data_frame.loc[name_matches & price_matches, :]
    data_frame_value_meets_condition.to_csv(output_file, index=False)
运行,新文件显示如下:
def write_row_in_set():
input_file =<
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。