赞
踩
def createDataframe():
    """
    Show several ways of constructing a DataFrame from the same records.

    Four frames are built and printed in turn: data only, data with column
    labels, data with column and row labels, and a dict of columns with
    explicit row labels.

    :return: None
    """
    # The data argument may be an ndarray, a Series, a list, a dict, etc.
    records = [
        [160, 60],
        [160, 70],
        [161, 61],
    ]
    col_labels = ['身高(cm)', '体重(kg)']
    row_labels = [101, 102, 103]

    # Data only: column labels are generated automatically.
    only_data = pd.DataFrame(data=records)
    print(only_data)

    # Data plus explicit column labels.
    with_columns = pd.DataFrame(data=records, columns=col_labels)
    print(with_columns)

    # Data plus explicit column labels and row labels.
    with_columns_and_index = pd.DataFrame(
        data=records, columns=col_labels, index=row_labels
    )
    print(with_columns_and_index)

    # A dict supplies both the column labels (keys) and the data (values).
    column_dict = {
        '性别': '男',
        '身高(cm)': [160, 160, 161],
        '体重(kg)': [60, 70, 61],
    }
    from_dict = pd.DataFrame(column_dict, index=row_labels)
    print(from_dict)
df_1 未指定index和columns 返回结果:
0 1
0 160 60
1 160 70
2 161 61
df_2 未指定index, 指定columns 返回结果:
身高(cm) 体重(kg)
0 160 60
1 160 70
2 161 61
df_3 指定index, 指定columns 返回结果:
身高(cm) 体重(kg)
101 160 60
102 160 70
103 161 61
df_4 通过字典指定数据和columns, 指定index 返回结果:
性别 身高(cm) 体重(kg)
101 男 160 60
102 男 160 70
103 男 161 61
def forFun():
    """
    Iterate over a DataFrame with for loops, first by column, then by row.

    :return: None
    """
    # The data argument may be an ndarray, a Series, a list, a dict, etc.
    records = [
        [160, 60],
        [160, 70],
        [161, 61],
        [170, 75],
        [171, 60],
        [170, 80],
    ]
    frame = pd.DataFrame(
        data=records,
        columns=['身高(cm)', '体重(kg)'],
        index=[101, 102, 103, 104, 105, 106],
    )

    print("================== 遍历列")
    for label in frame.columns:
        column = frame[label]
        print("col =====: ", label)
        print(column)

    print("================== 遍历行")
    for row_label, record in frame.iterrows():
        print("index =====: ", row_label)
        print(record)
遍历列 返回结果:
================== 遍历列 col =====: 身高(cm) 101 160 102 160 103 161 104 170 105 171 106 170 Name: 身高(cm), dtype: int64 col =====: 体重(kg) 101 60 102 70 103 61 104 75 105 60 106 80 Name: 体重(kg), dtype: int64
遍历行 返回结果:
================== 遍历行 index =====: 101 身高(cm) 160 体重(kg) 60 Name: 101, dtype: int64 index =====: 102 身高(cm) 160 体重(kg) 70 Name: 102, dtype: int64 index =====: 103 身高(cm) 161 体重(kg) 61 Name: 103, dtype: int64 index =====: 104 身高(cm) 170 体重(kg) 75 Name: 104, dtype: int64 index =====: 105 身高(cm) 171 体重(kg) 60 Name: 105, dtype: int64 index =====: 106 身高(cm) 170 体重(kg) 80 Name: 106, dtype: int64
def getDataByColNameFun():
    """
    Select columns by passing a list of column names to ``df[...]``.

    :return: None
    """
    frame = pd.DataFrame(
        data=[
            [11, 12, 13, 14],
            [21, 22, 23, 24],
            [31, 32, 33, 34],
            [41, 42, 43, 44],
            [51, 52, 53, 54],
        ],
        index=['idx-1', 'idx-2', 'idx-3', 'idx-4', 'idx-5'],
        columns=['col-1', 'col-2', 'col-3', 'col-4'],
    )

    # Indexing with a list of labels yields a DataFrame with those columns.
    wanted = ['col-1', 'col-2']
    print(frame[wanted])
res 返回结果:
col-1 col-2
idx-1 11 12
idx-2 21 22
idx-3 31 32
idx-4 41 42
idx-5 51 52
def getDataByLocFun():
    """
    Select rows and columns by label with ``loc``.

    :return: None
    """
    col_labels = ['col-1', 'col-2', 'col-3', 'col-4']
    row_labels = ['idx-1', 'idx-2', 'idx-3', 'idx-4', 'idx-5']
    values = [
        [11, 12, 13, 14],
        [21, 22, 23, 24],
        [31, 32, 33, 34],
        [41, 42, 43, 44],
        [51, 52, 53, 54],
    ]
    frame = pd.DataFrame(data=values, index=row_labels, columns=col_labels)

    # One row label.
    single_row = frame.loc["idx-1"]
    print(single_row)

    # Several row labels.
    several_rows = frame.loc[["idx-1", "idx-3"]]
    print(several_rows)

    # All rows, selected columns (col-1 and col-3).
    selected_cols = frame.loc[:, ['col-1', 'col-3']]
    print(selected_cols)

    # Selected rows and selected columns together.
    rows_and_cols = frame.loc[["idx-1", "idx-3", "idx-5"], ['col-1', 'col-3']]
    print(rows_and_cols)
get_one_by_index 返回结果:
col-1 11
col-2 12
col-3 13
col-4 14
get_muilt_by_index 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13 14
idx-3 31 32 33 34
get_mult_by_rows 返回结果:
col-1 col-3
idx-1 11 13
idx-2 21 23
idx-3 31 33
idx-4 41 43
idx-5 51 53
get_mult_by_col_rows 返回结果:
col-1 col-3
idx-1 11 13
idx-3 31 33
idx-5 51 53
def getDataByIlocFun():
    """
    Select rows and columns by integer position with ``iloc``.

    :return: None
    """
    frame = pd.DataFrame(
        data=[
            [11, 12, 13, 14],
            [21, 22, 23, 24],
            [31, 32, 33, 34],
            [41, 42, 43, 44],
            [51, 52, 53, 54],
        ],
        index=['idx-1', 'idx-2', 'idx-3', 'idx-4', 'idx-5'],
        columns=['col-1', 'col-2', 'col-3', 'col-4'],
    )

    # A single row by position (row 0); the list keeps the result a DataFrame.
    first_row = frame.iloc[[0]]
    print(first_row)

    # Several rows by position (rows 0 and 3).
    rows_0_and_3 = frame.iloc[[0, 3]]
    print(rows_0_and_3)

    # A slice of rows (rows 0 through 2; the stop position is excluded).
    rows_0_to_2 = frame.iloc[:3]
    print(rows_0_to_2)

    # A slice of rows from row 1 to the end.
    rows_from_1 = frame.iloc[1:]
    print(rows_from_1)

    # Columns by position (columns 0 and 1), all rows.
    first_two_cols = frame.iloc[:, [0, 1]]
    print(first_two_cols)

    # Rows and columns by position (rows 0 and 3, columns 0 and 1).
    rows_and_cols = frame.iloc[[0, 3], [0, 1]]
    print(rows_and_cols)

    # All rows, first three columns via a column slice.
    first_three_cols = frame.iloc[:, :3]
    print(first_three_cols)
get_one_by_row 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13 14
get_mult_by_row_1 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13 14
idx-4 41 42 43 44
get_mult_by_rows_2 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13 14
idx-2 21 22 23 24
idx-3 31 32 33 34
get_mult_by_rows_3 返回结果:
col-1 col-2 col-3 col-4
idx-2 21 22 23 24
idx-3 31 32 33 34
idx-4 41 42 43 44
idx-5 51 52 53 54
get_mult_by_col_1 返回结果:
col-1 col-2
idx-1 11 12
idx-2 21 22
idx-3 31 32
idx-4 41 42
idx-5 51 52
get_mult_by_col_2 返回结果:
col-1 col-2
idx-1 11 12
idx-4 41 42
get_mult_by_col_3 返回结果:
col-1 col-2 col-3
idx-1 11 12 13
idx-2 21 22 23
idx-3 31 32 33
idx-4 41 42 43
idx-5 51 52 53
def insertCol():
    """
    Add columns to a DataFrame: append at the end and insert at a position.

    :return: None
    """
    values = [
        [11, 12, 13, 14],
        [21, 22, 23, 24],
        [31, 32, 33, 34],
        [41, 42, 43, 44],
        [51, 52, 53, 54],
    ]
    frame = pd.DataFrame(
        data=values,
        index=['idx-1', 'idx-2', 'idx-3', 'idx-4', 'idx-5'],
        columns=['col-1', 'col-2', 'col-3', 'col-4'],
    )

    # Append a column at the end by assigning to a new label.
    frame["col-6"] = [106, 206, 306, 406, 506]
    # Same effect through loc with a new column label.
    frame.loc[:, 'col-7'] = [116, 216, 316, 416, 516]

    # Insert a column at position 1 (right after the first column).
    frame.insert(1, 'col-01', [216, 226, 236, 246, 256])

    print(frame)
原始数据:
col-1 col-2 col-3 col-4
idx-1 11 12 13 14
idx-2 21 22 23 24
idx-3 31 32 33 34
idx-4 41 42 43 44
idx-5 51 52 53 54
新增列 返回结果:
col-1 col-01 col-2 col-3 col-4 col-6 col-7
idx-1 11 216 12 13 14 106 116
idx-2 21 226 22 23 24 206 216
idx-3 31 236 32 33 34 306 316
idx-4 41 246 42 43 44 406 416
idx-5 51 256 52 53 54 506 516
def insertRow():
    """
    Append a row to a DataFrame by assigning to a new row label via ``loc``.

    :return: None
    """
    frame = pd.DataFrame(
        data=[
            [11, 12, 13, 14],
            [21, 22, 23, 24],
            [31, 32, 33, 34],
            [41, 42, 43, 44],
            [51, 52, 53, 54],
        ],
        index=['idx-1', 'idx-2', 'idx-3', 'idx-4', 'idx-5'],
        columns=['col-1', 'col-2', 'col-3', 'col-4'],
    )

    # Assigning to a row label that does not exist yet adds a single row.
    new_row = [101, 102, 103, 104]
    frame.loc["idx-100"] = new_row

    print(frame)
原始数据:
col-1 col-2 col-3 col-4
idx-1 11 12 13 14
idx-2 21 22 23 24
idx-3 31 32 33 34
idx-4 41 42 43 44
idx-5 51 52 53 54
新增行 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13 14
idx-2 21 22 23 24
idx-3 31 32 33 34
idx-4 41 42 43 44
idx-5 51 52 53 54
idx-100 101 102 103 104
def updateByLoc():
    """
    Modify DataFrame contents by label with ``loc``.

    Each example works on its own copy of the base frame so the three
    printed results are independent of each other.

    :return: None
    """
    base = pd.DataFrame(
        data=[
            [11, 12, 13, 14],
            [21, 22, 23, 24],
            [31, 32, 33, 34],
            [41, 42, 43, 44],
            [51, 52, 53, 54],
        ],
        index=['idx-1', 'idx-2', 'idx-3', 'idx-4', 'idx-5'],
        columns=['col-1', 'col-2', 'col-3', 'col-4'],
    )

    # Replace a whole row, addressed by its row label.
    row_changed = base.copy()
    row_changed.loc["idx-1"] = [101, 102, 103, 104]
    print(row_changed)

    # Replace a whole column, addressed by its column label.
    col_changed = base.copy()
    col_changed.loc[:, "col-2"] = [111, 121, 131, 141, 151]
    print(col_changed)

    # Replace a single cell, addressed by row label and column label.
    cell_changed = base.copy()
    cell_changed.loc["idx-1", "col-2"] = 1002
    print(cell_changed)
update_by_index 修改整行(idx-1)数据 返回结果:
col-1 col-2 col-3 col-4
idx-1 101 102 103 104
idx-2 21 22 23 24
idx-3 31 32 33 34
idx-4 41 42 43 44
idx-5 51 52 53 54
update_by_column 修改整列(col-2)数据:
col-1 col-2 col-3 col-4
idx-1 11 111 13 14
idx-2 21 121 23 24
idx-3 31 131 33 34
idx-4 41 141 43 44
idx-5 51 151 53 54
update_element 修改某列某行【col-2 : idx-1】数据:
col-1 col-2 col-3 col-4
idx-1 11 1002 13 14
idx-2 21 22 23 24
idx-3 31 32 33 34
idx-4 41 42 43 44
idx-5 51 52 53 54
def updateByIloc():
    """
    Modify DataFrame contents by integer position with ``iloc``.

    Each example works on its own copy of the base frame so the three
    printed results are independent of each other.

    :return: None
    """
    base = pd.DataFrame(
        data=[
            [11, 12, 13, 14],
            [21, 22, 23, 24],
            [31, 32, 33, 34],
            [41, 42, 43, 44],
            [51, 52, 53, 54],
        ],
        index=['idx-1', 'idx-2', 'idx-3', 'idx-4', 'idx-5'],
        columns=['col-1', 'col-2', 'col-3', 'col-4'],
    )

    # Replace a single cell at position (row 1, column 1), i.e. idx-2/col-2.
    cell_changed = base.copy()
    cell_changed.iloc[1, 1] = 202
    print(cell_changed)

    # Replace the row at position 1 (the second row, idx-2).
    row_changed = base.copy()
    row_changed.iloc[1, :] = [121, 122, 123, 124]
    print(row_changed)

    # Replace the column at position 1 (the second column, col-2).
    col_changed = base.copy()
    col_changed.iloc[:, 1] = [112, 122, 132, 142, 152]
    print(col_changed)
update_element 修改某列某行【col-2 : idx-2】 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13 14
idx-2 21 202 23 24
idx-3 31 32 33 34
idx-4 41 42 43 44
idx-5 51 52 53 54
update_row 修改第2行(idx-2)数据 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13 14
idx-2 121 122 123 124
idx-3 31 32 33 34
idx-4 41 42 43 44
idx-5 51 52 53 54
update_col 修改第2列(col-2)数据 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 112 13 14
idx-2 21 122 23 24
idx-3 31 132 33 34
idx-4 41 142 43 44
idx-5 51 152 53 54
def dropFun():
    """
    Demonstrate the different ways of deleting columns and rows.

    Every example starts from a fresh copy of the same base frame and prints
    the result, so the examples do not affect each other.

    Missing values are written as ``np.nan``. The ``np.NaN`` alias was
    removed in NumPy 2.0 and raises ``AttributeError`` there.

    :return: None
    """
    data = [
        [11, 12, 13, 14],
        [21, 22, 23, np.nan],
        [31, 32, 33, np.nan],
        [41, 42, 43, np.nan],
        [51, 52, np.nan, 54],
    ]
    columns = ['col-1', 'col-2', 'col-3', 'col-4']
    index = ['idx-1', 'idx-2', 'idx-3', 'idx-4', 'idx-5']
    df = pd.DataFrame(data=data, index=index, columns=columns)

    # Drop column col-1 by name with axis=1.
    drop_col = df.copy()
    drop_col.drop(["col-1"], axis=1, inplace=True)
    print(drop_col)

    # Drop column col-1 through the columns keyword.
    drop_col_by_columns = df.copy()
    drop_col_by_columns.drop(columns="col-1", inplace=True)
    print(drop_col_by_columns)

    # Drop column col-1 through the labels keyword with axis=1.
    drop_col_by_labels = df.copy()
    drop_col_by_labels.drop(labels="col-1", axis=1, inplace=True)
    print(drop_col_by_labels)

    # Drop row idx-1 through the index keyword.
    drop_row_by_index = df.copy()
    drop_row_by_index.drop(index="idx-1", inplace=True)
    print(drop_row_by_index)

    # Drop row idx-1 through the labels keyword with axis=0.
    drop_row_by_labels = df.copy()
    drop_row_by_labels.drop(labels="idx-1", axis=0, inplace=True)
    print(drop_row_by_labels)

    # Drop rows idx-2 and idx-3 by passing a list of row labels.
    drop_rows_by_index = df.copy()
    drop_rows_by_index.drop(["idx-2", "idx-3"], inplace=True)
    print(drop_rows_by_index)

    # Drop a single row selected by a filter condition.
    # Only the first matching label is taken; the fixed data above guarantees
    # that one row has col-1 == 41.
    drop_by_condition_1 = df.copy()
    index1 = drop_by_condition_1[drop_by_condition_1["col-1"].isin([41])].index[0]
    drop_by_condition_1.drop(index=index1, inplace=True)
    print(drop_by_condition_1)

    # Drop several rows selected by a membership filter.
    drop_by_condition_2 = df.copy()
    index2 = drop_by_condition_2[drop_by_condition_2["col-1"].isin([41, 51])].index
    drop_by_condition_2.drop(index=index2, inplace=True)
    print(drop_by_condition_2)

    # Drop several rows selected by a comparison filter.
    drop_by_condition_3 = df.copy()
    index3 = drop_by_condition_3[drop_by_condition_3["col-1"] > 40].index
    drop_by_condition_3.drop(index=index3, inplace=True)
    print(drop_by_condition_3)

    # Drop rows selected by a combination of conditions (both must hold).
    drop_by_condition_4 = df.copy()
    index4 = drop_by_condition_4[
        (drop_by_condition_4["col-1"].isin([41, 51]))
        & (drop_by_condition_4["col-2"] > 50)
    ].index
    drop_by_condition_4.drop(index=index4, inplace=True)
    print(drop_by_condition_4)

    # Drop every row that contains a NaN in any column.
    drop_na_1 = df.copy()
    drop_na_1 = drop_na_1.dropna()
    print(drop_na_1)

    # Keep only the rows whose col-4 value is not NaN.
    drop_na_2 = df.copy()
    drop_na_2 = drop_na_2[drop_na_2["col-4"].notnull()]
    print(drop_na_2)
原始数据:
col-1 col-2 col-3 col-4
idx-1 11 12 13.0 14.0
idx-2 21 22 23.0 NaN
idx-3 31 32 33.0 NaN
idx-4 41 42 43.0 NaN
idx-5 51 52 NaN 54.0
drop_col 通过列名 删除col-1列 返回结果:
drop_col_by_columns 通过columns 删除col-1列 返回结果:
drop_col_by_labels 通过标签 删除 col-1列 返回结果:
col-2 col-3 col-4
idx-1 12 13.0 14.0
idx-2 22 23.0 NaN
idx-3 32 33.0 NaN
idx-4 42 43.0 NaN
idx-5 52 NaN 54.0
drop_row_by_index 通过index 删除 idx-1行 返回结果:
drop_row_by_labels 通过标签 删除 idx-1行 返回结果:
col-1 col-2 col-3 col-4
idx-2 21 22 23.0 NaN
idx-3 31 32 33.0 NaN
idx-4 41 42 43.0 NaN
idx-5 51 52 NaN 54.0
drop_rows_by_index 通过index 删除idx-2, idx-3行 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13.0 14.0
idx-4 41 42 43.0 NaN
idx-5 51 52 NaN 54.0
drop_by_condition_1 通过过滤条件删除单行 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13.0 14.0
idx-2 21 22 23.0 NaN
idx-3 31 32 33.0 NaN
idx-5 51 52 NaN 54.0
drop_by_condition_2 通过过滤条件删除多行 返回结果:
drop_by_condition_3 通过过滤条件删除多行 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13.0 14.0
idx-2 21 22 23.0 NaN
idx-3 31 32 33.0 NaN
drop_by_condition_4 通过组合过滤条件删除多行 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13.0 14.0
idx-2 21 22 23.0 NaN
idx-3 31 32 33.0 NaN
idx-4 41 42 43.0 NaN
drop_na_1 删除含有NAN的行 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13.0 14.0
drop_na_2 删除col-4列为NaN的行 返回结果:
col-1 col-2 col-3 col-4
idx-1 11 12 13.0 14.0
idx-5 51 52 NaN 54.0
def sortFun():
    """
    Demonstrate sorting, grouped aggregation and ranking on a DataFrame.

    :return: None
    """
    frame = pd.DataFrame(
        data=[
            [51, 52, 53, 'c'],
            [31, 32, 33, 'b'],
            [21, 22, 23, 'b'],
            [41, 42, 43, 'c'],
            [11, 12, 13, 'a'],
        ],
        index=['idx-1', 'idx-2', 'idx-3', 'idx-4', 'idx-5'],
        columns=['col-1', 'col-2', 'col-3', 'col-4'],
    )

    # Sort by col-1, largest first.
    descending = frame.sort_values(by="col-1", ascending=False)
    print(descending)

    # Sort by col-1, smallest first.
    ascending = frame.sort_values(by="col-1", ascending=True)
    print(ascending)

    # Sum col-1 per col-4 group. No explicit sort is applied here; the row
    # order is whatever groupby produces (sorted group keys by default).
    grouped = frame.groupby(["col-4"])["col-1"]
    group_totals = grouped.sum().reset_index()
    print(group_totals)

    # Sort by col-1 descending and attach the rank of each row.
    ranked_sorted = frame.sort_values(by='col-1', ascending=False)
    ranked_sorted['sort-num'] = ranked_sorted['col-1'].rank(
        method="first", ascending=False
    )
    print(ranked_sorted[['col-4', 'col-1', 'sort-num']])

    # Attach the descending rank of col-1 without reordering the rows.
    ranked_in_place = frame.copy()
    ranked_in_place['sort-num'] = ranked_in_place['col-1'].rank(ascending=False)
    print(ranked_in_place)
原始数据:
col-1 col-2 col-3 col-4
idx-1 51 52 53 c
idx-2 31 32 33 b
idx-3 21 22 23 b
idx-4 41 42 43 c
idx-5 11 12 13 a
desc 降序 返回结果:
col-1 col-2 col-3 col-4
idx-1 51 52 53 c
idx-4 41 42 43 c
idx-2 31 32 33 b
idx-3 21 22 23 b
idx-5 11 12 13 a
asc 升序 返回结果:
col-1 col-2 col-3 col-4
idx-5 11 12 13 a
idx-3 21 22 23 b
idx-2 31 32 33 b
idx-4 41 42 43 c
idx-1 51 52 53 c
group_desc 分组求和(结果按分组键升序排列) 返回结果:
col-4 col-1
0 a 11
1 b 52
2 c 92
rank_df 对某列进行降序,并显示排名 返回结果:
col-4 col-1 sort-num
idx-1 c 51 1.0
idx-4 c 41 2.0
idx-2 b 31 3.0
idx-3 b 21 4.0
idx-5 a 11 5.0
rank_df_1 对某列进行降序,并显示排名 返回结果:
col-1 col-2 col-3 col-4 sort-num
idx-1 51 52 53 c 1.0
idx-2 31 32 33 b 3.0
idx-3 21 22 23 b 4.0
idx-4 41 42 43 c 2.0
idx-5 11 12 13 a 5.0
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。