赞
踩
import random

import numpy as np
import pandas
import pandas as pd
import seaborn

# --- Series basics: construction, value counts and membership masks ---

# Example of a labelled random frame (left disabled, as in the original notes):
# u = pd.DataFrame(np.random.randn(3, 4), columns=list('abcd'), index=list('123'))
# print(u)

# Wrap a Series into a single-column DataFrame named 'a'.
t = pd.Series([-1, -1, -1, 2, 3, 1, 4, -4, 4, 4])
t_u = pd.DataFrame({'a': t}, columns=['a'])
# print(t_u)

obj = pd.Series([-1, -1, -1, 2, 3, 1, 4, -4, 4])
# print(obj.unique())

# value_counts() returns a Series indexed by the distinct values of obj.
jlj = obj.value_counts()
# print(jlj)

# mask is a boolean Series: True where the element is -1 or 4.
mask = obj.isin([-1, 4])
# print(mask)

# Indexing with the mask keeps only the matching elements.
# print(obj[mask])

# Ten random integers drawn from [0, 10).
tt = [np.random.randint(10) for _ in range(10)]
# print(tt)

# --- Per-column value counts of a small survey-style table ---
data = pd.DataFrame({
    'q1': [4, 0, 5, 2, 5, 1, 0, 1, 9, 8],
    'q2': [4, 2, 7, 4, 6, 1, 3, 2, 3, 2],
    'q3': [2, 5, 5, 3, 5, 2, 6, 2, 2, 2],
})

# Count the occurrences of every value in each column. The row index of
# `result` is the union of all values seen; a value that never occurs in a
# column produces NaN there, which fillna(0) turns into a zero count.
# Series.value_counts is used instead of the deprecated top-level
# pd.value_counts function.
result = data.apply(pd.Series.value_counts).fillna(0)
# print(data, '\n', result)


# --- Weighted random sampling and counting one outcome in two ways ---
data_already = [1, 2, 3, 4, 5, 6]
weight = [1, 1, 1, 4, 4, 4]

# Draw 2000 samples with replacement; 4, 5 and 6 are each four times as
# likely as 1, 2 and 3.
list_num = random.choices(data_already, weights=weight, k=2000)

# Way 1: compare against the value and sum the resulting boolean Series.
s = pd.Series(list_num)
count_1 = (s == 1).sum()
print(count_1)

# Way 2: value_counts() gives a Series indexed by the sampled values.
data_Series = pd.Series(list_num)
dup_data_1 = data_Series.value_counts()  # dup_data_1 is itself a Series
# print(dup_data_1)

# Look the count up by label; fall back to 0 instead of raising KeyError in
# the (unlikely) case that the value 1 was never drawn.
print(dup_data_1.get(1, 0))


# --- Map each food to the animal it comes from and add it as a new column ---
data_1 = pd.DataFrame({
    "food": ["bacon", "pulled pork", "Bacon",
             "Pastrami", "Corned beef", "bacon",
             "pastrami", "honey ham", "nova lox"],
    "ounces": [4, 3, 12, 6, 7.5, 8, 3, 5, 6],
})

meat_to_animal = {
    "bacon": "pig",
    "pulled pork": "pig",
    "pastrami": "cow",
    "corned beef": "cow",
    "honey ham": "pig",
    "nova lox": "salmon",
}

# The food names are inconsistently capitalised while the mapping keys are
# all lower case, so normalise the names before looking them up.
lowercased = data_1['food'].str.lower()
# print(lowercased)
# print(data_1)

# Approach 1: Series.map accepts the dict directly and translates each
# lower-cased name to its animal.
data_1['animal'] = lowercased.map(meat_to_animal)
# print(data_1)

# Approach 2: the same column built with a function applied per element.
# dict.get is used so that a food missing from the mapping yields a missing
# value instead of aborting the whole column with a KeyError.
data_1['animal'] = data_1['food'].map(lambda x: meat_to_animal.get(x.lower()))
# print(data_1)

# --- Renaming axis labels by mapping a function over an Index ---
data_3 = pd.DataFrame(np.arange(12).reshape((3, 4)),
                      index=["Ohio", "Colorado", "New York"],
                      columns=["one", "two", "three", "four"])

# Alternative that upper-cases only the first four characters of each label:
# transform = lambda x: x[:4].upper()
transform = str.upper

# Index.map applies the function to every label in turn; assigning the result
# back replaces the axis labels in place.
data_3.columns = data_3.columns.map(transform)
data_3.index = data_3.index.map(transform)

# print(data_3)

# rename() builds a relabelled copy instead of modifying data_3:
# data_4 = data_3.rename(index=str.title, columns=str.lower)
# data_3
# data_4
将food和animal对应起来,并加入到dataframe新的一栏
# --- Detecting and capping outliers in normally distributed data ---
data = pd.DataFrame(np.random.randn(1000, 4))

# Column labelled 1, kept for the single-column example below.
col = data[1]
# print(col[np.abs(col) > 3])

# A DataFrame can be passed straight to element-wise functions such as
# np.abs, so the comparison yields a boolean frame of the same shape.
# It is computed once, before any value is modified.
exceeds_limit = np.abs(data) > 3

# Rows holding at least one value beyond +/-3. any() is given axis=1 by
# keyword so the test runs across the columns of each row.
row_bigger_than_3 = data[exceeds_limit.any(axis=1)]

# Assigning through a boolean mask only touches the elements where the mask
# is True: each outlier is replaced by +3 or -3 according to its sign.
data[exceeds_limit] = np.sign(data) * 3
print(row_bigger_than_3)
u_index = row_bigger_than_3.index

# Plain [] on a DataFrame selects columns; loc[rows, columns] selects by
# label, and with a single argument it selects rows.
print(data.loc[u_index])
print(np.sign(data))
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。