当前位置:   article > 正文

pandas_learning

pandas_learning

 1,普通操作

  1. import random
  2. import numpy as np
  3. import pandas
  4. import pandas as pd
  5. import seaborn
  6. # u = pd.DataFrame(np.random.randn(3, 4), columns=list('abcd'), index=list('123'))
  7. # print(u)
  8. t = pd.Series([-1, -1, -1, 2, 3, 1, 4, -4, 4, 4])
  9. t_u = pd.DataFrame({'a': t}, columns=['a'])
  10. # print(t_u)
  11. obj = pandas.Series([-1, -1, -1, 2, 3, 1, 4, -4, 4])
  12. # print(obj.unique())
  13. jlj = pd.Series(obj).value_counts()
  14. # print(jlj)
  15. mask = obj.isin([-1, 4])
  16. # print(mask) # mask 是一串逻辑词
  17. # print(obj[mask])
  18. tt = [np.random.randint(10) for p in range(10)]
  19. # print(tt)
  20. data = pd.DataFrame({'q1': [4, 0, 5, 2, 5, 1, 0, 1, 9, 8],
  21. 'q2': [4, 2, 7, 4, 6, 1, 3, 2, 3, 2],
  22. 'q3': [2, 5, 5, 3, 5, 2, 6, 2, 2, 2]
  23. })
  24. result = data.apply(pd.value_counts).fillna(0)
  25. # result = pd.Series(data).apply(value_counts).fillna(0)
  26. # print(data,'\n',result)
  27. data_already = [1,2,3,4,5,6]
  28. weight = [1,1,1,4,4,4]
  29. list_num = random.choices(data_already, weights = weight,k = 2000)
  30. s = pd.Series(list_num)
  31. count_1 = (s==1).sum()
  32. print(count_1)
  33. data_Series = pd.Series(list_num)
  34. dup_data_1 = pd.Series(data_Series).value_counts() # dup_data_1 本身是一个series
  35. # print(dup_data_1)
  36. print(dup_data_1[1])

2,映射(map)操作:按字典为每个元素匹配对应的值

  1. data_1 = pd.DataFrame({"food": ["bacon", "pulled pork", "Bacon",
  2. "Pastrami", "Corned beef", "bacon",
  3. "pastrami", "honey ham", "nova lox"],
  4. "ounces": [4, 3, 12, 6, 7.5, 8, 3, 5, 6]})
  5. meat_to_animal = {
  6. "bacon": "pig",
  7. "pulled pork": "pig",
  8. "pastrami": "cow",
  9. "corned beef": "cow",
  10. "honey ham": "pig",
  11. "nova lox": "salmon"
  12. }
  13. lowercased = data_1['food'].str.lower()
  14. # print(lowercased)
  15. # print(data_1)
  16. # 注意这个lowercased和map的使用
  17. data_1['animal'] = lowercased.map(meat_to_animal)
  18. # print(data_1)
  19. data_1['animal'] = data_1['food'].map(lambda x: meat_to_animal[x.lower()])
  20. # print(data_1)
  21. data_3 = pd.DataFrame(np.arange(12).reshape((3, 4)),
  22. index=["Ohio", "Colorado", "New York"],
  23. columns=["one", "two", "three", "four"])
  24. # transform = lambda x: x[:4].upper() # 对x前4个元素大写
  25. transform = lambda x: x.upper()
  26. data_3.columns = data_3.columns.map(transform) # map会挨个对data_3.columns的元素做运算
  27. data_3.index = data_3.index.map(transform)
  28. # print(data_3)
  29. # data_4 = data_3.rename(index = str.title,columns=str.lower)
  30. # data_3
  31. # data_4
  32. #

说明:上面前半段代码通过 map 将每种 food 对应到它的 animal,并把结果作为新的一列(animal)加入 DataFrame。

3, 替换DataFrame里的元素

  1. data = pd.DataFrame(np.random.randn(1000, 4))
  2. col = data[1]
  3. # print(col[np.abs(col)>3])
  4. # data本身是dataframe变量,可以作为内部数据的代名词代入函数运算
  5. row_bigger_than_3 = data[(np.abs(data) > 3).any(axis=1)] # any()括号内部需要关键字axis,or 报错
  6. # data括号内可以设置条件,表示只对满足条件的元素操作
  7. data[(np.abs(data)>3)] = np.sign(data)*3
  8. print(row_bigger_than_3)
  9. u_index = row_bigger_than_3.index
  10. # DataFrame 默认索引是列索引,可使用loc[rows,columns],只写一项则默认是rows索引
  11. print(data.loc[u_index])
  12. print(np.sign(data))

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/代码创新者/article/detail/60440
推荐阅读
相关标签
  

闽ICP备14008679号