当前位置:   article > 正文

数据分析常用库之【pandas】DataFrame方法操作_python pd.dataframe index=[0]

python pd.dataframe index=[0]
  1. #!/usr/bin/python
  2. from __future__ import print_function
  3. from __future__ import with_statement
  4. import os #获取当前工作路径
  5. import numpy as np
  6. import pandas as pd
  7. from pandas import * # Series, DataFrame
  8. file = os.getcwd() + '\\1.csv' #获取文件路径,文件命名并传给变量file
  9. print(file)
  10. '''生成4X4表格,索引列为abcd,行栏为wxyz'''
  11. data = DataFrame(np.arange(16).reshape(4,4),index=list('abcd'),columns=list('wxyz'))
  12. print(data)
  13. print(data['w']) #选择表格中的'w'列,使用类字典属性,返回的是Series类型
  14. print(data.w) #选择表格中的'w'列,使用点属性,返回的是Series类型
  15. print(data[['w']]) #选择表格中的'w'列,返回的是DataFrame类型
  16. print(data[['w','z']]) #选择表格中的'w'、'z'列
  17. data = DataFrame({'a':[1, 2, 3], 'b': [4, 5, 6]}) #要保存的数据
  18. print(data)
  19. data.to_csv(file,index=None,encoding='utf-8')
  20. import pandas as pd
  21. import numpy as np
  22. '''第一种写法:当值都是list类型的数据'''
  23. data1 = {'A':range(3),'B':list("abc"),'C':['red',np.NaN,'yellow']}
  24. df1=pd.DataFrame(data1)
  25. '''第二种写法:当值为string等标量类型的数据,此时需要加上 index=[0],因为字典的值全部是标量时,pandas无法推断行数,必须显式传入index'''
  26. data2 = {'姓名': 'fuhang', '性别': '男', '昵称': '那时的吻真香'}
  27. df2=pd.DataFrame(data2,index=[0])
  28. print(df2)
  29. df1.to_csv('Result1.csv',index=None,encoding='utf-8')
  30. df2.to_csv('Result2.csv',index=None,encoding='utf-8')
  31. df1.to_csv('Result1.csv',index=None,mode='a')
  1. #!/usr/bin/python
  2. # -*- coding: UTF-8 -*-
  3. """
  4. Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
  5. """
  6. from __future__ import print_function
  7. import pandas as pd
  8. import numpy as np
  9. #生成时间序列
  10. dates = pd.date_range('20130101', periods=6)
  11. print('1')
  12. print(dates)
  13. #numpy.random.randn(d0,d1,…,dn)
  14. #randn函数返回一个或者一组样本,具有标准正态分布
  15. #dn表示每个维度
  16. #返回值为指定维度的排列
  17. df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A', 'B', 'C', 'D'])
  18. print('2')
  19. print(df)
  20. #将第三行第三列的数值修改为1111
  21. df.iloc[2,2] = 1111
  22. print('3')
  23. print(df)
  24. #将索引列为2013-01-03,索引行为D的数值修改为2222
  25. df.loc['2013-01-03', 'D'] = 2222
  26. print('3')
  27. print(df)
  28. #将A列中大于0的数值置为0
  29. df.A[df.A>0] = 0
  30. print('4')
  31. print(df)
  32. #增加列F并置为空值(NaN)
  33. df['F'] = np.nan
  34. print('5')
  35. print(df)
  36. #增加列G,并按照日期索引对齐后依次赋值
  37. df['G'] = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130101', periods=6))
  38. print('6')
  39. print(df)
  1. #!/usr/bin/python
  2. # -*- coding: UTF-8 -*-
  3. """
  4. Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
  5. """
  6. from __future__ import print_function
  7. import pandas as pd
  8. import numpy as np
  9. #date_range函数生成日期序列
  10. dates = pd.date_range('20130101', periods=6)
  11. #生成带起点和终点的特定步长的排列,dataframe格式为6X4,索引列为日期,索引行为ABCD
  12. df = pd.DataFrame(np.arange(24).reshape((6,4)), index=dates, columns=['A', 'B', 'C', 'D'])
  13. print('1')
  14. print(df)
  15. '''
  16. A B C D
  17. 2013-01-01 0 1 2 3
  18. 2013-01-02 4 5 6 7
  19. 2013-01-03 8 9 10 11
  20. 2013-01-04 12 13 14 15
  21. 2013-01-05 16 17 18 19
  22. 2013-01-06 20 21 22 23'''
  23. #将第一行第二列的值置为空值
  24. df.iloc[0,1] = np.nan
  25. print('2')
  26. print(df)
  27. '''
  28. A B C D
  29. 2013-01-01 0 NaN 2 3
  30. 2013-01-02 4 5.0 6 7
  31. 2013-01-03 8 9.0 10 11
  32. 2013-01-04 12 13.0 14 15
  33. 2013-01-05 16 17.0 18 19
  34. 2013-01-06 20 21.0 22 23
  35. '''
  36. #将第二行第三列的值置为空值
  37. df.iloc[1,2] = np.nan
  38. print('3')
  39. print(df)
  40. '''
  41. A B C D
  42. 2013-01-01 0 NaN 2.0 3
  43. 2013-01-02 4 5.0 NaN 7
  44. 2013-01-03 8 9.0 10.0 11
  45. 2013-01-04 12 13.0 14.0 15
  46. 2013-01-05 16 17.0 18.0 19
  47. 2013-01-06 20 21.0 22.0 23
  48. '''
  49. print('4')
  50. #将含有空值的行丢弃掉
  51. print(df.dropna()) # dropna默认丢弃任何含有缺失的行:
  52. '''
  53. A B C D
  54. 2013-01-03 8 9.0 10.0 11
  55. 2013-01-04 12 13.0 14.0 15
  56. 2013-01-05 16 17.0 18.0 19
  57. 2013-01-06 20 21.0 22.0 23
  58. '''
  59. print('5')
  60. #将空值填充为0
  61. print(df.fillna(value=0))
  62. '''
  63. A B C D
  64. 2013-01-01 0 0.0 2.0 3
  65. 2013-01-02 4 5.0 0.0 7
  66. 2013-01-03 8 9.0 10.0 11
  67. 2013-01-04 12 13.0 14.0 15
  68. 2013-01-05 16 17.0 18.0 19
  69. 2013-01-06 20 21.0 22.0 23
  70. '''
  71. print('6')
  72. #判断每个元素是否为空值,空值的位置返回True,其余返回False
  73. print(pd.isnull(df))
  74. '''
  75. A B C D
  76. 2013-01-01 False True False False
  77. 2013-01-02 False False True False
  78. 2013-01-03 False False False False
  79. 2013-01-04 False False False False
  80. 2013-01-05 False False False False
  81. 2013-01-06 False False False False
  82. '''
  1. #!/usr/bin/python
  2. # -*- coding: UTF-8 -*-
  3. """
  4. Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
  5. """
  6. from __future__ import print_function
  7. import pandas as pd
  8. import numpy as np
  9. # concatenating
  10. # ignore index
  11. df1 = pd.DataFrame(np.ones((4,4))*0, columns=['a','b','c','d'])
  12. print(df1)
  13. df2 = pd.DataFrame(np.ones((3,4))*1, columns=['a','b','c','d'])
  14. print(df2)
  15. df3 = pd.DataFrame(np.ones((3,4))*2, columns=['a','b','c','d'])
  16. print(df3)
  17. #axis=0时,表示在行维度上扩展,默认为外连接
  18. res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
  19. print("(1) test")
  20. print(res)
  21. '''
  22. a b c d
  23. 0 0.0 0.0 0.0 0.0
  24. 1 0.0 0.0 0.0 0.0
  25. 2 0.0 0.0 0.0 0.0
  26. 3 0.0 0.0 0.0 0.0
  27. 4 1.0 1.0 1.0 1.0
  28. 5 1.0 1.0 1.0 1.0
  29. 6 1.0 1.0 1.0 1.0
  30. 7 2.0 2.0 2.0 2.0
  31. 8 2.0 2.0 2.0 2.0
  32. 9 2.0 2.0 2.0 2.0
  33. '''
  34. #axis=1时,表示在列维度上扩展,默认为外连接
  35. res1 = pd.concat([df1, df2, df3], axis=1, ignore_index=True)
  36. print("(2) test")
  37. print(res1)
  38. '''
  39. 0 1 2 3 4 5 6 7 8 9 10 11
  40. 0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 2.0 2.0 2.0 2.0
  41. 1 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 2.0 2.0 2.0 2.0
  42. 2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 2.0 2.0 2.0 2.0
  43. 3 0.0 0.0 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN NaN
  44. '''
  1. #!/usr/bin/python
  2. # -*- coding: UTF-8 -*-
  3. """
  4. Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
  5. """
  6. from __future__ import print_function
  7. import pandas as pd
  8. # merging two df by key/keys. (may be used in database)
  9. # simple example
  10. left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
  11. 'A': ['A0', 'A1', 'A2', 'A3'],
  12. 'B': ['B0', 'B1', 'B2', 'B3']})
  13. right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
  14. 'C': ['C0', 'C1', 'C2', 'C3'],
  15. 'D': ['D0', 'D1', 'D2', 'D3']})
  16. print(left)
  17. print(right)
  18. #内连接,关联字段为'key'
  19. res = pd.merge(left, right, on='key')
  20. print(res)
  21. '''
  22. key A B C D
  23. 0 K0 A0 B0 C0 D0
  24. 1 K1 A1 B1 C1 D1
  25. 2 K2 A2 B2 C2 D2
  26. 3 K3 A3 B3 C3 D3
  27. '''
  28. # consider two keys
  29. left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
  30. 'key2': ['K0', 'K1', 'K0', 'K1'],
  31. 'A': ['A0', 'A1', 'A2', 'A3'],
  32. 'B': ['B0', 'B1', 'B2', 'B3']})
  33. right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
  34. 'key2': ['K0', 'K0', 'K0', 'K0'],
  35. 'C': ['C0', 'C1', 'C2', 'C3'],
  36. 'D': ['D0', 'D1', 'D2', 'D3']})
  37. print('1')
  38. print(left)
  39. print('2')
  40. print(right)
  41. print('3')
  42. res = pd.merge(left, right, on=['key1', 'key2'], how='inner') # default for how='inner'
  43. # how = ['left', 'right', 'outer', 'inner']
  44. print("test inner join")
  45. #内连接,且关联主键为'[key1,key2]'联合主键
  46. print(res)
  47. '''
  48. test inner join
  49. key1 key2 A B C D
  50. 0 K0 K0 A0 B0 C0 D0
  51. 1 K1 K0 A2 B2 C1 D1
  52. 2 K1 K0 A2 B2 C2 D2
  53. '''
  54. print('4')
  55. res = pd.merge(left, right, on=['key1', 'key2'], how='left')
  56. print("test left join")
  57. #左外连接,以左边的表为主表
  58. print(res)
  59. '''
  60. test left join
  61. key1 key2 A B C D
  62. 0 K0 K0 A0 B0 C0 D0
  63. 1 K0 K1 A1 B1 NaN NaN
  64. 2 K1 K0 A2 B2 C1 D1
  65. 3 K1 K0 A2 B2 C2 D2
  66. 4 K2 K1 A3 B3 NaN NaN
  67. '''
  1. #!/usr/bin/python
  2. # -*- coding: UTF-8 -*-
  3. """
  4. Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
  5. """
  6. from __future__ import print_function
  7. import pandas as pd
  8. import numpy as np
  9. import matplotlib.pyplot as plt
  10. # plot data
  11. # Series
  12. data = pd.Series(np.random.randn(1000), index=np.arange(1000))
  13. print('1')
  14. print(data)
  15. data = data.cumsum()
  16. ##data.plot()
  17. print('2')
  18. print(data)
  19. # DataFrame
  20. data = pd.DataFrame(np.random.randn(1000, 4), index=np.arange(1000), columns=list("ABCD"))
  21. print('3')
  22. print(data)
  23. #按列累加求和:每一行的值为该行与之前所有行的数值之和
  24. data = data.cumsum()
  25. print('4')
  26. print(data)
  27. # plot methods:
  28. # 'bar', 'hist', 'box', 'kde', 'area', 'scatter', 'hexbin', 'pie'
  29. ax = data.plot.scatter(x='A', y='B', color='DarkBlue', label="Class 1")
  30. print('5')
  31. print(ax)
  32. data.plot.scatter(x='A', y='C', color='LightGreen', label='Class 2', ax=ax)
  33. plt.show()

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Monodyee/article/detail/153080
推荐阅读
相关标签
  

闽ICP备14008679号