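# Like / Dislike  (presumably vote-button labels left over from the web page this file was extracted from)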
#!/usr/bin/python
"""Demonstrate DataFrame column selection and saving a DataFrame to CSV.

Requires Python 3 (print is used as a function).
"""
import os  # used to look up the current working directory

import numpy as np
import pandas as pd
from pandas import DataFrame, Series  # explicit names instead of a star import

# Target CSV path inside the current working directory.  os.path.join picks
# the correct separator on every platform; a hard-coded '\\' is only a
# path separator on Windows.
file = os.path.join(os.getcwd(), '1.csv')
print(file)

# Build a 4x4 table with row labels a-d and column labels w-z.
data = DataFrame(np.arange(16).reshape(4, 4), index=list('abcd'), columns=list('wxyz'))
print(data)
print(data['w'])         # column 'w' via dict-style access -> Series
print(data.w)            # column 'w' via attribute access -> Series
print(data[['w']])       # column 'w' via a list of labels -> DataFrame
print(data[['w', 'z']])  # columns 'w' and 'z'

# The data that is actually written to disk.
data = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
print(data)
# index=False keeps the row index out of the file.
data.to_csv(file, index=False, encoding='utf-8')

"""Build DataFrames from dicts and write them to CSV, including an append."""
import numpy as np
import pandas as pd

# Case 1: every dict value is list-like, so pandas infers the rows itself.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
data1 = {'A': range(3), 'B': list("abc"), 'C': ['red', np.nan, 'yellow']}
df1 = pd.DataFrame(data1)

# Case 2: every dict value is a scalar string.  pandas cannot infer a row
# index from scalars alone, so index=[0] must be supplied explicitly.
data2 = {'姓名': 'fuhang', '性别': '男', '昵称': '那时的吻真香'}
df2 = pd.DataFrame(data2, index=[0])
print(df2)

df1.to_csv('Result1.csv', index=False, encoding='utf-8')
df2.to_csv('Result2.csv', index=False, encoding='utf-8')

# Append the same rows to Result1.csv.  header=False stops a second header
# line from being written into the middle of the data.
df1.to_csv('Result1.csv', index=False, mode='a', header=False, encoding='utf-8')
#!/usr/bin/python
# -*- coding: UTF-8 -*-

"""
Demonstrate the common ways of assigning values in a DataFrame.

Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
"""
import numpy as np
import pandas as pd

# Build a daily DatetimeIndex with six entries.
dates = pd.date_range('20130101', periods=6)
print('1')
print(dates)

# np.random.randn(d0, d1, ..., dn) draws samples from the standard normal
# distribution; each argument is the size of one dimension of the result.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print('2')
print(df)

# Positional assignment: third row, third column.
df.iloc[2, 2] = 1111
print('3')
print(df)

# Label-based assignment: row 2013-01-03, column 'D'.
df.loc['2013-01-03', 'D'] = 2222
print('3')
print(df)

# Zero out every positive value in column 'A'.  A single .loc call is used
# instead of the chained form df.A[mask] = 0, which may write to a temporary
# copy and is a no-op under pandas Copy-on-Write.
df.loc[df.A > 0, 'A'] = 0
print('4')
print(df)

# Add column 'F' filled with NaN.
df['F'] = np.nan
print('5')
print(df)

# Add column 'G' from a Series; values are aligned on the date index.
df['G'] = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130101', periods=6))
print('6')
print(df)
#!/usr/bin/python
# -*- coding: UTF-8 -*-

"""
Demonstrate handling of missing values: dropna, fillna and isnull.

Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
"""
import numpy as np
import pandas as pd

# date_range builds the date sequence used as the row index.
dates = pd.date_range('20130101', periods=6)
# 6x4 frame holding 0..23, rows labelled by date, columns labelled A-D.
df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=dates, columns=['A', 'B', 'C', 'D'])
print('1')
print(df)
# Expected output:
#              A   B   C   D
# 2013-01-01   0   1   2   3
# 2013-01-02   4   5   6   7
# 2013-01-03   8   9  10  11
# 2013-01-04  12  13  14  15
# 2013-01-05  16  17  18  19
# 2013-01-06  20  21  22  23

# Set first row, second column to NaN.  An integer column cannot hold NaN,
# so it is cast to float explicitly first; newer pandas versions deprecate
# the silent upcast that a direct assignment would otherwise rely on.
df['B'] = df['B'].astype(float)
df.iloc[0, 1] = np.nan
print('2')
print(df)
# Expected output:
#              A     B   C   D
# 2013-01-01   0   NaN   2   3
# 2013-01-02   4   5.0   6   7
# 2013-01-03   8   9.0  10  11
# 2013-01-04  12  13.0  14  15
# 2013-01-05  16  17.0  18  19
# 2013-01-06  20  21.0  22  23

# Set second row, third column to NaN (same explicit cast as above).
df['C'] = df['C'].astype(float)
df.iloc[1, 2] = np.nan
print('3')
print(df)
# Expected output:
#              A     B     C   D
# 2013-01-01   0   NaN   2.0   3
# 2013-01-02   4   5.0   NaN   7
# 2013-01-03   8   9.0  10.0  11
# 2013-01-04  12  13.0  14.0  15
# 2013-01-05  16  17.0  18.0  19
# 2013-01-06  20  21.0  22.0  23

print('4')
# By default dropna discards every row that contains a missing value.
print(df.dropna())
# Expected output:
#              A     B     C   D
# 2013-01-03   8   9.0  10.0  11
# 2013-01-04  12  13.0  14.0  15
# 2013-01-05  16  17.0  18.0  19
# 2013-01-06  20  21.0  22.0  23

print('5')
# Replace every missing value with 0.
print(df.fillna(value=0))
# Expected output:
#              A     B     C   D
# 2013-01-01   0   0.0   2.0   3
# 2013-01-02   4   5.0   0.0   7
# 2013-01-03   8   9.0  10.0  11
# 2013-01-04  12  13.0  14.0  15
# 2013-01-05  16  17.0  18.0  19
# 2013-01-06  20  21.0  22.0  23

print('6')
# Boolean mask: True wherever a value is missing.
print(pd.isnull(df))
# Expected output:
#                 A      B      C      D
# 2013-01-01  False   True  False  False
# 2013-01-02  False  False   True  False
# 2013-01-03  False  False  False  False
# 2013-01-04  False  False  False  False
# 2013-01-05  False  False  False  False
# 2013-01-06  False  False  False  False
#!/usr/bin/python
# -*- coding: UTF-8 -*-

"""
Demonstrate pd.concat along rows (axis=0) and along columns (axis=1).

Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
"""
import numpy as np
import pandas as pd

# concatenating
# ignore index
df1 = pd.DataFrame(np.ones((4, 4)) * 0, columns=['a', 'b', 'c', 'd'])
print(df1)
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
print(df2)
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
print(df3)

# axis=0 stacks the frames along the row dimension; the default join is
# 'outer'.  ignore_index=True renumbers the rows 0..n-1.
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
print("(1) test")
print(res)
# Expected output:
#      a    b    c    d
# 0  0.0  0.0  0.0  0.0
# 1  0.0  0.0  0.0  0.0
# 2  0.0  0.0  0.0  0.0
# 3  0.0  0.0  0.0  0.0
# 4  1.0  1.0  1.0  1.0
# 5  1.0  1.0  1.0  1.0
# 6  1.0  1.0  1.0  1.0
# 7  2.0  2.0  2.0  2.0
# 8  2.0  2.0  2.0  2.0
# 9  2.0  2.0  2.0  2.0

# axis=1 places the frames side by side along the column dimension; the
# default join is 'outer', so rows missing from the shorter frames become
# NaN.  ignore_index=True renumbers the columns 0..n-1.
res1 = pd.concat([df1, df2, df3], axis=1, ignore_index=True)
print("(2) test")
print(res1)
# Expected output:
#     0    1    2    3    4    5    6    7    8    9    10   11
# 0  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0  2.0  2.0  2.0  2.0
# 1  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0  2.0  2.0  2.0  2.0
# 2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0  2.0  2.0  2.0  2.0
# 3  0.0  0.0  0.0  0.0  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
Demonstrate pd.merge on a single key and on a composite key.

Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
"""
import pandas as pd

# merging two df by key/keys. (may be used in database)
# simple example
left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                      'C': ['C0', 'C1', 'C2', 'C3'],
                      'D': ['D0', 'D1', 'D2', 'D3']})
print(left)
print(right)
# Inner join (the default) on the 'key' column.
res = pd.merge(left, right, on='key')
print(res)
# Expected output:
#   key   A   B   C   D
# 0  K0  A0  B0  C0  D0
# 1  K1  A1  B1  C1  D1
# 2  K2  A2  B2  C2  D2
# 3  K3  A3  B3  C3  D3

# consider two keys
left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
                     'key2': ['K0', 'K1', 'K0', 'K1'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
                      'key2': ['K0', 'K0', 'K0', 'K0'],
                      'C': ['C0', 'C1', 'C2', 'C3'],
                      'D': ['D0', 'D1', 'D2', 'D3']})
print('1')
print(left)
print('2')
print(right)
print('3')
# Inner join on the composite key [key1, key2]; how='inner' is the default.
# how may be one of 'left', 'right', 'outer', 'inner'.
res = pd.merge(left, right, on=['key1', 'key2'], how='inner')
print("test inner join")
print(res)
# Expected output:
# test inner join
#   key1 key2   A   B   C   D
# 0   K0   K0  A0  B0  C0  D0
# 1   K1   K0  A2  B2  C1  D1
# 2   K1   K0  A2  B2  C2  D2

print('4')
# Left outer join: every row of the left frame is kept; rows without a
# match in the right frame get NaN in the right-hand columns.
res = pd.merge(left, right, on=['key1', 'key2'], how='left')
print("test left join")
print(res)
# Expected output:
# test left join
#   key1 key2   A   B    C    D
# 0   K0   K0  A0  B0   C0   D0
# 1   K0   K1  A1  B1  NaN  NaN
# 2   K1   K0  A2  B2   C1   D1
# 3   K1   K0  A2  B2   C2   D2
# 4   K2   K1  A3  B3  NaN  NaN
#!/usr/bin/python
# -*- coding: UTF-8 -*-

"""
Demonstrate plotting a pandas Series / DataFrame with matplotlib.

Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# plot data

# Series: 1000 standard-normal samples indexed 0..999.
data = pd.Series(np.random.randn(1000), index=np.arange(1000))
print('1')
print(data)
# Running total: each entry becomes the sum of itself and all earlier ones.
data = data.cumsum()
# NOTE: call data.plot() here to draw the Series as a line chart.
print('2')
print(data)

# DataFrame: 1000x4 standard-normal samples, columns A-D.
data = pd.DataFrame(np.random.randn(1000, 4), index=np.arange(1000), columns=list("ABCD"))
print('3')
print(data)
# Column-wise running total (each row is added to the rows before it).
data = data.cumsum()
print('4')
print(data)

# plot methods:
# 'bar', 'hist', 'box', 'kde', 'area', 'scatter', 'hexbin', 'pie'
ax = data.plot.scatter(x='A', y='B', color='DarkBlue', label="Class 1")
print('5')
print(ax)
# Passing ax=ax draws the second scatter onto the same axes as the first.
data.plot.scatter(x='A', y='C', color='LightGreen', label='Class 2', ax=ax)
plt.show()
# Copyright © 2003-2013 www.wpsshop.cn. All rights reserved.