赞
踩
直接通过赋值为空,添加一列。
- >>> import numpy as np
- >>> import pandas as pd
- >>> df = pd.DataFrame(np.arange(12).reshape(3, 4), index = ['row1', 'row2', 'row3'], columns=['col1', 'col2', 'col3', 'col4'])
- >>> df
- col1 col2 col3 col4
- row1 0 1 2 3
- row2 4 5 6 7
- row3 8 9 10 11
- >>>
- >>> df['col5']=''
- >>>
- >>> df
- col1 col2 col3 col4 col5
- row1 0 1 2 3
- row2 4 5 6 7
- row3 8 9 10 11
通过一个list给新加的列赋值,添加一列。注意,list里的元素个数要跟dataframe的行数一致,否则会报长度对不齐的错误。
- >>> df['col6']=[1,1,1]
- >>>
- >>> df
- col1 col2 col3 col4 col5 col6
- row1 0 1 2 3 1
- row2 4 5 6 7 1
- row3 8 9 10 11 1
-
- >>> df['col7']=[]
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3119, in __setitem__
- self._set_item(key, value)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3194, in _set_item
- value = self._sanitize_column(key, value)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3391, in _sanitize_column
- value = _sanitize_index(value, self.index, copy=False)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/series.py", line 4001, in _sanitize_index
- raise ValueError('Length of values does not match length of ' 'index')
- ValueError: Length of values does not match length of index
- >>>
- >>> df['col7']=[1]
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3119, in __setitem__
- self._set_item(key, value)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3194, in _set_item
- value = self._sanitize_column(key, value)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3391, in _sanitize_column
- value = _sanitize_index(value, self.index, copy=False)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/series.py", line 4001, in _sanitize_index
- raise ValueError('Length of values does not match length of ' 'index')
- ValueError: Length of values does not match length of index
如果需要在指定位置处添加一列,用insert方法实现。DataFrame.insert(loc, column, value, allow_duplicates=False),loc-插入位置(从0开始的列序号),column-列名,value-元素值,allow_duplicates-是否允许出现重复的列名。
- >>> df
- col1 col2 col3 col4 col5 col6
- row1 0 1 2 3 1
- row2 4 5 6 7 1
- row3 8 9 10 11 1
- >>>
- >>> df.insert(1,'col7','')
- >>>
- >>> df
- col1 col7 col2 col3 col4 col5 col6
- row1 0 1 2 3 1
- row2 4 5 6 7 1
- row3 8 9 10 11 1
- >>>
- >>> df.insert(4,'col8',[2,2,2])
- >>>
- >>> df
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- row3 8 9 10 2 11 1
- >>> df
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- row3 8 9 10 2 11 1
- >>>
- >>> col1=df['col1']
- >>>
- >>> col1
- row1 0
- row2 4
- row3 8
- Name: col1, dtype: int64
- >>>
- >>> type(col1)
- <class 'pandas.core.series.Series'>
注意,不能用行名取行,也不能用数字索引取行列,以下方式都会报错。
- >>> row1=df['row1']
- Traceback (most recent call last):
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3078, in get_loc
- return self._engine.get_loc(key)
- File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
- File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
- File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
- File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
- KeyError: 'row1'
-
- During handling of the above exception, another exception occurred:
-
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2688, in __getitem__
- return self._getitem_column(key)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2695, in _getitem_column
- return self._get_item_cache(key)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/generic.py", line 2489, in _get_item_cache
- values = self._data.get(item)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/internals.py", line 4115, in get
- loc = self.items.get_loc(item)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3080, in get_loc
- return self._engine.get_loc(self._maybe_cast_indexer(key))
- File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
- File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
- File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
- File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
- KeyError: 'row1'
- >>>
- >>> row1=df[0]
- Traceback (most recent call last):
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3078, in get_loc
- return self._engine.get_loc(key)
- File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
- File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
- File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
- File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
- KeyError: 0
-
- During handling of the above exception, another exception occurred:
-
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2688, in __getitem__
- return self._getitem_column(key)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2695, in _getitem_column
- return self._get_item_cache(key)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/generic.py", line 2489, in _get_item_cache
- values = self._data.get(item)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/internals.py", line 4115, in get
- loc = self.items.get_loc(item)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3080, in get_loc
- return self._engine.get_loc(self._maybe_cast_indexer(key))
- File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
- File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
- File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
- File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
- KeyError: 0
- >>>
- >>> row1=df[0,:]
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2688, in __getitem__
- return self._getitem_column(key)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2695, in _getitem_column
- return self._get_item_cache(key)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/generic.py", line 2487, in _get_item_cache
- res = cache.get(item)
- TypeError: unhashable type: 'slice'
- >>>
- >>> col1=df[:,0]
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2688, in __getitem__
- return self._getitem_column(key)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2695, in _getitem_column
- return self._get_item_cache(key)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/generic.py", line 2487, in _get_item_cache
- res = cache.get(item)
- TypeError: unhashable type: 'slice'
取某些行可以用切片限定行号范围的方式(行号从0开始,包含起始行号,不包含结束行号)。
- >>> row1=df[0:1]
- >>>
- >>> row1
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- >>>
- >>> row2=df[1:2]
- >>>
- >>> row2
- col1 col7 col2 col3 col8 col4 col5 col6
- row2 4 5 6 2 7 1
- >>>
- >>> row23=df[1:3]
- >>>
- >>> row23
- col1 col7 col2 col3 col8 col4 col5 col6
- row2 4 5 6 2 7 1
- row3 8 9 10 2 11 1
- >>> df
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- row3 8 9 10 2 11 1
-
- # 取第一行
- >>> row1=df.iloc[0]
- >>>
- >>> row1
- col1 0
- col7
- col2 1
- col3 2
- col8 2
- col4 3
- col5
- col6 1
- Name: row1, dtype: object
-
- # 取第1,2,3行
- >>> row123=df.iloc[[0,1,2]]
- >>>
- >>> row123
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- row3 8 9 10 2 11 1
- >>>
- >>> row12=df.iloc[[0,1]]
- >>>
- >>> row12
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
-
- # 连续取多行
- >>> row12=df.iloc[0:2]
- >>>
- >>> row12
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- >>>
- >>> row12=df.iloc[0:2,:]
- >>>
- >>> row12
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- >>> df
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- row3 8 9 10 2 11 1
-
- # 取行时可以不指定列,但取列时必须先用":"指定全部行,再用","分隔后指定列
- >>> col1=df.iloc[:,0]
- >>>
- >>> col1
- row1 0
- row2 4
- row3 8
- Name: col1, dtype: int64
-
- # 用list[0,2]指定取第1,3列
- >>> col13=df.iloc[:,[0,2]]
- >>>
- >>> col13
- col1 col2
- row1 0 1
- row2 4 5
- row3 8 9
- >>>
- >>> col123=df.iloc[:,[0,1,2]]
- >>>
- >>> col123
- col1 col7 col2
- row1 0 1
- row2 4 5
- row3 8 9
-
- # 用0:2指定连续的多列
- >>> col12=df.iloc[:,0:2]
- >>>
- >>> col12
- col1 col7
- row1 0
- row2 4
- row3 8
- >>> df
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- row3 8 9 10 2 11 1
- >>>
- >>> row1=df.loc['row1']
- >>>
- >>> row1
- col1 0
- col7
- col2 1
- col3 2
- col8 2
- col4 3
- col5
- col6 1
- Name: row1, dtype: object
- >>>
- >>> row2=df.loc['row2',:]
- >>>
- >>> row2
- col1 4
- col7
- col2 5
- col3 6
- col8 2
- col4 7
- col5
- col6 1
- Name: row2, dtype: object
- >>>
- >>> row12=df.loc[['row1','row2']]
- >>>
- >>> row12
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- >>>
- >>> row123=df.loc['row1':'row3',:]
- >>>
- >>> row123
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- row3 8 9 10 2 11 1
- >>> df
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- row3 8 9 10 2 11 1
- >>>
- >>> col1=df.loc[:,'col1']
- >>>
- >>> col1
- row1 0
- row2 4
- row3 8
- Name: col1, dtype: int64
-
- >>> col128=df.loc[:,['col1','col2','col8']]
- >>>
- >>> col128
- col1 col2 col8
- row1 0 1 2
- row2 4 5 2
- row3 8 9 2
- >>>
- >>> col1723=df.loc[:,'col1':'col3']
- >>>
- >>> col1723
- col1 col7 col2 col3
- row1 0 1 2
- row2 4 5 6
- row3 8 9 10
- >>> df
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- row3 8 9 10 2 11 1
-
- # 取出'col1'列中值为4的所有行
- >>> df.loc[df['col1']==4]
- col1 col7 col2 col3 col8 col4 col5 col6
- row2 4 5 6 2 7 1
- >>>
- >>> df.loc[df['col8']==2]
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- row3 8 9 10 2 11 1
-
- >>> df.loc[~(df['col1']!=4)]
- col1 col7 col2 col3 col8 col4 col5 col6
- row2 4 5 6 2 7 1
- >>>
- >>> df.loc[~(df['col1']==4)]
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row3 8 9 10 2 11 1
- >>> df
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 4 5 6 2 7 1
- row3 8 9 10 2 11 1
-
- # 把第一列中值为4的行选取出来,再选择第'col1'列的元素赋值成新的44
- >>> df.loc[df['col1']==4,'col1']=44
- >>>
- >>> df
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 44 5 6 2 7 1
- row3 8 9 10 2 11 1
-
- # 把第一列中值为44的行选取出来,再选择第'col4'列的元素赋值成新的44
- >>> df.loc[df['col1']==44,'col4']=44
- >>>
- >>> df
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 44 5 6 2 44 1
- row3 8 9 10 2 11 1
-
- # 选择'row3'行,其中有值为2的元素赋值成新的44
- >>> df.loc['row3',df.loc['row3',:]==2]=44
- >>>
- >>> df
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 44 5 6 2 44 1
- row3 8 9 10 44 11 1
-
- # 改变某行某列的元素值
- >>> df
- col1 col2 col3 col4
- row1 0 1 2 3
- row2 4 5 6 7
- row3 8 9 10 11
- >>>
- >>> df.at['row1','col1']=100
- >>>
- >>> df
- col1 col2 col3 col4
- row1 100 1 2 3
- row2 4 5 6 7
- row3 8 9 10 11
- >>> df
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 0 1 2 2 3 1
- row2 44 5 6 2 44 1
- row3 8 9 10 44 11 1
-
- # 将所有值大于40的元素赋值为新的40,注意DataFrame包含混合类型的列(例如这里值为空字符串的col7和col5)时会报错,需要先删除这些列或者把它们改成数值类型
- >>> df[df>40]=40
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3114, in __setitem__
- self._setitem_frame(key, value)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3161, in _setitem_frame
- self._check_inplace_setting(value)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/generic.py", line 4503, in _check_inplace_setting
- raise TypeError('Cannot do inplace boolean setting on '
- TypeError: Cannot do inplace boolean setting on mixed-types with a non np.nan value
- >>>
- >>>
- >>> df>40
- col1 col7 col2 col3 col8 col4 col5 col6
- row1 False True False False False False True False
- row2 True True False False False True True False
- row3 False True False False True False True False
-
- >>> df=df.drop(['col7'], axis=1)
- >>> df=df.drop(['col5'], axis=1)
- >>>
- >>> df
- col1 col2 col3 col8 col4 col6
- row1 0 1 2 2 3 1
- row2 44 5 6 2 44 1
- row3 8 9 10 44 11 1
- >>>
- >>> df[df>40]=40
- >>>
- >>> df
- col1 col2 col3 col8 col4 col6
- row1 0 1 2 2 3 1
- row2 40 5 6 2 40 1
- row3 8 9 10 40 11 1
- >>> df
- col1 col2 col3 col8 col4 col6
- row1 0 1 2 2 3 1
- row2 40 5 6 2 40 1
- row3 8 9 10 40 11 1
-
- >>> for index in df['col1'].index:
- ... idx=df['col1'].get(index)
- ... print(idx)
- ...
- 0
- 40
- 8
- >>> df
- col1 col2 col3 col8 col4 col6
- row1 0 1 2 2 3 1
- row2 40 5 6 2 40 1
- row3 8 9 10 40 11 1
- >>>
- >>> df.at[4,'col1']
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexing.py", line 2141, in __getitem__
- key = self._convert_key(key)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexing.py", line 2227, in _convert_key
- raise ValueError("At based indexing on an non-integer "
- ValueError: At based indexing on an non-integer index can only have non-integer indexers
- >>>
- >>> df.at['row4','col1']
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexing.py", line 2142, in __getitem__
- return self.obj._get_value(*key, takeable=self._takeable)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2539, in _get_value
- return engine.get_value(series._values, index)
- File "pandas/_libs/index.pyx", line 106, in pandas._libs.index.IndexEngine.get_value
- File "pandas/_libs/index.pyx", line 114, in pandas._libs.index.IndexEngine.get_value
- File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
- File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
- File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
- KeyError: 'row4'
- >>>
- >>> df.at['row2','col1']
- 40
-
- >>> df.iloc[1].at['col1']
- 40
- >>> df
- col1 col2 col3 col8 col4 col6
- row1 0 1 2 2 3 1
- row2 40 5 6 2 40 1
- row3 8 9 10 40 11 1
- >>>
- >>> df.iat[1,2]
- 6
- >>>
- >>> df.iloc[3].iat[4]
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexing.py", line 1478, in __getitem__
- return self._getitem_axis(maybe_callable, axis=axis)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexing.py", line 2102, in _getitem_axis
- self._validate_integer(key, axis)
- File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexing.py", line 2009, in _validate_integer
- raise IndexError("single positional indexer is out-of-bounds")
- IndexError: single positional indexer is out-of-bounds
- >>>
- >>> df.iloc[2].iat[4]
- 11
- >>> df
- col1 col2 col3 col8 col4 col6
- row1 0 1 2 2 3 1
- row2 40 5 6 2 40 1
- row3 8 9 10 40 11 1
- >>>
- >>>
- >>> df.loc['row1','col1']
- 0
- >>>
- >>> df.loc['row1']
- col1 0
- col2 1
- col3 2
- col8 2
- col4 3
- col6 1
- Name: row1, dtype: int64
- >>>
- >>> df.loc[['row1','row3']]
- col1 col2 col3 col8 col4 col6
- row1 0 1 2 2 3 1
- row3 8 9 10 40 11 1
- >>> df
- col1 col2 col3 col8 col4 col6
- row1 0 1 2 2 3 1
- row2 40 5 6 2 40 1
- row3 8 9 10 40 11 1
- >>>
- >>> df.iloc[0,2]
- 2
- >>>
- >>> df.iloc[2]
- col1 8
- col2 9
- col3 10
- col8 40
- col4 11
- col6 1
- Name: row3, dtype: int64
【1】https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.insert.html
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。