- # 186、pandas.Series.is_monotonic_increasing属性
- property pandas.Series.is_monotonic_increasing
- Return boolean if values in the object are monotonically increasing.
- Returns:
- bool
- import pandas as pd
- # 创建一个单调递增的系列
- data_increasing = pd.Series([1, 2, 3, 4, 5])
- # 判断系列中的值是否单调递增
- is_monotonic_increasing = data_increasing.is_monotonic_increasing
- print(f"系列值是否单调递增: {is_monotonic_increasing}")
- # 创建一个非单调递增的系列
- data_not_increasing = pd.Series([1, 3, 2, 4, 5])
- # 判断系列中的值是否单调递增
- is_monotonic_increasing = data_not_increasing.is_monotonic_increasing
- print(f"系列值是否单调递增: {is_monotonic_increasing}")
- # 系列值是否单调递增: True
- # 系列值是否单调递增: False
- # 187、pandas.Series.is_monotonic_decreasing属性
- property pandas.Series.is_monotonic_decreasing
- Return boolean if values in the object are monotonically decreasing.
- Returns:
- bool
- import pandas as pd
- # 创建一个单调递减的系列
- data_decreasing = pd.Series([5, 4, 3, 2, 1])
- # 判断系列中的值是否单调递减
- is_monotonic_decreasing = data_decreasing.is_monotonic_decreasing
- print(f"系列值是否单调递减: {is_monotonic_decreasing}")
- # 创建一个非单调递减的系列
- data_not_decreasing = pd.Series([5, 3, 4, 2, 1])
- # 判断系列中的值是否单调递减
- is_monotonic_decreasing = data_not_decreasing.is_monotonic_decreasing
- print(f"系列值是否单调递减: {is_monotonic_decreasing}")
- # 系列值是否单调递减: True
- # 系列值是否单调递减: False
- # 188、pandas.Series.value_counts方法
- pandas.Series.value_counts(normalize=False, sort=True, ascending=False, bins=None, dropna=True)
- Return a Series containing counts of unique values.
- The resulting object will be in descending order so that the first element is the most frequently-occurring element. Excludes NA values by default.
- Parameters:
- normalize
- bool, default False
- If True then the object returned will contain the relative frequencies of the unique values.
- sort
- bool, default True
- Sort by frequencies when True. Preserve the order of the data when False.
- ascending
- bool, default False
- Sort in ascending order.
- bins
- int, optional
- Rather than count values, group them into half-open bins, a convenience for pd.cut, only works with numeric data.
- dropna
- bool, default True
- Don’t include counts of NaN.
- Returns:
- Series

- # 188-1、基本用法
- import pandas as pd
- data = pd.Series([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
- counts = data.value_counts()
- print(counts, end='\n\n')
- # 188-2、返回相对频率
- import pandas as pd
- data = pd.Series([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
- counts_normalized = data.value_counts(normalize=True)
- print(counts_normalized, end='\n\n')
- # 188-3、按升序排列
- import pandas as pd
- data = pd.Series([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
- counts_ascending = data.value_counts(ascending=True)
- print(counts_ascending, end='\n\n')
- # 188-4、分箱计数
- import pandas as pd
- data = pd.Series([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
- counts_bins = data.value_counts(bins=3)
- print(counts_bins, end='\n\n')
- # 188-5、包括NaN值的计数
- import pandas as pd
- data_with_nan = pd.Series([1, 2, 2, 3, 3, 3, 4, 4, 4, 4, None])
- counts_with_nan = data_with_nan.value_counts(dropna=False)
- print(counts_with_nan)

- # 188-1、基本用法
- # 4 4
- # 3 3
- # 2 2
- # 1 1
- # Name: count, dtype: int64
- # 188-2、返回相对频率
- # 4 0.4
- # 3 0.3
- # 2 0.2
- # 1 0.1
- # Name: proportion, dtype: float64
- # 188-3、按升序排列
- # 1 1
- # 2 2
- # 3 3
- # 4 4
- # Name: count, dtype: int64
- # 188-4、分箱计数
- # (3.0, 4.0] 4
- # (0.996, 2.0] 3
- # (2.0, 3.0] 3
- # Name: count, dtype: int64
- # 188-5、包括NaN值的计数
- # 4.0 4
- # 3.0 3
- # 2.0 2
- # 1.0 1
- # NaN 1
- # Name: count, dtype: int64

- # 189、pandas.Series.align方法
- pandas.Series.align(other, join='outer', axis=None, level=None, copy=None, fill_value=None, method=_NoDefault.no_default, limit=_NoDefault.no_default, fill_axis=_NoDefault.no_default, broadcast_axis=_NoDefault.no_default)
- Align two objects on their axes with the specified join method.
- Join method is specified for each axis Index.
- Parameters:
- other: DataFrame or Series
- join: {‘outer’, ‘inner’, ‘left’, ‘right’}, default ‘outer’
- Type of alignment to be performed.
- left: use only keys from left frame, preserve key order.
- right: use only keys from right frame, preserve key order.
- outer: use union of keys from both frames, sort keys lexicographically.
- inner: use intersection of keys from both frames, preserve the order of the left keys.
- axis: allowed axis of the other object, default None
- Align on index (0), columns (1), or both (None).
- level: int or level name, default None
- Broadcast across a level, matching Index values on the passed MultiIndex level.
- copy: bool, default True
- Always returns new objects. If copy=False and no reindexing is required then original objects are returned.
- Note
- The copy keyword will change behavior in pandas 3.0. Copy-on-Write will be enabled by default, which means that all methods with a copy keyword will use a lazy copy mechanism to defer the copy and ignore the copy keyword. The copy keyword will be removed in a future version of pandas.
- You can already get the future behavior and improvements through enabling copy on write pd.options.mode.copy_on_write = True
- fill_value: scalar, default np.nan
- Value to use for missing values. Defaults to NaN, but can be any “compatible” value.
- method: {‘backfill’, ‘bfill’, ‘pad’, ‘ffill’, None}, default None
- Method to use for filling holes in reindexed Series:
- pad / ffill: propagate last valid observation forward to next valid.
- backfill / bfill: use NEXT valid observation to fill gap.
- Deprecated since version 2.1.
- limit: int, default None
- If method is specified, this is the maximum number of consecutive NaN values to forward/backward fill. In other words, if there is a gap with more than this number of consecutive NaNs, it will only be partially filled. If method is not specified, this is the maximum number of entries along the entire axis where NaNs will be filled. Must be greater than 0 if not None.
- Deprecated since version 2.1.
- fill_axis: {0 or ‘index’} for Series, {0 or ‘index’, 1 or ‘columns’} for DataFrame, default 0
- Filling axis, method and limit.
- Deprecated since version 2.1.
- broadcast_axis: {0 or ‘index’} for Series, {0 or ‘index’, 1 or ‘columns’} for DataFrame, default None
- Broadcast values along this axis, if aligning two objects of different dimensions.
- Deprecated since version 2.1.
- Returns:
- tuple of (Series/DataFrame, type of other)
- Aligned objects.

189-2-2、join(可选,默认值为'outer'):{'outer', 'inner', 'left', 'right'}
189-2-2-1、'outer': 返回两个对象的并集,包含所有索引。
189-2-2-2、'inner': 返回两个对象的交集,仅包含共同的索引。
189-2-2-3、'left': 返回左侧对象的所有索引,缺失的右侧对象的索引用NaN填充。
189-2-2-4、'right': 返回右侧对象的所有索引,缺失的左侧对象的索引用NaN填充。
返回一个元组,包含对齐后的两个对象,形如(left, right):left是对齐后的左侧对象,
right是对齐后的右侧对象。如果指定了fill_value,则缺失值用该值填充,否则保留为NaN。
- # 189-1、合并不同客户的销售数据
- import pandas as pd
- # 创建两个不同的销售数据 Series
- sales_jan = pd.Series([200, 300], index=['Alice', 'Bob'])
- sales_feb = pd.Series([150, 400, 250], index=['Alice', 'Charlie', 'Bob'])
- # 使用align方法对齐两个Series
- aligned_jan, aligned_feb = sales_jan.align(sales_feb, join='outer', fill_value=0)
- print("对齐后的1月销售数据:")
- print(aligned_jan)
- print("\n对齐后的2月销售数据:")
- print(aligned_feb)
- # 计算每个客户在1月和2月的销售总额
- total_sales = aligned_jan + aligned_feb
- print("\n客户总销售额:")
- print(total_sales, end='\n\n')
- # 189-2、时间序列对比
- import pandas as pd
- # 创建两个时间序列数据Series
- temperature = pd.Series([22, 24, 23], index=pd.date_range('2023-01-01', periods=3))
- humidity = pd.Series([30, 32], index=pd.date_range('2023-01-01', periods=2))
- # 使用align方法对齐两个时间序列
- aligned_temp, aligned_hum = temperature.align(humidity, join='inner')
- print("对齐后的温度数据:")
- print(aligned_temp)
- print("\n对齐后的湿度数据:")
- print(aligned_hum)
- # 计算温度与湿度的关系
- correlation = aligned_temp.corr(aligned_hum)
- print(f"\n温度与湿度的相关性:{correlation}", end='\n\n')
- # 189-3、处理多层索引
- import pandas as pd
- # 创建多层索引的Series
- index = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 1), ('B', 2)])
- data1 = pd.Series([10, 20, 30, 40], index=index)
- index2 = pd.MultiIndex.from_tuples([('A', 1), ('B', 1), ('B', 3)])
- data2 = pd.Series([5, 25, 15], index=index2)
- # 对齐多层索引的Series
- aligned_data1, aligned_data2 = data1.align(data2, join='outer', fill_value=0)
- print("对齐后的第一组数据:")
- print(aligned_data1)
- print("\n对齐后的第二组数据:")
- print(aligned_data2)
- # 计算对齐后数据的和
- sum_data = aligned_data1 + aligned_data2
- print("\n对齐后数据的和:")
- print(sum_data, end='\n\n')
- # 189-4、数据清理与补齐
- import pandas as pd
- # 创建带有缺失值的Series
- data1 = pd.Series([1, 2, None, 4], index=['A', 'B', 'C', 'D'])
- data2 = pd.Series([None, 2, 3, None], index=['A', 'B', 'C', 'E'])
- # 对齐两个Series,使用fill_value填充缺失值
- aligned_data1, aligned_data2 = data1.align(data2, join='outer', fill_value=0)
- print("对齐后的数据1:")
- print(aligned_data1)
- print("\n对齐后的数据2:")
- print(aligned_data2)
- # 计算补齐后的总和
- total = aligned_data1 + aligned_data2
- print("\n补齐后的数据总和:")
- print(total, end='\n\n')
- # 189-5、数据比较
- import pandas as pd
- # 创建两个带有不同客户的销售数据Series
- sales_last_month = pd.Series([300, 500, 200], index=['Alice', 'Bob', 'Charlie'])
- sales_this_month = pd.Series([350, 450, 300, 100], index=['Alice', 'Bob', 'David', 'Charlie'])
- # 对齐两个Series
- aligned_last_month, aligned_this_month = sales_last_month.align(sales_this_month, join='outer', fill_value=0)
- print("上个月的销售数据:")
- print(aligned_last_month)
- print("\n这个月的销售数据:")
- print(aligned_this_month)
- # 计算销售增长
- sales_growth = aligned_this_month - aligned_last_month
- print("\n销售增长(这个月 - 上个月):")
- print(sales_growth, end='\n\n')
- # 189-6、索引重命名与对齐
- import pandas as pd
- # 创建两个带有不同索引的Series
- data_a = pd.Series([5, 10, 15], index=['x', 'y', 'z'])
- data_b = pd.Series([1, 2, 3], index=['y', 'z', 'w'])
- # 重命名索引以便于理解
- data_a.index = ['Item_A1', 'Item_A2', 'Item_A3']
- data_b.index = ['Item_B1', 'Item_B2', 'Item_B3']
- # 使用align方法对齐
- aligned_a, aligned_b = data_a.align(data_b, join='outer', fill_value=0)
- print("对齐后的数据A:")
- print(aligned_a)
- print("\n对齐后的数据B:")
- print(aligned_b)
- # 计算A和B的和
- total_data = aligned_a + aligned_b
- print("\n对齐后数据A和B的和:")
- print(total_data, end='\n\n')
- # 189-7、填补缺失数据
- import pandas as pd
- # 创建一个带有缺失值的Series
- series_a = pd.Series([1, 2, 3, None], index=['A', 'B', 'C', 'D'])
- series_b = pd.Series([None, 2, None, 4, 5], index=['A', 'B', 'C', 'E', 'F'])
- # 对齐两个Series,使用fill_value填充缺失值
- aligned_a, aligned_b = series_a.align(series_b, join='outer', fill_value=0)
- print("对齐后的数据A:")
- print(aligned_a)
- print("\n对齐后的数据B:")
- print(aligned_b)
- # 计算合并后的序列总和
- filled_data = aligned_a + aligned_b
- print("\n填补缺失值后的数据总和:")
- print(filled_data, end='\n\n')
- # 189-8、在数据科学项目中的应用
- import pandas as pd
- # 创建Product价格数据和销量数据
- prices = pd.Series([100, 150, 200], index=['Product_A', 'Product_B', 'Product_C'])
- sales = pd.Series([10, 5, 8, 3], index=['Product_A', 'Product_B', 'Product_D', 'Product_C'])
- # 对齐价格和销量数据
- aligned_prices, aligned_sales = prices.align(sales, join='outer', fill_value=0)
- print("对齐后的价格数据:")
- print(aligned_prices)
- print("\n对齐后的销量数据:")
- print(aligned_sales)
- # 计算每个产品的销售收入
- revenue = aligned_prices * aligned_sales
- print("\n每个产品的销售收入:")
- print(revenue, end='\n\n')

- # 189-1、合并不同客户的销售数据
- # 对齐后的1月销售数据:
- # Alice 200.0
- # Bob 300.0
- # Charlie 0.0
- # dtype: float64
- #
- # 对齐后的2月销售数据:
- # Alice 150
- # Bob 250
- # Charlie 400
- # dtype: int64
- #
- # 客户总销售额:
- # Alice 350.0
- # Bob 550.0
- # Charlie 400.0
- # dtype: float64
- # 189-2、时间序列对比
- # 对齐后的温度数据:
- # 2023-01-01 22
- # 2023-01-02 24
- # Freq: D, dtype: int64
- #
- # 对齐后的湿度数据:
- # 2023-01-01 30
- # 2023-01-02 32
- # Freq: D, dtype: int64
- #
- # 温度与湿度的相关性:0.9999999999999999
- # 189-3、处理多层索引
- # 对齐后的第一组数据:
- # A 1 10.0
- # 2 20.0
- # B 1 30.0
- # 2 40.0
- # 3 0.0
- # dtype: float64
- #
- # 对齐后的第二组数据:
- # A 1 5.0
- # 2 0.0
- # B 1 25.0
- # 2 0.0
- # 3 15.0
- # dtype: float64
- #
- # 对齐后数据的和:
- # A 1 15.0
- # 2 20.0
- # B 1 55.0
- # 2 40.0
- # 3 15.0
- # dtype: float64
- # 189-4、数据清理与补齐
- # 对齐后的数据1:
- # A 1.0
- # B 2.0
- # C 0.0
- # D 4.0
- # E 0.0
- # dtype: float64
- #
- # 对齐后的数据2:
- # A 0.0
- # B 2.0
- # C 3.0
- # D 0.0
- # E 0.0
- # dtype: float64
- #
- # 补齐后的数据总和:
- # A 1.0
- # B 4.0
- # C 3.0
- # D 4.0
- # E 0.0
- # dtype: float64
- # 189-5、数据比较
- # 上个月的销售数据:
- # Alice 300.0
- # Bob 500.0
- # Charlie 200.0
- # David 0.0
- # dtype: float64
- #
- # 这个月的销售数据:
- # Alice 350
- # Bob 450
- # Charlie 100
- # David 300
- # dtype: int64
- #
- # 销售增长(这个月 - 上个月):
- # Alice 50.0
- # Bob -50.0
- # Charlie -100.0
- # David 300.0
- # dtype: float64
- # 189-6、索引重命名与对齐
- # 对齐后的数据A:
- # Item_A1 5.0
- # Item_A2 10.0
- # Item_A3 15.0
- # Item_B1 0.0
- # Item_B2 0.0
- # Item_B3 0.0
- # dtype: float64
- #
- # 对齐后的数据B:
- # Item_A1 0.0
- # Item_A2 0.0
- # Item_A3 0.0
- # Item_B1 1.0
- # Item_B2 2.0
- # Item_B3 3.0
- # dtype: float64
- #
- # 对齐后数据A和B的和:
- # Item_A1 5.0
- # Item_A2 10.0
- # Item_A3 15.0
- # Item_B1 1.0
- # Item_B2 2.0
- # Item_B3 3.0
- # dtype: float64
- # 189-7、填补缺失数据
- # 对齐后的数据A:
- # A 1.0
- # B 2.0
- # C 3.0
- # D 0.0
- # E 0.0
- # F 0.0
- # dtype: float64
- #
- # 对齐后的数据B:
- # A 0.0
- # B 2.0
- # C 0.0
- # D 0.0
- # E 4.0
- # F 5.0
- # dtype: float64
- #
- # 填补缺失值后的数据总和:
- # A 1.0
- # B 4.0
- # C 3.0
- # D 0.0
- # E 4.0
- # F 5.0
- # dtype: float64
- # 189-8、在数据科学项目中的应用
- # 对齐后的价格数据:
- # Product_A 100.0
- # Product_B 150.0
- # Product_C 200.0
- # Product_D 0.0
- # dtype: float64
- #
- # 对齐后的销量数据:
- # Product_A 10
- # Product_B 5
- # Product_C 3
- # Product_D 8
- # dtype: int64
- #
- # 每个产品的销售收入:
- # Product_A 1000.0
- # Product_B 750.0
- # Product_C 600.0
- # Product_D 0.0
- # dtype: float64

- # 190、pandas.Series.case_when方法
- pandas.Series.case_when(caselist)
- Replace values where the conditions are True.
- Parameters:
- caselist: A list of tuples of conditions and expected replacements
- Takes the form: (condition0, replacement0), (condition1, replacement1), … . condition should be a 1-D boolean array-like object or a callable. If condition is a callable, it is computed on the Series and should return a boolean Series or array. The callable must not change the input Series (though pandas doesn't check it). replacement should be a 1-D array-like object, a scalar or a callable. If replacement is a callable, it is computed on the Series and should return a scalar or Series. The callable must not change the input Series (though pandas doesn't check it).
- New in version 2.2.0.
- Returns:
- Series
190-2-1、caselist(必须):一个包含条件和值的列表,每个元素都是一个元组(condition, value)
- # 190、pandas.Series.case_when方法
- import pandas as pd
- c = pd.Series([6, 7, 8, 9], name='c')
- a = pd.Series([0, 0, 1, 2])
- b = pd.Series([0, 3, 4, 5])
- result = c.case_when(caselist=[(a.gt(0), a), (b.gt(0), b)])
- print(result)
- # 190、pandas.Series.case_when方法
- # 0 6
- # 1 3
- # 2 1
- # 3 2
- # Name: c, dtype: int64
