赞
踩
核心代码
def linear_interpolation(x1, y1, x2, y2, x0):
    """Estimate the value at ``x0`` on the straight line through two points.

    Args:
        x1, y1: Position and value of the first known data point.
        x2, y2: Position and value of the second known data point.
        x0: Position at which the value is estimated.

    Returns:
        The value of the line through ``(x1, y1)`` and ``(x2, y2)`` at ``x0``.

    Raises:
        ZeroDivisionError: If ``x1 == x2`` for plain Python numbers, because
            the two points then do not define a slope.
    """
    y0 = y1 + (x0 - x1) * (y2 - y1) / (x2 - x1)
    return y0


# Sample data: two known points and the position to estimate.
x1, y1 = 0, 10
x2, y2 = 5, 20
x0 = 2

# Estimate the value at position x0 by linear interpolation.
y0 = linear_interpolation(x1, y1, x2, y2, x0)
print(f"在位置 {x0} 处的估算数据值为:{y0}")
数据格式:
分析:1. 首先判断该数据中是否含有 NaN(即是否需要插值)
2. 获取所有 NaN 的索引位置,保存到一个列表(list)中
3.根据获取的nan的索引位置,得到位置前后的数据,通过线性插值法算出该nan的值。
代码具体实现:
import numpy as np
import pandas as pd


def is_exist_nan(data):
    """Return True if ``data`` contains at least one NaN value."""
    return any(np.isnan(d) for d in data)


def get_nan_index_list(data):
    """Return the indices of all NaN entries in ``data``, in ascending order."""
    return [index for index, d in enumerate(data) if np.isnan(d)]


def linear_interpolation(x1, y1, x2, y2, x0):
    """Estimate the value at ``x0`` on the straight line through two points.

    Args:
        x1, y1: Position and value of the first known data point.
        x2, y2: Position and value of the second known data point.
        x0: Position at which the value is estimated.

    Returns:
        The value of the line through ``(x1, y1)`` and ``(x2, y2)`` at ``x0``.
    """
    y0 = y1 + (x0 - x1) * (y2 - y1) / (x2 - x1)
    return y0


def get_first_data(data):
    """Return the first non-NaN value in ``data``, or None if there is none."""
    for d in data:
        if not np.isnan(d):
            return d
    return None


def get_last_data(data):
    """Return the last non-NaN value in ``data``, or None if there is none."""
    for d in reversed(data):
        if not np.isnan(d):
            return d
    return None


def digu(x2, data):
    """Find the first non-NaN position at or after index ``x2``.

    Implemented as a loop rather than recursion so that a long run of NaNs
    cannot exceed the interpreter's recursion limit.

    Args:
        x2: Index at which the search starts.
        data: Sequence of numbers that may contain NaN.

    Returns:
        A tuple ``(index, data)`` where ``index`` is the first position
        ``>= x2`` holding a non-NaN value and ``data`` is the same object
        that was passed in.

    Raises:
        IndexError: If every value from ``x2`` to the end of ``data`` is NaN.
    """
    while np.isnan(data[x2]):
        x2 += 1
    return x2, data


def get_new_data(nan_index_list, data):
    """Fill the NaN entries of ``data`` in place by linear interpolation.

    A NaN at the first position is replaced by the first valid value and a
    NaN at the last position by the last valid value. Every other NaN is
    interpolated between its left neighbour (already filled by the time it is
    reached) and the next valid value to its right, rounded to 4 decimals.

    Args:
        nan_index_list: Indices of the NaN entries of ``data`` in ascending
            order, as produced by ``get_nan_index_list``. Not modified.
        data: Mutable sequence of numbers; it is modified in place.

    Returns:
        The same ``data`` object with its NaN entries filled. If ``data``
        holds no valid value at all it is returned unchanged, because there
        is nothing to interpolate from.
    """
    if not nan_index_list:
        return data
    if get_first_data(data) is None:
        # Every entry is NaN: no anchor value exists.
        return data

    # Work on a copy so the caller's index list is left untouched.
    pending = list(nan_index_list)
    last_index = len(data) - 1

    if pending[0] == 0:
        data[0] = get_first_data(data)
        pending.pop(0)
    if pending and pending[-1] == last_index:
        # Anchoring the tail first guarantees that the right-hand search
        # below always finds a valid value.
        data[last_index] = get_last_data(data)
        pending.pop()

    for nan_index in pending:
        x1 = nan_index - 1
        y1 = data[x1]
        x2, data = digu(nan_index + 1, data)
        y2 = data[x2]
        y0 = round(linear_interpolation(x1, y1, x2, y2, nan_index), 4)
        data[nan_index] = y0
    return data


if __name__ == '__main__':
    data1 = [np.nan, -0.3356, -0.3208, -0.3661, 0.2192, np.nan, np.nan,
             np.nan, -0.3709, -0.3779, 0.026, -0.2601, np.nan, -0.0238,
             -0.2241, -0.2105, -0.2623, 0.379, -0.2196, np.nan, -0.0835,
             0.2895, 0.0415, -0.2323, -0.1782, -0.2308, -0.2265]
    if is_exist_nan(data1):
        print(data1)
        nan_index_list = get_nan_index_list(data1)
        new_data = get_new_data(nan_index_list, data1)
        print(new_data)
运行结果如下:
[nan, -0.3356, -0.3208, -0.3661, 0.2192, nan, nan, nan, -0.3709, -0.3779, 0.026, -0.2601, nan, -0.0238, -0.2241, -0.2105, -0.2623, 0.379, -0.2196, nan, -0.0835, 0.2895, 0.0415, -0.2323, -0.1782, -0.2308, -0.2265]
[-0.3356, -0.3356, -0.3208, -0.3661, 0.2192, 0.0717, -0.0758, -0.2234, -0.3709, -0.3779, 0.026, -0.2601, -0.1419, -0.0238, -0.2241, -0.2105, -0.2623, 0.379, -0.2196, -0.1515, -0.0835, 0.2895, 0.0415, -0.2323, -0.1782, -0.2308, -0.2265]
以上代码只是给大家提供一个思路;实际使用时,推荐直接用 pandas 的 interpolate 方法实现。
import numpy as np
import pandas as pd

if __name__ == '__main__':
    # Raw data containing missing values.
    data = [np.nan, -0.3356, -0.3208, -0.3661, 0.2192, np.nan, np.nan, np.nan, -0.3709, -0.3779, 0.026, -0.2601,
            np.nan, -0.0238, -0.2241, -0.2105, -0.2623, 0.379, -0.2196, np.nan, -0.0835, 0.2895, 0.0415, -0.2323,
            -0.1782, -0.2308, -0.2265]
    # Wrap the data in a pandas Series; the missing values are held as NaN.
    data_series = pd.Series(data)
    # Interpolate linearly; limit_direction='both' also fills NaNs at the
    # very start and very end of the series.
    interpolated_data = data_series.interpolate(limit_direction='both')
    # Print the interpolated values.
    print(interpolated_data.values)
import numpy as np
import pandas as pd
from scipy.signal import savgol_filter


def get_interpolated_data(data):
    """Fill the NaN entries of ``data`` by linear interpolation.

    Args:
        data: Sequence of numbers in which missing values are NaN.

    Returns:
        A list of floats rounded to 4 decimals, with interior NaNs linearly
        interpolated and leading/trailing NaNs filled as well
        (``limit_direction='both'``).
    """
    # Wrap the data in a pandas Series; the missing values are held as NaN.
    data_series = pd.Series(data)
    # Interpolate linearly, including NaNs at the very start and very end.
    interpolated_data = data_series.interpolate(limit_direction='both').tolist()
    # Keep four decimal places.
    return [round(i, 4) for i in interpolated_data]


def get_sg_data(data, window_size, polyorder):
    """Smooth ``data`` with a Savitzky-Golay filter.

    Args:
        data: Sequence of numbers to smooth.
        window_size: Passed to ``savgol_filter`` as the filter window length.
        polyorder: Passed to ``savgol_filter`` as the order of the fitted
            polynomial.

    Returns:
        A list with the smoothed values rounded to 4 decimals.
    """
    smoothed_data = savgol_filter(data, window_size, polyorder).tolist()
    # Keep four decimal places.
    return [round(i, 4) for i in smoothed_data]


if __name__ == '__main__':
    # Plotting is only needed by this demo, so matplotlib is imported here;
    # the helpers above stay importable where matplotlib is not installed.
    import matplotlib.pyplot as plt

    # Raw data containing missing values.
    data = [np.nan, -0.3356, -0.3208, -0.3661, 0.2192, np.nan, np.nan,
            np.nan, -0.3709, -0.3779, 0.026, -0.2601, np.nan, -0.0238,
            -0.2241, -0.2105, -0.2623, 0.379, -0.2196, np.nan, -0.0835,
            0.2895, 0.0415, -0.2323, -0.1782, -0.2308, -0.2265]
    interpolated_data = get_interpolated_data(data)
    sg_data = get_sg_data(interpolated_data, 5, 2)
    sg_data2 = get_sg_data(interpolated_data, 9, 3)
    print(interpolated_data)
    print(sg_data)
    print(sg_data2)
    plt.plot(interpolated_data, label='interpolated_data')
    plt.plot(sg_data, label='sg_data window_size=5 polyorder=2')
    plt.plot(sg_data2, label='sg_data2 window_size=9 polyorder=3')
    plt.xlabel('Time')
    plt.ylabel('Value')
    plt.title('Line Plot')
    plt.legend()
    plt.show()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。