赞
踩
文末附上完整算法模板的获取方式
# Reference code: https://aistudio.baidu.com/aistudio/projectdetail/2654369?forkThirdPart=1
class LpBayes:
    """Naive Bayes classifier for rows mixing discrete and continuous columns.

    Discrete columns use Laplace-smoothed frequency estimates computed per
    class; continuous columns use a normal density whose mean and standard
    deviation are estimated per class.  A column is treated as continuous
    when its value in the first training row is a float, otherwise as
    discrete.
    """

    # Lower bound applied to a per-class standard deviation.  A class whose
    # continuous column is constant would otherwise give scale=0, for which
    # the normal density evaluates to NaN and the class can never be chosen.
    _MIN_STD = 1e-9

    def __init__(self, isWeights=False):
        """Create an unfitted classifier.

        ``isWeights`` is accepted for backward compatibility only; attribute
        weighting is not implemented and the flag is not stored.
        """
        # Indices of the continuous columns.
        self.continuousCols = []
        # Indices of the discrete columns.
        self.discreteCols = []
        # Column names, indexed by column position.
        self.featureList = []

    def initFeats(self, featureList):
        """Store the column names (one per column of the training matrix)."""
        self.featureList = featureList

    def getFeatClass(self, X):
        """Classify every column as continuous or discrete from row ``X[0]``.

        The index lists are rebuilt from scratch on each call so that
        fitting the same instance again does not accumulate duplicates.
        """
        self.continuousCols = []
        self.discreteCols = []
        for j, value in enumerate(X[0]):
            if isinstance(value, (float, np.floating)):
                self.continuousCols.append(j)
            else:
                self.discreteCols.append(j)

    def fit(self, X, y, featureList):
        """Estimate class priors and discrete conditional probabilities.

        Args:
            X: 2-D training data, one row per sample.  Non-ndarray input is
                converted to an object array so mixed value types survive.
            y: class label of every row of ``X``.
            featureList: column names, one per column of ``X``.
        """
        self.initFeats(featureList)
        # Convert first and use the converted array everywhere below, so the
        # column slicing works for any row-sequence input.
        if isinstance(X, np.ndarray):
            self.Xtrain = np.array(X)
        else:
            self.Xtrain = np.array(X, dtype=object)
        self.ytrain = np.array(y).reshape((-1, 1))
        self.getFeatClass(self.Xtrain)
        # Keep the discrete and continuous parts available separately.
        self.dis_X = self.Xtrain[:, self.discreteCols]
        self.con_X = self.Xtrain[:, self.continuousCols]
        # Set of distinct class labels.
        self.classSet = np.unique(self.ytrain)
        # Conditional probabilities of every discrete column, per class.
        self.dis_P_all = self.Get_P_of_discrete()
        # Laplace-smoothed prior of every class; comparing the (k,) label
        # set with the (n, 1) label column broadcasts to an (n, k) mask.
        class_counts = (self.classSet == self.ytrain).sum(axis=0)
        self.P_c = (class_counts + 1) / (
            self.Xtrain.shape[0] + self.classSet.shape[0]
        )

    def Get_P_of_discrete(self):
        """Return the smoothed conditional probability tables.

        Returns:
            A list with one dict per class (in ``classSet`` order) mapping
            column name -> {value: P(value | class)}.  Every value that
            occurs anywhere in the training column gets an entry.
        """
        # The distinct values of a column do not depend on the class, so
        # compute them once instead of once per class.
        distinct = {j: np.unique(self.Xtrain[:, j]) for j in self.discreteCols}
        P_all = []
        for c in self.classSet:
            # Training rows that belong to the current class.
            X_of_class = self.Xtrain[(self.ytrain == c).ravel()]
            per_feature = {}
            for j, values in distinct.items():
                per_feature[self.featureList[j]] = {
                    v: self.P(X_of_class, j, v, values.shape[0])
                    for v in values
                }
            P_all.append(per_feature)
        return P_all

    def Get_P_of_continuous(self, X, j, x_i):
        """Return the normal density of ``x_i`` for column ``j`` of ``X``.

        The mean and standard deviation are estimated from ``X[:, j]``; the
        standard deviation is floored at ``_MIN_STD`` to avoid a NaN density.
        """
        column = X[:, j].astype(float)
        scale = max(column.std(), self._MIN_STD)
        return st.norm.pdf(x_i, loc=column.mean(), scale=scale)

    def P(self, X, x, x_i, N):
        """Return the Laplace-smoothed frequency of ``x_i`` in column ``x``.

        ``N`` is the number of distinct values the column can take.
        """
        return ((X[:, x] == x_i).sum() + 1) / (X.shape[0] + N)

    def _class_summaries(self):
        """Return ``(row_count, {col: (mean, std)})`` for every class."""
        summaries = []
        for c in self.classSet:
            rows = self.Xtrain[(self.ytrain == c).ravel()]
            stats = {}
            for j in self.continuousCols:
                column = rows[:, j].astype(float)
                stats[j] = (column.mean(), max(column.std(), self._MIN_STD))
            summaries.append((rows.shape[0], stats))
        return summaries

    def predict(self, X_pre):
        """Predict the most probable class of every row of ``X_pre``.

        Scores are accumulated as log-probabilities so that many columns do
        not underflow to zero.  A discrete value never seen in training gets
        the smoothed probability of a zero count instead of raising
        ``KeyError``.

        Returns:
            An object array of shape ``(n_samples, 1)`` holding the labels.
        """
        if not isinstance(X_pre, np.ndarray):
            X_pre = np.array(X_pre, dtype=object)
        y_pre = np.ones(X_pre.shape[0], dtype='object')
        # Per-class statistics are independent of the sample: compute once.
        summaries = self._class_summaries()
        log_priors = np.log(self.P_c)
        for i, sample in enumerate(X_pre):
            best_score = -np.inf
            # Fall back to a real label if no class scores above -inf.
            best_label = self.classSet[0]
            for k, c in enumerate(self.classSet):
                n_rows, stats = summaries[k]
                score = log_priors[k]
                for j, value in enumerate(sample):
                    if j in stats:
                        mean, std = stats[j]
                        score += st.norm.logpdf(
                            float(value), loc=mean, scale=std
                        )
                    else:
                        # The tables are keyed by the name of the real
                        # column index, so index featureList with j itself.
                        table = self.dis_P_all[k][self.featureList[j]]
                        prob = table.get(value)
                        if prob is None:
                            # Unseen value: smoothed estimate of count 0.
                            prob = 1 / (n_rows + len(table))
                        score += np.log(prob)
                if score > best_score:
                    best_score = score
                    best_label = c
            y_pre[i] = best_label
        return y_pre.reshape((-1, 1))

    def predictDF(self, X_pre, featureList):
        """Return ``X_pre`` as a DataFrame with a predicted-label column."""
        predf = pd.concat(
            [
                pd.DataFrame(X_pre, columns=featureList),
                pd.DataFrame(self.predict(X_pre), columns=['预测标签']),
            ],
            axis=1,
        )
        return predf

    def score(self, Xtest, ytest):
        """Return the accuracy of ``predict(Xtest)`` against ``ytest``.

        ``ytest`` is flattened first: comparing a flat prediction vector
        with an (n, 1) label column would broadcast to an (n, n) matrix.
        """
        ypre = self.predict(Xtest).reshape(-1)
        ytrue = np.asarray(ytest).reshape(-1)
        acc = (ypre == ytrue).sum() / ytrue.shape[0]
        return acc
获取完整项目文件可以关注微信公众号:艺千秋录
输入:数学建模算法模板
即可获取完整的项目文件
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。