赞
踩
(1) K -means
(2) PAM
1.K-means算法对waveform数据进行聚类(选取数据集的第7维和第10维作为x轴y轴进行可视化)
2.K-means算法对有20%高斯噪声的waveform数据进行聚类(选取数据集的第7维和第10维作为x轴y轴进行可视化)
3.PAM算法对waveform数据进行聚类(选取数据集的第7维和第10维作为x轴y轴进行可视化)
4.PAM算法对有20%高斯噪声的waveform数据进行聚类(选取数据集的第7维和第10维作为x轴y轴进行可视化)
• 图像分割部分
无噪声原图 有噪声原图
5.K-means算法对无噪声的图像进行分割
6.K-means算法对有噪声的图像进行分割
7.PAM算法对无噪声的图像进行分割
8.PAM算法对有噪声的图像进行分割
- # -*- coding:utf-8 -*-
- from numpy import *
- import pandas as pd
- import matplotlib.pyplot as plt
- import random
- import numpy as np
- from PIL import Image
-
-
def image_gauss_noise(image):  # add Gaussian noise to an image
    """Add Gaussian noise (mu=0, sigma=10) to every pixel of a grayscale image.

    Works on a widened int16 copy so intermediate values may fall below 0 or
    exceed 255, then clips back into [0, 255] and returns uint8.
    """
    # Vectorized: one numpy draw for the whole image instead of the original
    # per-pixel Python loop (same noise model, vastly faster).
    noisy = image.astype(np.int16) + np.random.normal(0, 10, image.shape)
    return np.clip(noisy, 0, 255).astype(np.uint8)
-
-
def data_gauss_noise(data):  # add Gaussian noise to ~20% of the data rows
    """Add Gaussian noise (mu=0, sigma=0.1) to roughly 20% of the rows, in place.

    Note: the original comment claimed 10%, but the code samples m * 0.2 row
    indices (matching the assignment text, which asks for 20% noise).  The
    indices are drawn with replacement and de-duplicated through a set, so
    slightly fewer than 20% of rows may actually be perturbed.

    Returns the same (mutated) matrix for call-chaining convenience.
    """
    m, n = shape(data)
    # ~20% of row indices, duplicates collapsing inside the set
    noisy_rows = set((m * np.random.rand(int(m * 0.2))).astype(int))
    for i in noisy_rows:
        for j in range(n):
            data[i, j] += random.gauss(mu=0, sigma=0.1)
    return data
-
-
# Euclidean distance between two vectors.
def disMea(vecA, vecB):
    diff = vecA - vecB
    return sqrt(sum(multiply(diff, diff)))
-
-
# Randomly choose initial centroids.
def createCent(dataSet, k):
    """Pick k random initial centroids, each drawn uniformly inside the
    per-column [min, max] range of the data.

    The original mixed builtin min() over a matrix column (which yields 1x1
    matrices) with max(array(...)); here both bounds come consistently from
    the ndarray view.  Returns a (k, n) matrix.
    """
    n = shape(dataSet)[1]
    centriods = mat(zeros((k, n)))
    data = array(dataSet)
    for j in range(n):
        # column bounds; centroids land uniformly inside them
        minJ = data[:, j].min()
        rangeJ = float(data[:, j].max() - minJ)
        centriods[:, j] = minJ + rangeJ * np.random.rand(k, 1)
    return centriods
-
-
def kmeans(dataSet, k):
    """Cluster dataSet (m x n matrix) into k clusters with Lloyd's k-means.

    Runs at most 10 passes, stopping early once no point changes cluster.
    Returns (centriods, clusterA): centriods is a (k, n) matrix and clusterA
    an (m, 1) matrix holding each point's cluster index.
    """
    m = shape(dataSet)[0]
    clusterA = mat(zeros((m, 1)))       # per-point cluster assignment
    centriods = createCent(dataSet, k)  # random initial centroids
    clusterC = True                     # did any assignment change this pass?
    itr = 10                            # hard cap on iterations
    while clusterC and itr:
        clusterC = False
        # Assignment step: attach every point to its nearest centroid.
        for i in range(m):
            minDist = inf
            minIndex = -1
            for j in range(k):
                distJI = disMea(centriods[j, :], dataSet[i, :])
                if distJI < minDist:
                    minDist = distJI
                    minIndex = j
            if clusterA[i, 0] != minIndex:
                clusterC = True
            clusterA[i, 0] = int(minIndex)
        # Update step: move each centroid to the mean of its points.
        for cent in range(k):
            ptsInClust = dataSet[nonzero(clusterA[:, 0].A == cent)[0]]
            # Guard: the mean of an empty cluster is NaN and would corrupt
            # the centroid; keep the previous centroid instead.
            if len(ptsInClust):
                centriods[cent, :] = mean(ptsInClust, axis=0)
        itr -= 1
    return centriods, clusterA
-
-
def show1(dataSet, k, centriods, clusterA, count):
    """Scatter-plot dimensions 7 and 10 of every sample, one marker style per
    cluster, and save the figure as Figure_<count>.png."""
    plt.figure()
    num_samples = shape(dataSet)[0]
    markers = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
    for idx in range(num_samples):
        cluster_id = int(clusterA[idx, 0])
        plt.plot(dataSet[idx, 6], dataSet[idx, 9], markers[cluster_id])
    plt.savefig("Figure_" + str(count) + ".png")
-
-
def pearson_distance(vector1, vector2):
    """Distance between two vectors, returned as a plain float.

    NOTE(review): despite the name, pdist's default metric is *Euclidean*
    distance, not Pearson correlation distance.  The name is kept so existing
    callers keep working.
    """
    from scipy.spatial.distance import pdist
    X = vstack([vector1, vector2])
    # pdist returns a 1-element array for two points; unwrap it so callers
    # can cache and compare the value without numpy-array truthiness pitfalls
    # (e.g. `tmp == None` on an array is an elementwise comparison).
    return float(pdist(X)[0])
-
-
def totalcost(blogwords, medoids_idx):
    """Assign every row of `blogwords` to its nearest medoid.

    Returns (total_cost, medoids): total_cost is the summed distance of each
    point to its chosen medoid, and medoids maps medoid row index -> list of
    assigned row indices.
    """
    distances_cache = {}  # (medoid_idx, row_idx) -> distance, avoids recomputation
    size = shape(blogwords)[0]
    total_cost = 0.0
    medoids = {}
    for idx in medoids_idx:
        medoids[idx] = []
    for i in range(size):
        choice = None
        min_cost = inf
        for m in medoids:
            tmp = distances_cache.get((m, i))
            # `is None`, not `== None`: equality misbehaves on numpy arrays
            if tmp is None:
                tmp = pearson_distance(blogwords[m], blogwords[i])
                distances_cache[(m, i)] = tmp
            if tmp < min_cost:
                choice = m
                min_cost = tmp
        medoids[choice].append(i)
        total_cost += min_cost
    return total_cost, medoids
-
-
def PAM(dataSet, k):
    """Cluster dataSet into k clusters with PAM (k-medoids).

    For each medoid, every non-medoid member of its cluster is tried as a
    replacement; the single best swap of a whole pass is adopted.  At most 5
    improvement passes are run.  Returns (centriods, clusterA) in the same
    shape as kmeans(): a (k, n) matrix of medoid coordinates and an (m, 1)
    matrix of per-point cluster labels.
    """
    m, n = shape(dataSet)  # rows / columns of the data set
    iter_count = 0
    # Randomly choose k distinct row indices as the initial medoids
    CenterIndex = random.sample([i for i in range(m)], k)
    # Initial total cost and cluster assignment
    pre_cost, medoids = totalcost(dataSet, CenterIndex)
    current_cost = inf
    best_choice = []
    best_res = { }
    itr = 5  # hard cap on the number of improvement passes
    while itr:
        # Walk over every current medoid.
        # NOTE(review): this loop variable shadows the row count `m`;
        # it is re-read from shape(dataSet) after the loop, below.
        for m in medoids:
            # Try each member of this medoid's cluster as a replacement
            for item in medoids[m]:
                # only points that are not the medoid itself
                if item != m:
                    # position of medoid m inside the medoid index list
                    idx = CenterIndex.index(m)
                    # remember the medoid about to be swapped out
                    swap_temp = CenterIndex[idx]
                    # tentative swap
                    CenterIndex[idx] = item
                    # cost and clusters after the swap
                    tmp, medoids_ = totalcost(dataSet, CenterIndex)
                    # keep the swap if it lowers the best cost seen so far
                    if tmp < current_cost:
                        # record the improved medoid set
                        best_choice = list(CenterIndex)
                        # and the matching cluster assignment
                        best_res = dict(medoids_)
                        # and the improved cost
                        current_cost = tmp
                    # undo the swap: every candidate is evaluated against the
                    # same starting configuration, and only the overall best
                    # is kept (rather than greedily accepting each smaller value)
                    CenterIndex[idx] = swap_temp
        # If the best medoid set equals the current one, the configuration is
        # stable: stop.
        if best_choice == CenterIndex:
            break
        # Otherwise adopt the improvement and repeat
        if current_cost <= pre_cost:
            pre_cost = current_cost
            medoids = best_res
            CenterIndex = best_choice
        itr -= 1
        print(itr)
    # Build kmeans-compatible outputs from the final solution
    # (current_cost, best_choice, best_res)
    m, n = shape(dataSet)
    centriods = mat(zeros((k, n)))
    for i in range(k):
        centriods[i, :] = dataSet[best_choice[i], :]
    clusterA = mat(zeros((m, 1)))
    n = 0
    for i in list(best_res.keys()):
        for j in best_res[i]:
            clusterA[j, 0] = n
        n += 1
    return centriods, clusterA
-
-
-
def fun1(count):  # k-means on the raw waveform data
    frame = pd.read_csv('waveform.csv', header=None)
    samples = mat(frame)[:, 1:22]
    cents, labels = kmeans(samples, 3)
    show1(samples, 3, cents, labels, count)
-
def fun2(count):  # k-means on waveform data with Gaussian noise added
    frame = pd.read_csv('waveform.csv', header=None)
    samples = data_gauss_noise(mat(frame)[:, 1:22])
    cents, labels = kmeans(samples, 3)
    show1(samples, 3, cents, labels, count)
-
def fun3(count):  # k-means segmentation of the noise-free lena image
    img = Image.open('lena.jpg').convert('L')
    img.save("lena_1.png")
    width, height = img.size
    pixels = array(img).reshape((width * height, 1))
    k = 3
    Centroids, clustAssing = kmeans(pixels, k)
    # label map in image layout: rows are y (height), columns are x (width)
    labels = array(clustAssing).reshape((height, width)).astype(int16)
    out = Image.new("L", (width, height))
    for x in range(width):
        for y in range(height):
            out.putpixel((x, y), tuple([int(v) for v in Centroids[labels[y][x]]]))
    out.save("Figure_" + str(count) + ".png")
-
-
def fun4(count):  # k-means segmentation of the lena image with Gaussian noise
    img = Image.open('lena.jpg').convert('L')
    img = Image.fromarray(image_gauss_noise(np.array(img)))
    img.save("lena_2.png")
    width, height = img.size
    pixels = array(img).reshape((width * height, 1))
    k = 3
    Centroids, clustAssing = kmeans(pixels, k)
    # label map in image layout: rows are y (height), columns are x (width)
    labels = array(clustAssing).reshape((height, width)).astype(int16)
    out = Image.new("L", (width, height))
    for x in range(width):
        for y in range(height):
            out.putpixel((x, y), tuple([int(v) for v in Centroids[labels[y][x]]]))
    out.save("Figure_" + str(count) + ".png")
-
-
def fun5(count):  # PAM on the raw waveform data
    frame = pd.read_csv('waveform.csv', header=None)
    samples = mat(frame)[:, 1:22]
    cents, labels = PAM(samples, 3)
    show1(samples, 3, cents, labels, count)
-
-
def fun6(count):  # PAM on waveform data with Gaussian noise added
    frame = pd.read_csv('waveform.csv', header=None)
    samples = data_gauss_noise(mat(frame)[:, 1:22])
    cents, labels = PAM(samples, 3)
    show1(samples, 3, cents, labels, count)
-
-
def fun7(count):  # PAM segmentation of the noise-free lena image
    img = Image.open('lena.jpg').convert('L')
    width, height = img.size
    pixels = array(img).reshape((width * height, 1))
    k = 3
    Centroids, clustAssing = PAM(pixels, k)
    # label map in image layout: rows are y (height), columns are x (width)
    labels = array(clustAssing).reshape((height, width)).astype(int16)
    out = Image.new("L", (width, height))
    for x in range(width):
        for y in range(height):
            out.putpixel((x, y), tuple([int(v) for v in Centroids[labels[y][x]]]))
    out.save("Figure_" + str(count) + ".png")
-
-
def fun8(count):  # PAM segmentation of the lena image with Gaussian noise
    img = Image.open('lena.jpg').convert('L')
    img = Image.fromarray(image_gauss_noise(np.array(img)))
    width, height = img.size
    pixels = array(img).reshape((width * height, 1))
    k = 3
    Centroids, clustAssing = PAM(pixels, k)
    # label map in image layout: rows are y (height), columns are x (width)
    labels = array(clustAssing).reshape((height, width)).astype(int16)
    out = Image.new("L", (width, height))
    for x in range(width):
        for y in range(height):
            out.putpixel((x, y), tuple([int(v) for v in Centroids[labels[y][x]]]))
    out.save("Figure_" + str(count) + ".png")
-
-
if '__main__' == __name__:
    # Run all eight experiments; the argument numbers the saved figure.
    for figure_no, experiment in enumerate(
            [fun1, fun2, fun3, fun4, fun5, fun6, fun7, fun8], start=1):
        experiment(figure_no)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。