聚类算法k-Modes的实现
<?php
/*
 * k-modes clustering for categorical data.
 *
 * Data layout, as read by every function below:
 *   - $train is a list of rows; row 0 is never read as a record
 *     (presumably a header line - confirm against the input file).
 *   - Inside a row, element 0 is ignored (presumably a record id) and the
 *     last element is the class label; everything in between is a
 *     categorical attribute.
 *   - $M is a 0-based list of modes, one per cluster; each mode holds one
 *     value per attribute (so $M[c][a] pairs with $row[a + 1]).
 *   - $W is the membership matrix. $W[0] is a NULL placeholder; for
 *     cluster c (1-based) and record r (1-based row index of $train),
 *     $W[c][r] is 1 when the record belongs to the cluster and 0 otherwise.
 *
 * Attribute values and labels are compared by their string form, so "01"
 * and "1" are different categories while int 1 and "1" are the same.
 */

//--------------------------------------------------------------------
/**
 * Collects the distinct class labels of the data set.
 *
 * @param array $train Data set as described in the file header.
 * @return array Distinct labels (last element of each row), in order of
 *               first appearance, scanning rows 1..n-1.
 */
function Category($train)
{
    $category = [];
    $seen = [];
    $rowCount = count($train);

    for ($i = 1; $i < $rowCount; $i++) {
        $label = $train[$i][count($train[$i]) - 1];
        $key = (string) $label;
        if (!isset($seen[$key])) {
            // Keyed lookup replaces the original linear rescan of $category.
            $seen[$key] = true;
            $category[] = $label;
        }
    }

    return $category;
}
//--------------------------------------------------------------------

//--------------------------------------------------------------------
/**
 * Builds the initial mode matrix M.
 *
 * For every distinct label (same order as Category()) the first row that
 * carries this label is used as the initial mode, with its first element
 * and its label removed.
 *
 * @param array $train Data set as described in the file header.
 * @return array 0-based list of initial modes.
 */
function first_M($train)
{
    $M = [];
    $seen = [];
    $rowCount = count($train);

    for ($i = 1; $i < $rowCount; $i++) {
        $row = $train[$i];
        $key = (string) $row[count($row) - 1];
        if (isset($seen[$key])) {
            continue;
        }
        $seen[$key] = true;
        // Drop the leading element and the trailing label, keep attributes.
        $M[] = array_slice($row, 1, -1);
    }

    return $M;
}
//--------------------------------------------------------------------

//--------------------------------------------------------------------
/**
 * Simple-matching distance dis(m, e) between a record and a mode.
 *
 * @param array $array A full row of the data set (first element and label
 *                     included; both are skipped).
 * @param array $e     A mode: attribute values only, 0-based.
 * @return int Number of attributes whose values differ. An attribute that
 *             is not set in $e (e.g. the empty mode of an empty cluster)
 *             counts as a mismatch.
 */
function dis($array, $e)
{
    $sum = 0;
    $labelIndex = count($array) - 1;

    for ($i = 1; $i < $labelIndex; $i++) {
        if (!isset($e[$i - 1]) || (string) $array[$i] !== (string) $e[$i - 1]) {
            $sum++;
        }
    }

    return $sum;
}
//--------------------------------------------------------------------

//--------------------------------------------------------------------
/**
 * Computes the membership matrix W for the given modes.
 *
 * Every record is assigned to exactly one cluster: the one whose mode is
 * nearest according to dis(); ties go to the lowest cluster number.
 *
 * @param array $train Data set as described in the file header.
 * @param array $M     0-based list of modes.
 * @return array Membership matrix ($W[0] is NULL, clusters and records
 *               are 1-based). Only [NULL] when there are no records or
 *               no modes.
 */
function W($train, $M)
{
    $W = [null];
    $rowCount = count($train);
    $clusterCount = count($M);

    if ($clusterCount === 0) {
        return $W;
    }

    for ($i = 1; $i < $rowCount; $i++) {
        // Track the nearest cluster and its distance together; the original
        // refreshed the minimum from the wrong row ($train[$j]), which could
        // leave no cluster matching the minimum.
        $nearest = 1;
        $min = dis($train[$i], $M[0]);
        for ($j = 2; $j <= $clusterCount; $j++) {
            $distance = dis($train[$i], $M[$j - 1]);
            if ($distance < $min) {
                $min = $distance;
                $nearest = $j;
            }
        }

        for ($j = 1; $j <= $clusterCount; $j++) {
            $W[$j][$i] = ($j === $nearest) ? 1 : 0;
        }
    }

    return $W;
}
//--------------------------------------------------------------------

//--------------------------------------------------------------------
/**
 * Cost function F(W, M): sum over all clusters and records of
 * membership * distance(record, mode).
 *
 * @param array $train Data set as described in the file header.
 * @param array $M     0-based list of modes.
 * @param array $W     Membership matrix as returned by W().
 * @return int Total cost.
 */
function F_W_M($train, $M, $W)
{
    $fwm = 0;
    $rowCount = count($train);
    $clusterCount = count($M);

    for ($i = 1; $i <= $clusterCount; $i++) {
        for ($j = 1; $j < $rowCount; $j++) {
            $fwm += dis($train[$j], $M[$i - 1]) * $W[$i][$j];
        }
    }

    return $fwm;
}
//--------------------------------------------------------------------

//--------------------------------------------------------------------
/**
 * Computes the mode of one cluster.
 *
 * @param array $array The full rows (first element and label included)
 *                     that belong to the cluster.
 * @return array For every attribute column, the most frequent value among
 *               the rows; on a tie the value seen first wins. An empty
 *               cluster yields an empty mode.
 */
function New_SingleM($array)
{
    $new_m = [];
    if (count($array) === 0) {
        // Nothing to count; the original failed here on $array[0].
        return $new_m;
    }

    $firstRow = reset($array);
    $labelIndex = count($firstRow) - 1;

    for ($i = 1; $i < $labelIndex; $i++) {
        $counts = [];     // string form of value => occurrences
        $firstSeen = [];  // string form of value => original value
        foreach ($array as $row) {
            $key = (string) $row[$i];
            if (isset($counts[$key])) {
                $counts[$key]++;
            } else {
                $counts[$key] = 1;
                $firstSeen[$key] = $row[$i];
            }
        }

        // $counts keeps insertion order, so a strict ">" keeps the
        // first-seen value on ties.
        $bestKey = null;
        $bestCount = 0;
        foreach ($counts as $key => $count) {
            if ($count > $bestCount) {
                $bestKey = $key;
                $bestCount = $count;
            }
        }
        $new_m[] = $firstSeen[$bestKey];
    }

    return $new_m;
}
//--------------------------------------------------------------------

//--------------------------------------------------------------------
/**
 * Recomputes the mode matrix M from the current memberships.
 *
 * @param array $train Data set as described in the file header.
 * @param array $W     Membership matrix as returned by W().
 * @return array 0-based list of modes, one per cluster of $W.
 */
function New_M($train, $W)
{
    $new_M = [];
    // $W[0] is the NULL placeholder, hence one entry fewer than count($W).
    $clusterCount = count($W) - 1;

    for ($i = 1; $i <= $clusterCount; $i++) {
        $members = [];
        foreach ($W[$i] as $j => $flag) {
            if ($flag == 1) {
                $members[] = $train[$j];
            }
        }
        $new_M[] = New_SingleM($members);
    }

    return $new_M;
}
//--------------------------------------------------------------------

//--------------------------------------------------------------------
/**
 * k-modes clustering.
 *
 * Alternates between recomputing the modes for fixed memberships and
 * recomputing the memberships for fixed modes, and stops as soon as one
 * of the two steps leaves the cost F(W, M) unchanged.
 *
 * @param array $train Data set as described in the file header.
 * @param array $m     Output: final mode matrix M.
 * @param array $w     Output: final membership matrix W.
 * @return void
 */
function Kmodes($train, &$m, &$w)
{
    $M = first_M($train);
    $W = W($train, $M);

    while (true) {
        $costBefore = F_W_M($train, $M, $W);

        // Step 1: best modes for the current memberships.
        $M = New_M($train, $W);
        $costAfterModes = F_W_M($train, $M, $W);
        if ($costAfterModes === $costBefore) {
            break;
        }

        // Step 2: best memberships for the new modes. The original passed
        // an undefined $M2 here instead of $M.
        $W = W($train, $M);
        $costAfterAssign = F_W_M($train, $M, $W);
        if ($costAfterAssign === $costAfterModes) {
            break;
        }
    }

    $m = $M;
    $w = $W;
}
//--------------------------------------------------------------------

//--------------------------------------------------------------------
/**
 * Reads a text file into a list of rows.
 *
 * Lines may end with CRLF, LF or CR. Blank lines are skipped and
 * surrounding whitespace of a line is removed, so a trailing newline or
 * trailing space cannot create a phantom record or an empty label.
 * Fields are separated by a single space.
 *
 * @param string $filename File to read.
 * @return array List of rows, each a list of string fields.
 * @throws RuntimeException When the file cannot be read.
 */
function getFileContent($filename)
{
    $content = file_get_contents($filename);
    if ($content === false) {
        throw new RuntimeException("Unable to read file: " . $filename);
    }

    $rows = [];
    foreach (preg_split('/\r\n|\r|\n/', $content) as $line) {
        $line = trim($line);
        if ($line === '') {
            continue;
        }
        $rows[] = explode(" ", $line);
    }

    return $rows;
}
//--------------------------------------------------------------------

//--------------------------------------------------------------------
/**
 * Writes a matrix to a text file.
 *
 * Every value is followed by a tab and every row is terminated by CRLF.
 * Rows are written with whatever keys they have (0-based modes as well as
 * 1-based membership rows); an entry that is not an array, such as the
 * NULL placeholder at index 0 of W, is written as an empty line.
 *
 * @param array  $result   Matrix to store.
 * @param string $filename Target file; it is overwritten.
 * @return void
 * @throws RuntimeException When the file cannot be opened for writing.
 */
function Array_Totxt($result, $filename)
{
    $fp = fopen($filename, 'wb');
    if ($fp === false) {
        throw new RuntimeException("Unable to open file for writing: " . $filename);
    }

    foreach ($result as $row) {
        $line = '';
        if (is_array($row)) {
            foreach ($row as $value) {
                $line .= $value . "\t";
            }
        }
        fwrite($fp, $line . "\r\n");
    }

    fclose($fp);
}
//--------------------------------------------------------------------

// Driver: cluster train.txt and store the resulting matrices.
// A missing or unwritable file is reported as a warning instead of
// producing misleading output files.
try {
    $train = getFileContent("train.txt");
    Kmodes($train, $M, $W);
    Array_Totxt($M, "M.txt");
    Array_Totxt($W, "w.txt");
} catch (RuntimeException $e) {
    trigger_error($e->getMessage(), E_USER_WARNING);
}

?>
M矩阵:
W矩阵: