From 020c5df2758d299f72d4badc98f8255edfa61b3a Mon Sep 17 00:00:00 2001 From: Aaron LI Date: Mon, 16 Oct 2017 10:59:31 +0800 Subject: Move some scripts --- cluster/kMeans.py | 76 ------------------------------------------------------- 1 file changed, 76 deletions(-) delete mode 100644 cluster/kMeans.py (limited to 'cluster') diff --git a/cluster/kMeans.py b/cluster/kMeans.py deleted file mode 100644 index f4868c6..0000000 --- a/cluster/kMeans.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# Credit: Machine Learning in Action: Chapter 10 -# -# Aaron LI -# 2015/06/23 -# - -""" -k-means clustering algorithm -""" - - -import numpy as np - - -def loadDataSet(fileName): - dataMat = [] - fr = open(fileName) - for line in fr.readlines(): - curLine = line.strip().split('\t') - fltLine = list(map(float, curLine)) - dataMat.append(fltLine) - return np.array(dataMat) - - -def distEclud(vecA, vecB): - return np.sqrt(np.sum(np.power(vecA - vecB, 2))) - - -def randCent(dataSet, k): - n = np.shape(dataSet)[1] - centroids = np.zeros((k, n)) - for j in range(n): - minJ = np.min(dataSet[:, j]) - rangeJ = float(np.max(dataSet[:, j]) - minJ) - centroids[:, j] = minJ + rangeJ * np.random.rand(k) - return centroids - - -def kMeans(dataSet, k, distMeas=distEclud, createCent=randCent): - m = np.shape(dataSet)[0] - clusterAssment = np.zeros((m, 2)) - centroids = createCent(dataSet, k) - clusterChanged = True - iterations = 0 - while clusterChanged: - clusterChanged = False - iterations += 1 - for i in range(m): - minDist = np.inf - minIndex = -1 - # to find the nearest centroid - for j in range(k): - distJI = distMeas(centroids[j, :], dataSet[i, :]) - if distJI < minDist: - minDist = distJI - minIndex = j - if clusterAssment[i, 0] != minIndex: - clusterChanged = True - clusterAssment[i, :] = minIndex, minDist**2 - #print(centroids) - for cent in range(k): - ptsInClust = dataSet[np.nonzero(clusterAssment[:, 0] == cent)] - centroids[cent, :] = np.mean(ptsInClust, axis=0) - result = { - 'k': k, - 'centroids': centroids, - 'labels': clusterAssment[:, 0].astype(int), - 'distance2': clusterAssment[:, 1], - 'accessment': clusterAssment, - 'iterations': iterations - } - return result - -- cgit v1.2.2