Move some scripts

author: Aaron LI <aly@aaronly.me> 2017-10-16 10:59:31 +0800
committer: Aaron LI <aly@aaronly.me> 2017-10-16 10:59:31 +0800
commit: 020c5df2758d299f72d4badc98f8255edfa61b3a (patch)
tree: 558ed7e4286ce88bab7f76f121987c3994a747ab /cluster/kMeans.py
parent: 2a4f07ece389ab3454afeeeced8480a1c958f8f9 (diff)
download: atoolbox-020c5df2758d299f72d4badc98f8255edfa61b3a.tar.bz2
1 files changed, 0 insertions, 76 deletions
diff --git a/cluster/kMeans.py b/cluster/kMeans.py
deleted file mode 100644
index f4868c6..0000000
--- a/cluster/kMeans.py
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-#
-# Credit: Machine Learning in Action: Chapter 10
-#
-# Aaron LI
-# 2015/06/23
-#
-
-"""
-k-means clustering algorithm
-"""
-
-
-import numpy as np
-
-
def loadDataSet(fileName):
    """
    Load a tab-separated text file of numbers into a NumPy array.

    Every non-blank line is treated as one sample: surrounding whitespace
    is stripped, the remainder is split on tab characters and each field
    is parsed with ``float``.  Blank (whitespace-only) lines are skipped,
    so a trailing empty line no longer aborts the load.

    Parameters
    ----------
    fileName : str or path-like
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        The parsed rows passed through ``np.array``; an empty file yields
        an empty array.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a field of a non-blank line cannot be parsed as a float.
    """
    dataMat = []
    # The context manager guarantees the handle is closed, even when a
    # malformed field makes ``float`` raise part-way through the file.
    with open(fileName) as fr:
        # Iterate the file lazily instead of materialising every line first.
        for line in fr:
            stripped = line.strip()
            if not stripped:
                continue
            dataMat.append([float(field) for field in stripped.split('\t')])
    return np.array(dataMat)
-
-
def distEclud(vecA, vecB):
    """
    Return the Euclidean distance between two vectors.

    The element-wise difference ``vecA - vecB`` is squared, summed over all
    elements and square-rooted.
    """
    delta = vecA - vecB
    squared_sum = np.sum(np.square(delta))
    return np.sqrt(squared_sum)
-
-
def randCent(dataSet, k):
    """
    Draw ``k`` random centroids inside the bounding box of ``dataSet``.

    For every column of ``dataSet`` the minimum and the value range
    (maximum minus minimum) are computed, and each centroid coordinate is
    set to ``minimum + range * u`` with ``u`` taken from
    ``np.random.rand`` (uniform on [0, 1)).

    Parameters
    ----------
    dataSet : 2-D array
        Samples in rows, features in columns.
    k : int
        Number of centroids to generate.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(k, n)`` where ``n`` is the number of columns of
        ``dataSet``.
    """
    n_features = np.shape(dataSet)[1]
    lows = np.min(dataSet, axis=0)
    spans = np.max(dataSet, axis=0) - lows
    # One (n_features, k) draw, transposed, consumes the global random
    # stream feature by feature: k values for column 0, then k values for
    # column 1, and so on.
    draws = np.random.rand(n_features, k).T
    centroids = np.zeros((k, n_features))
    centroids[:, :] = lows + spans * draws
    return centroids
-
-
def kMeans(dataSet, k, distMeas=distEclud, createCent=randCent):
    """
    Cluster the rows of ``dataSet`` into ``k`` groups with k-means.

    Starting from the centroids returned by ``createCent``, the algorithm
    alternates between assigning every sample to its nearest centroid
    (according to ``distMeas``) and moving every centroid to the mean of
    the samples assigned to it, until a full pass changes no assignment.

    Parameters
    ----------
    dataSet : 2-D array
        Samples in rows, features in columns.  It is indexed with
        ``dataSet[i, :]`` and with a boolean row mask.
    k : int
        Number of clusters.
    distMeas : callable, optional
        ``distMeas(centroid, sample)`` returning the distance between two
        vectors.
    createCent : callable, optional
        ``createCent(dataSet, k)`` returning the initial centroids as an
        array with one row per cluster; it is updated in place.

    Returns
    -------
    dict
        ``'k'``          : the requested number of clusters;
        ``'centroids'``  : final centroid array;
        ``'labels'``     : integer cluster index of every sample;
        ``'distance2'``  : squared distance of every sample to its centroid;
        ``'accessment'`` : the full two-column assignment table
                           (cluster index, squared distance) -- the key
                           spelling is kept for existing callers;
        ``'iterations'`` : number of assignment passes performed.
    """
    m = np.shape(dataSet)[0]
    # Column 0: index of the assigned cluster; column 1: squared distance
    # to that cluster's centroid.
    clusterAssment = np.zeros((m, 2))
    # Start from an impossible label.  With the former all-zero start a
    # first pass that put every sample into cluster 0 looked like "nothing
    # changed" and ended the loop before the assignments were re-checked
    # against the updated centroids.
    clusterAssment[:, 0] = -1
    centroids = createCent(dataSet, k)
    clusterChanged = True
    iterations = 0
    while clusterChanged:
        clusterChanged = False
        iterations += 1
        # Assignment step: find the nearest centroid of every sample.
        for i in range(m):
            minDist = np.inf
            minIndex = -1
            for j in range(k):
                distJI = distMeas(centroids[j, :], dataSet[i, :])
                if distJI < minDist:
                    minDist, minIndex = distJI, j
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            clusterAssment[i, :] = minIndex, minDist**2
        # Update step: move each centroid to the mean of its members.
        for cent in range(k):
            ptsInClust = dataSet[clusterAssment[:, 0] == cent]
            # The mean of an empty selection is NaN (plus a RuntimeWarning),
            # and a NaN centroid can never be "nearest" again.  Leave the
            # centroid of a cluster without members where it is instead.
            if len(ptsInClust) > 0:
                centroids[cent, :] = np.mean(ptsInClust, axis=0)
    result = {
        'k': k,
        'centroids': centroids,
        'labels': clusterAssment[:, 0].astype(int),
        'distance2': clusterAssment[:, 1],
        'accessment': clusterAssment,
        'iterations': iterations,
    }
    return result
-
author	Aaron LI <aly@aaronly.me>	2017-10-16 10:59:31 +0800
committer	Aaron LI <aly@aaronly.me>	2017-10-16 10:59:31 +0800
commit	020c5df2758d299f72d4badc98f8255edfa61b3a (patch)
tree	558ed7e4286ce88bab7f76f121987c3994a747ab /cluster/kMeans.py
parent	2a4f07ece389ab3454afeeeced8480a1c958f8f9 (diff)
download	atoolbox-020c5df2758d299f72d4badc98f8255edfa61b3a.tar.bz2