package tutorial.clustering;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

// NOTE(review): the import paths for ELKI types that were not part of the
// original import list are assumed from the ELKI release line that still has
// NumberDistance; in particular the package of SLINK (and the location of
// SLINK_MINCLUSTERS_ID) differs between ELKI versions -- confirm against the
// ELKI jar actually on the classpath.
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.SLINK;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;

/**
 * Naive agglomerative hierarchical clustering, as built up step by step in
 * the tutorial: a full distance matrix is computed, then the two closest
 * active clusters are merged repeatedly (keeping the minimum of their
 * distances to every other object) until {@link #numclusters} clusters remain.
 *
 * @param <O> Object type
 * @param <D> Distance type
 */
public class NaiveAgglomerativeHierarchicalClustering<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
  /**
   * Static class logger.
   */
  private static final Logging LOG = Logging.getLogger(NaiveAgglomerativeHierarchicalClustering.class);

  // ---- Updating the constructor ----

  /**
   * Threshold, how many clusters to extract.
   */
  int numclusters;

  /**
   * Constructor.
   *
   * @param distanceFunction Distance function to use
   * @param numclusters Number of clusters to extract
   */
  public NaiveAgglomerativeHierarchicalClustering(DistanceFunction<? super O, D> distanceFunction, int numclusters) {
    super(distanceFunction);
    this.numclusters = numclusters;
  }

  /**
   * Constructor kept from the initial generated stub so that code written
   * against it still compiles. It merges everything into a single cluster
   * ({@code numclusters = 1}).
   *
   * @param distanceFunction Distance function to use
   */
  protected NaiveAgglomerativeHierarchicalClustering(DistanceFunction<? super O, D> distanceFunction) {
    this(distanceFunction, 1);
  }

  // ---- The run method ----

  /**
   * Run the algorithm.
   *
   * @param db Database to obtain the distance query from
   * @param relation Relation holding the objects to cluster
   * @return Clustering with one top-level cluster per remaining group
   */
  public Result run(Database db, Relation<O> relation) {
    DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction());
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int size = ids.size();

    LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");

    // ---- Computing the distance matrix ----
    // Only y < x is queried; the value is mirrored so the matrix is symmetric.
    double[][] matrix = new double[size][size];
    DBIDArrayIter ix = ids.iter(), iy = ids.iter();
    for (int x = 0; ix.valid(); x++, ix.advance()) {
      iy.seek(0);
      for (int y = 0; y < x; y++, iy.advance()) {
        final double dist = dq.distance(ix, iy).doubleValue();
        matrix[x][y] = dist;
        matrix[y][x] = dist;
      }
    }

    // Auxiliary data for the merges.
    // height[x] stays at +infinity while x is still an active cluster
    // representative; it is set to the merge distance once x was merged away.
    double[] height = new double[size];
    Arrays.fill(height, Double.POSITIVE_INFINITY);
    // Records, for every merged-away object, the object it was merged into.
    // Only written in this version of the algorithm.
    ArrayModifiableDBIDs parent = DBIDUtil.newArray(ids);
    // Members of the non-singleton clusters, keyed by representative index.
    Map<Integer, ModifiableDBIDs> clusters = new HashMap<>();

    // ---- Algorithm main loop ----
    // Each iteration merges two clusters, so size - numclusters merges leave
    // numclusters clusters.
    final int stop = size - numclusters;
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", stop, LOG) : null;
    for (int i = 0; i < stop; i++) {
      // Find clusters to merge: the closest pair of still-active indices.
      double min = Double.POSITIVE_INFINITY;
      int minx = -1, miny = -1;
      for (int x = 0; x < size; x++) {
        if (height[x] < Double.POSITIVE_INFINITY) {
          continue; // x was already merged into another cluster
        }
        for (int y = 0; y < x; y++) {
          if (height[y] < Double.POSITIVE_INFINITY) {
            continue; // y was already merged into another cluster
          }
          if (matrix[x][y] < min) {
            min = matrix[x][y];
            minx = x;
            miny = y;
          }
        }
      }

      // Store the merge in auxiliary data.
      // Avoid allocating memory, by reusing existing iterators:
      ix.seek(minx);
      iy.seek(miny);
      // Perform merge in data structure: x -> y
      // Since y < x, prefer keeping y, dropping x.
      height[minx] = min;
      parent.set(minx, iy);
      // Merge into cluster
      ModifiableDBIDs cx = clusters.get(minx);
      ModifiableDBIDs cy = clusters.get(miny);
      if (cy == null) {
        // y was a singleton so far: start its member set with y itself.
        cy = DBIDUtil.newHashSet();
        cy.add(iy);
      }
      if (cx == null) {
        // x was a singleton: only x itself moves over.
        cy.add(ix);
      } else {
        cy.addDBIDs(cx);
        clusters.remove(minx);
      }
      clusters.put(miny, cy);

      // Update distance matrix for y: keep the smaller of the two distances.
      for (int j = 0; j < size; j++) {
        matrix[j][miny] = Math.min(matrix[j][minx], matrix[j][miny]);
        matrix[miny][j] = Math.min(matrix[minx][j], matrix[miny][j]);
      }

      if (prog != null) {
        prog.incrementProcessed(LOG);
      }
    }
    if (prog != null) {
      prog.ensureCompleted(LOG);
    }

    // ---- Returning a Clustering ----
    final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering");
    for (int x = 0; x < size; x++) {
      if (height[x] < Double.POSITIVE_INFINITY) {
        continue; // merged away, reported as part of another cluster
      }
      DBIDs cids = clusters.get(x);
      // For singleton objects, this may be null.
      if (cids == null) {
        ix.seek(x);
        cids = DBIDUtil.deref(ix);
      }
      Cluster<Model> cluster = new Cluster<>("Cluster", cids);
      dendrogram.addToplevelCluster(cluster);
    }
    return dendrogram;
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    // The algorithm accepts whatever the distance function accepts.
    return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
  }

  @Override
  protected Logging getLogger() {
    return LOG;
  }

  // ---- Adding a Parameterizer ----

  /**
   * Parameterization class.
   *
   * @param <O> Object type
   * @param <D> Distance type
   */
  public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
    /**
     * Desired number of clusters.
     */
    int numclusters = 0;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      // Reuses the option ID declared by SLINK for the number of clusters.
      IntParameter numclustersP = new IntParameter(SLINK.Parameterizer.SLINK_MINCLUSTERS_ID);
      numclustersP.addConstraint(new GreaterEqualConstraint(1));
      if (config.grab(numclustersP)) {
        numclusters = numclustersP.intValue();
      }
    }

    @Override
    protected NaiveAgglomerativeHierarchicalClustering<O, D> makeInstance() {
      return new NaiveAgglomerativeHierarchicalClustering<>(distanceFunction, numclusters);
    }
  }
}