"""Hierarchical clustering demo on nine labelled 2-D observations.

The script builds a Ward linkage tree over ``features``, cuts it into at most
three flat clusters, prints a short report (first merges, flat cluster
membership, dendrogram leaf order) and finally reports whether the clusters
found match the expected web / cache / db grouping.
"""

import numpy as np
from scipy.cluster.hierarchy import dendrogram, fcluster, linkage

# One name per observation; row i of ``features`` belongs to ``labels[i]``.
labels = np.array(
    [
        "web-1",
        "web-2",
        "web-3",
        "cache-1",
        "cache-2",
        "cache-3",
        "db-1",
        "db-2",
        "db-3",
    ]
)

# Two features per observation. The values form three tight groups, near
# (0, 0), (0, 5) and (5, 5), in the same order as the label prefixes.
features = np.array(
    [
        [0.0, 0.1],
        [0.2, -0.1],
        [-0.2, 0.0],
        [0.1, 4.8],
        [-0.1, 5.1],
        [0.3, 5.0],
        [5.1, 4.9],
        [4.8, 5.2],
        [5.0, 5.1],
    ]
)

# Agglomerative clustering; each row of the result describes one merge as
# (left index, right index, merge distance, observations in the new cluster).
linkage_matrix = linkage(features, method="ward", metric="euclidean")

# Cut the tree so that no more than three flat clusters are formed.
cluster_ids = fcluster(linkage_matrix, t=3, criterion="maxclust")

# no_plot=True: only compute the dendrogram data (leaf order etc.), so no
# rendering is attempted.
tree = dendrogram(linkage_matrix, labels=labels, no_plot=True)

print(f"observations: {len(labels)}")
print(f"linkage_shape: {linkage_matrix.shape}")

print("first_merges:")
for merge_index, row in enumerate(linkage_matrix[:4]):
    left, right, distance, count = row
    print(
        f" {merge_index}: left={int(left)} right={int(right)} "
        f"distance={distance:.3f} count={int(count)}"
    )

# Compute each cluster's member labels once and reuse them for both the
# printed report and the comparison below. np.unique returns the distinct
# ids in ascending order, so the report is printed in sorted id order.
members_by_cluster = {
    int(cluster_id): [str(name) for name in labels[cluster_ids == cluster_id]]
    for cluster_id in np.unique(cluster_ids)
}

print("flat_clusters:")
for cluster_id, members in members_by_cluster.items():
    print(f" {cluster_id}: {', '.join(members)}")

expected_groups = {
    frozenset(["web-1", "web-2", "web-3"]),
    frozenset(["cache-1", "cache-2", "cache-3"]),
    frozenset(["db-1", "db-2", "db-3"]),
}
# Compare as a set of frozensets: the numeric cluster ids are arbitrary, only
# the partition of the labels matters.
observed_groups = {frozenset(members) for members in members_by_cluster.values()}

print(f"dendrogram_leaf_order: {', '.join(tree['ivl'])}")
print(f"expected_groups_match: {observed_groups == expected_groups}")