elbow_curve.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. import time
  2. from warnings import simplefilter
  3. import numpy as np
  4. from joblib import Parallel, delayed
  5. from sklearn.base import clone
  6. import wandb
  7. # ignore all future warnings
  8. simplefilter(action="ignore", category=FutureWarning)
  9. def elbow_curve(clusterer, X, cluster_ranges, n_jobs, show_cluster_time): # noqa: N803
  10. if cluster_ranges is None:
  11. cluster_ranges = range(1, 10, 2)
  12. else:
  13. cluster_ranges = sorted(cluster_ranges)
  14. clfs, times = _compute_results_parallel(n_jobs, clusterer, X, cluster_ranges)
  15. clfs = np.absolute(clfs)
  16. table = make_table(cluster_ranges, clfs, times)
  17. chart = wandb.visualize("wandb/elbow/v1", table)
  18. return chart
  19. def make_table(cluster_ranges, clfs, times):
  20. columns = ["cluster_ranges", "errors", "clustering_time"]
  21. data = list(zip(cluster_ranges, clfs, times))
  22. table = wandb.Table(columns=columns, data=data)
  23. return table
  24. def _compute_results_parallel(n_jobs, clusterer, x, cluster_ranges):
  25. parallel_runner = Parallel(n_jobs=n_jobs)
  26. _cluster_scorer = delayed(_clone_and_score_clusterer)
  27. results = parallel_runner(_cluster_scorer(clusterer, x, i) for i in cluster_ranges)
  28. clfs, times = zip(*results)
  29. return clfs, times
  30. def _clone_and_score_clusterer(clusterer, x, n_clusters):
  31. start = time.time()
  32. clusterer = clone(clusterer)
  33. clusterer.n_clusters = n_clusters
  34. return clusterer.fit(x).score(x), time.time() - start