using System; using System.Linq; using System.Threading; using System.Threading.Tasks; using DbscanImplementation.Eventing; using DbscanImplementation.ResultBuilding; namespace DbscanImplementation { /// /// DBSCAN algorithm implementation type /// /// Takes dataset item row (features, preferences, vector) public class DbscanAlgorithm { /// /// distance calculation metric function between two feature /// public readonly Func MetricFunction; /// /// Curried Function that checking two feature as neighbor /// public readonly Func, bool>> RegionQueryPredicate; private readonly IDbscanEventPublisher publisher; /// /// Takes metric function to compute distances between two /// /// public DbscanAlgorithm(Func metricFunc) { MetricFunction = metricFunc ?? throw new ArgumentNullException(nameof(metricFunc)); RegionQueryPredicate = (mainFeature, epsilon) => relatedPoint => MetricFunction(mainFeature, relatedPoint.Feature) <= epsilon; publisher = new EmptyDbscanEventPublisher(); } public DbscanAlgorithm(Func metricFunc, IDbscanEventPublisher publisher) : this(metricFunc) { this.publisher = publisher ?? throw new ArgumentNullException(nameof(publisher)); } public Task> ComputeClusterDbscanAsync(TF[] allPoints, double epsilon, int minimumPoints, CancellationToken cancellationToken) { return Task.Factory.StartNew(() => ComputeClusterDbscan(allPoints, epsilon, minimumPoints), cancellationToken, TaskCreationOptions.LongRunning, TaskScheduler.Current); } /// /// Performs the DBSCAN clustering algorithm. /// /// feature set /// Desired region ball radius /// Minimum number of points to be in a region /// Overall result of cluster compute operation public DbscanResult ComputeClusterDbscan(TF[] allPoints, double epsilon, int minimumPoints) { if (epsilon <= 0) { throw new ArgumentOutOfRangeException(nameof(epsilon), "Must be greater than zero"); } if (minimumPoints <= 0) { throw new ArgumentOutOfRangeException(nameof(minimumPoints), "Must be greater than zero"); } var allPointsDbscan = allPoints.Select(x => new DbscanPoint(x)).ToArray(); int clusterId = 0; var computeId = Guid.NewGuid(); publisher.Publish(new ComputeStarted(computeId)); for (int i = 0; i < allPointsDbscan.Length; i++) { var currentPoint = allPointsDbscan[i]; if (currentPoint.PointType.HasValue) { publisher.Publish(new PointAlreadyProcessed(currentPoint)); continue; } publisher.Publish(new PointProcessStarted(currentPoint)); publisher.Publish(new RegionQueryStarted(currentPoint, epsilon, minimumPoints)); var neighborPoints = RegionQuery(allPointsDbscan, currentPoint.Feature, epsilon); publisher.Publish(new RegionQueryFinished(currentPoint, neighborPoints)); if (neighborPoints.Length < minimumPoints) { currentPoint.PointType = PointType.Noise; publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Noise)); publisher.Publish(new PointProcessFinished(currentPoint)); continue; } clusterId++; currentPoint.ClusterId = clusterId; currentPoint.PointType = PointType.Core; publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Core)); publisher.Publish(new PointProcessFinished(currentPoint)); publisher.Publish( new ClusteringStarted(currentPoint, neighborPoints, clusterId, epsilon, minimumPoints)); ExpandCluster(allPointsDbscan, neighborPoints, clusterId, epsilon, minimumPoints); publisher.Publish( new ClusteringFinished(currentPoint, neighborPoints, clusterId, epsilon, minimumPoints)); } publisher.Publish(new ComputeFinished(computeId)); var resultBuilder = new DbscanResultBuilder(); foreach (var p in allPointsDbscan) { resultBuilder.Process(p); } return resultBuilder.Result; } /// /// Checks current cluster for expanding it /// /// Dataset /// other points in same region /// given clusterId /// Desired region ball radius /// Minimum number of points to be in a region private void ExpandCluster(DbscanPoint[] allPoints, DbscanPoint[] neighborPoints, int clusterId, double epsilon, int minimumPoints) { for (int i = 0; i < neighborPoints.Length; i++) { var currentPoint = neighborPoints[i]; publisher.Publish(new PointProcessStarted(currentPoint)); if (currentPoint.PointType == PointType.Noise) { currentPoint.ClusterId = clusterId; currentPoint.PointType = PointType.Border; publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Border)); publisher.Publish(new PointProcessFinished(currentPoint)); continue; } if (currentPoint.PointType.HasValue) { publisher.Publish(new PointAlreadyProcessed(currentPoint)); continue; } currentPoint.ClusterId = clusterId; publisher.Publish(new RegionQueryStarted(currentPoint, epsilon, minimumPoints)); var otherNeighborPoints = RegionQuery(allPoints, currentPoint.Feature, epsilon); publisher.Publish(new RegionQueryFinished(currentPoint, otherNeighborPoints)); if (otherNeighborPoints.Length < minimumPoints) { currentPoint.PointType = PointType.Border; publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Border)); publisher.Publish(new PointProcessFinished(currentPoint)); continue; } currentPoint.PointType = PointType.Core; publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Core)); publisher.Publish(new PointProcessFinished(currentPoint)); neighborPoints = neighborPoints.Union(otherNeighborPoints).ToArray(); } } /// /// Checks and searchs neighbor points for given point /// /// Dbscan points converted from feature set /// Focused feature to be searched neighbors /// Desired region ball radius /// Calculated neighbor points public DbscanPoint[] RegionQuery(DbscanPoint[] allPoints, TF mainFeature, double epsilon) { return allPoints.Where(RegionQueryPredicate(mainFeature, epsilon)).ToArray(); } } }