using System;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using DbscanImplementation.Eventing;
using DbscanImplementation.ResultBuilding;
namespace DbscanImplementation
{
///
/// DBSCAN algorithm implementation type
///
/// Takes dataset item row (features, preferences, vector)
public class DbscanAlgorithm
{
///
/// distance calculation metric function between two feature
///
public readonly Func MetricFunction;
///
/// Curried Function that checking two feature as neighbor
///
public readonly Func, bool>> RegionQueryPredicate;
private readonly IDbscanEventPublisher publisher;
///
/// Takes metric function to compute distances between two
///
///
public DbscanAlgorithm(Func metricFunc)
{
MetricFunction = metricFunc ?? throw new ArgumentNullException(nameof(metricFunc));
RegionQueryPredicate =
(mainFeature, epsilon) => relatedPoint => MetricFunction(mainFeature, relatedPoint.Feature) <= epsilon;
publisher = new EmptyDbscanEventPublisher();
}
public DbscanAlgorithm(Func metricFunc, IDbscanEventPublisher publisher)
: this(metricFunc)
{
this.publisher = publisher ?? throw new ArgumentNullException(nameof(publisher));
}
public Task> ComputeClusterDbscanAsync(TF[] allPoints, double epsilon, int minimumPoints,
CancellationToken cancellationToken)
{
return Task.Factory.StartNew(() =>
ComputeClusterDbscan(allPoints, epsilon, minimumPoints),
cancellationToken, TaskCreationOptions.LongRunning, TaskScheduler.Current);
}
///
/// Performs the DBSCAN clustering algorithm.
///
/// feature set
/// Desired region ball radius
/// Minimum number of points to be in a region
/// Overall result of cluster compute operation
public DbscanResult ComputeClusterDbscan(TF[] allPoints, double epsilon, int minimumPoints)
{
if (epsilon <= 0)
{
throw new ArgumentOutOfRangeException(nameof(epsilon), "Must be greater than zero");
}
if (minimumPoints <= 0)
{
throw new ArgumentOutOfRangeException(nameof(minimumPoints), "Must be greater than zero");
}
var allPointsDbscan = allPoints.Select(x => new DbscanPoint(x)).ToArray();
int clusterId = 0;
var computeId = Guid.NewGuid();
publisher.Publish(new ComputeStarted(computeId));
for (int i = 0; i < allPointsDbscan.Length; i++)
{
var currentPoint = allPointsDbscan[i];
if (currentPoint.PointType.HasValue)
{
publisher.Publish(new PointAlreadyProcessed(currentPoint));
continue;
}
publisher.Publish(new PointProcessStarted(currentPoint));
publisher.Publish(new RegionQueryStarted(currentPoint, epsilon, minimumPoints));
var neighborPoints = RegionQuery(allPointsDbscan, currentPoint.Feature, epsilon);
publisher.Publish(new RegionQueryFinished(currentPoint, neighborPoints));
if (neighborPoints.Length < minimumPoints)
{
currentPoint.PointType = PointType.Noise;
publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Noise));
publisher.Publish(new PointProcessFinished(currentPoint));
continue;
}
clusterId++;
currentPoint.ClusterId = clusterId;
currentPoint.PointType = PointType.Core;
publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Core));
publisher.Publish(new PointProcessFinished(currentPoint));
publisher.Publish(
new ClusteringStarted(currentPoint, neighborPoints, clusterId, epsilon, minimumPoints));
ExpandCluster(allPointsDbscan, neighborPoints, clusterId, epsilon, minimumPoints);
publisher.Publish(
new ClusteringFinished(currentPoint, neighborPoints, clusterId, epsilon, minimumPoints));
}
publisher.Publish(new ComputeFinished(computeId));
var resultBuilder = new DbscanResultBuilder();
foreach (var p in allPointsDbscan)
{
resultBuilder.Process(p);
}
return resultBuilder.Result;
}
///
/// Checks current cluster for expanding it
///
/// Dataset
/// other points in same region
/// given clusterId
/// Desired region ball radius
/// Minimum number of points to be in a region
private void ExpandCluster(DbscanPoint[] allPoints, DbscanPoint[] neighborPoints,
int clusterId, double epsilon, int minimumPoints)
{
for (int i = 0; i < neighborPoints.Length; i++)
{
var currentPoint = neighborPoints[i];
publisher.Publish(new PointProcessStarted(currentPoint));
if (currentPoint.PointType == PointType.Noise)
{
currentPoint.ClusterId = clusterId;
currentPoint.PointType = PointType.Border;
publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Border));
publisher.Publish(new PointProcessFinished(currentPoint));
continue;
}
if (currentPoint.PointType.HasValue)
{
publisher.Publish(new PointAlreadyProcessed(currentPoint));
continue;
}
currentPoint.ClusterId = clusterId;
publisher.Publish(new RegionQueryStarted(currentPoint, epsilon, minimumPoints));
var otherNeighborPoints = RegionQuery(allPoints, currentPoint.Feature, epsilon);
publisher.Publish(new RegionQueryFinished(currentPoint, otherNeighborPoints));
if (otherNeighborPoints.Length < minimumPoints)
{
currentPoint.PointType = PointType.Border;
publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Border));
publisher.Publish(new PointProcessFinished(currentPoint));
continue;
}
currentPoint.PointType = PointType.Core;
publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Core));
publisher.Publish(new PointProcessFinished(currentPoint));
neighborPoints = neighborPoints.Union(otherNeighborPoints).ToArray();
}
}
///
/// Checks and searchs neighbor points for given point
///
/// Dbscan points converted from feature set
/// Focused feature to be searched neighbors
/// Desired region ball radius
/// Calculated neighbor points
public DbscanPoint[] RegionQuery(DbscanPoint[] allPoints, TF mainFeature, double epsilon)
{
return allPoints.Where(RegionQueryPredicate(mainFeature, epsilon)).ToArray();
}
}
}