## Wednesday, November 16, 2011

### Metrics, Distances and all things pretty

Today I post a self contained block of code that defines many metrics and distances often used in optimization, machine learning and computer vision.

I don't usually show my code in public...
``````﻿using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.Runtime.InteropServices;
using System.Text;
using System.Linq;

{
public class MathExtended
{
/// <summary>
/// Defines the different metrics for distance
/// </summary>
public enum DISTANCE_METRIC {
BRAY_CURTIS, CANBERRA, CHESSBOARD, CORRELATION,
EUCLIDEAN, NORMALIZED_SQUARED_EUCLIDEAN, COSINE_SIMILARITY,
MANHATTAN, SQUARED_EUCLIDEAN};

/// <summary>
/// Measure the distance of 2 n-th dimensional vectors using any of the given metrics
/// </summary>
/// <param name="_vector1">Vector 1</param>
/// <param name="_vector2">Vector 2</param>
/// <param name="_metric">Distance metric</param>
/// <returns>The distance between the two points</returns>
public static float Distance(float[] _vector1, float[] _vector2, DISTANCE_METRIC _metric = DISTANCE_METRIC.EUCLIDEAN)
{
float result = -1.0f;
switch (_metric)
{
case DISTANCE_METRIC.BRAY_CURTIS:
result = DistanceBrayCurtis(_vector1, _vector2);
break;
case DISTANCE_METRIC.CANBERRA:
result = DistanceCanberra(_vector1, _vector2);
break;
case DISTANCE_METRIC.CHESSBOARD:
result = DistanceChessboard(_vector1, _vector2);
break;
case DISTANCE_METRIC.EUCLIDEAN:
result = DistanceEuclidean(_vector1, _vector2);
break;
case DISTANCE_METRIC.NORMALIZED_SQUARED_EUCLIDEAN:
result = DistanceNormalizedSquaredEuclidean(_vector1, _vector2);
break;
case DISTANCE_METRIC.COSINE_SIMILARITY:
result = DistanceCosineSimilarity(_vector1, _vector2);
break;
case DISTANCE_METRIC.CORRELATION:
result = DistanceCorrelation(_vector1, _vector2);
break;
case DISTANCE_METRIC.MANHATTAN:
result = DistanceManhattan(_vector1,_vector2);
break;
case DISTANCE_METRIC.SQUARED_EUCLIDEAN:
result = DistanceSquaredEuclidean(_vector1, _vector2);
break;
default:
result = -1;
break;
}
return result;
}

/// <summary>
/// Euclidean distance between 2 2-dimensional points
/// </summary>
/// <param name="_point1">Point 1</param>
/// <param name="_point2">Point 2</param>
/// <returns>Euclidean distance</returns>
public static float Distance(Point _point1, Point _point2)
{
float xDistance = _point1.X - _point2.X;
float yDistance = _point1.Y - _point2.Y;
return (float)System.Math.Sqrt(xDistance * xDistance + yDistance * yDistance);
}

/// <summary>
/// Euclidean distance between 2 2-dimensional points
/// </summary>
/// <param name="_point1">Point 1</param>
/// <param name="_point2">Point 2</param>
/// <returns>Euclidean distance</returns>
public static float Distance(PointF _point1, PointF _point2)
{
float xDistance = _point1.X - _point2.X;
float yDistance = _point1.Y - _point2.Y;
return (float)System.Math.Sqrt(xDistance * xDistance + yDistance * yDistance);
}

/// <summary>
/// Euclidean length of n-th dimensional vector
/// </summary>
/// <param name="_vector">Vector</param>
/// <returns>Length of vector</returns>
public static float Length(float[] _vector)
{
float sum = 0.0f;
if (_vector == null)
return -1.0f;
for (int i = 0; i < _vector.Length; i++)
sum += _vector[i] * _vector[i];
return (float)Math.Sqrt(sum);
}

/// <summary>
/// Run through a list of points and find the closest one to the input vector
/// The metric type is  euclidean distance
/// </summary>
/// <param name="point">The point to search for</param>
/// <param name="pointList">The distance metric</param>
/// <returns>The minimum distance point</returns>
public static PointF NearestNeighbour(PointF point, PointF[] pointList)
{
return pointList.OrderBy(fromPoint => Distance(point, fromPoint)).First();
}

/// <summary>
/// Run through an array of float vectors and find the nearest one
/// to the input vector, given the distance metric type
/// </summary>
/// <param name="_vector">The vector to search for</param>
/// <param name="_vectorsArray">The array of vectors to find the minimum distance to</param>
/// <param name="_metric">The distance metric</param>
/// <returns>The minimum distance vector</returns>
public static float[] NearestNeighbour(
float[] _vector,
float[][] _vectorsArray,
DISTANCE_METRIC _metric = DISTANCE_METRIC.EUCLIDEAN)
{
return _vectorsArray.OrderBy(fromVector => Distance(_vector, fromVector, _metric)).First();
}

/// <summary>
/// Run through an array of float vectors and find the nearest k vectors
/// to the input vector, given the distance metric type
/// </summary>
/// <param name="_vector">The vector to search for</param>
/// <param name="_vectorsArray">The array of vectors to find the minimum distance to</param>
/// <param name="_metric">The distance metric</param>
/// <returns>The nearest neighbours in (distance vector) key pairs</returns>
public static KeyValuePair<float, float[]>[] KNearestNeighbour(
float[] _vector,
float[][] _vectorsArray,
int k = 1,
DISTANCE_METRIC _metric = DISTANCE_METRIC.EUCLIDEAN)
{
SortedList<float, float[]> sortedDistances = new SortedList<float, float[]>(_vectorsArray.Length);
for (int i = 0; i < _vectorsArray.Length; i++)
return sortedDistances.Take(k).ToArray<KeyValuePair<float, float[]>>();
}

/// <summary>
/// Run through an array of float vectors and find the nearest k vectors
/// to the input vector, given the distance metric type
/// </summary>
/// <param name="_vector">The vector to search for</param>
/// <param name="_vectorsArray">The array of vectors to find the minimum distance to</param>
/// <param name="_metric">The distance metric</param>
/// <returns>The nearest neighbours in (distance index) key pairs</returns>
public static KeyValuePair<float, int>[] KNearestNeighboursIndex(
float[] _vector,
float[][] _vectorsArray,
int k = 1,
DISTANCE_METRIC _metric = DISTANCE_METRIC.EUCLIDEAN)
{
SortedList<float, int> sortedDistances = new SortedList<float, int>(_vectorsArray.Length);
for (int i = 0; i < _vectorsArray.Length; i++)
return sortedDistances.Take(k).ToArray<KeyValuePair<float,int>>();
}

/// <summary>
/// Sum of absolute differences of two n-th dimensional vectors
/// </summary>
/// <param name="_vector1">Vector 1</param>
/// <param name="_vector2">Vector 2</param>
/// <returns>Sum of absolute differences else if error -1</returns>
public static float DistanceManhattan(float[] _vector1, float[] _vector2)
{
float sum = 0.0f;
if (_vector1 == null || _vector2 == null)
return -1.0f;
if (_vector1.Length != _vector2.Length)
return -1.0f;
for (int i = 0; i < _vector1.Length; i++)
sum += Math.Abs(_vector1[i] - _vector2[i]);
return sum;
}

/// <summary>
/// Sum of squared differences of two n-th dimensional vectors
/// </summary>
/// <param name="_vector1">Vector 1</param>
/// <param name="_vector2">Vector 2</param>
/// <returns>Sum of squared differences else if error -1</returns>
public static float DistanceSquaredEuclidean(float[] _vector1, float[] _vector2)
{
float sum = 0.0f, tmp = 0.0f;
if (_vector1 == null || _vector2 == null)
return -1.0f;
if (_vector1.Length != _vector2.Length)
return -1.0f;
for (int i = 0; i < _vector1.Length; i++)
{
tmp = (_vector1[i] - _vector2[i]);
sum += tmp * tmp;
}
return sum;
}

/// <summary>
/// Cosine similarity is a measure of similarity between
/// two n-th dimensional vectors vectors by measuring the cosine of the angle between them
/// </summary>
/// <param name="_vector1">Vector 1</param>
/// <param name="_vector2">Vector 2</param>
/// <returns>Cosine similarity else if error -1</returns>
public static float DistanceCosineSimilarity(float[] _vector1, float[] _vector2)
{
float sum = 0.0f;
if (_vector1 == null || _vector2 == null)
return -1.0f;
if (_vector1.Length != _vector2.Length)
return -1.0f;
for (int i = 0; i < _vector1.Length; i++)
sum += (_vector1[i] * _vector2[i]);
return sum / (MathExtended.Length(_vector1) * MathExtended.Length(_vector2));
}

/// <summary>
/// Euclidean distance between 2 n-dimensional vectors
/// </summary>
/// <param name="_vector1">Vector 1</param>
/// <param name="_vector2">Vector 2</param>
/// <returns>Euclidean distance</returns>
public static float DistanceEuclidean(float[] _vector1, float[] _vector2)
{
float tmp, sum = 0.0f;
if (_vector1 == null || _vector2 == null)
return -1.0f;
if (_vector1.Length != _vector2.Length)
return -1.0f;
for (int i = 0; i < _vector1.Length; i++)
{
tmp = _vector1[i] - _vector2[i];
sum += tmp * tmp;
}
return (float)Math.Sqrt(sum);
}

/// <summary>
/// Normalized squared Euclidean distance between 2 n-th dimensional vectors
/// </summary>
/// <param name="_vector1">Vector 1</param>
/// <param name="_vector2">Vector 2</param>
/// <returns>Normalized squared Euclidean distance else if error -1</returns>
public static float DistanceNormalizedSquaredEuclidean(float[] _vector1, float[] _vector2)
{
float sum1 = 0.0f, sum2 = 0.0f, sumTotal1 = 0.0f, sumTotal2 = 0.0f;
if (_vector1 == null || _vector2 == null)
return -1.0f;
if (_vector1.Length != _vector2.Length)
return -1.0f;
for (int i = 0; i < _vector1.Length; i++)
{
sum1 += _vector1[i];
sum2 += _vector2[i];
}
sum1 /= _vector1.Length;
sum2 /= _vector1.Length;
for (int i = 0; i < _vector1.Length; i++)
{
sumTotal1 += (float)Math.Pow(_vector1[i] - sum1 - _vector2[i] + sum2, 2);
sumTotal2 += (float)Math.Pow(_vector1[i] - sum1, 2) + (float)Math.Pow(_vector2[i] - sum2, 2);
}
return sumTotal1 / (2.0f * sumTotal2);
}

/// <summary>
/// Gives the correlation distance between 2 n-th dimensional vectors
/// </summary>
/// <param name="_vector1">Vector 1</param>
/// <param name="_vector2">Vector 2</param>
/// <returns>Correlation distance else if error -1</returns>
public static float DistanceCorrelation(float[] _vector1, float[] _vector2)
{
float sum1 = 0.0f, sum2 = 0.0f, sumTotal1 = 0.0f, sumTotal2 = 0.0f, sumTotal3 = 0.0f, tmp1 = 0.0f, tmp2 = 0.0f;
if (_vector1 == null || _vector2 == null)
return -1.0f;
if (_vector1.Length != _vector2.Length)
return -1.0f;
for (int i = 0; i < _vector1.Length; i++)
{
sum1 += _vector1[i];
sum2 += _vector2[i];
}
sum1 /= _vector1.Length;
sum2 /= _vector1.Length;
for (int i = 0; i < _vector1.Length; i++)
{
tmp1 = (_vector1[i] - sum1);
tmp2 = (_vector2[i] - sum2);
sumTotal1 += tmp1 * tmp2;
sumTotal2 += tmp1 * tmp1;
sumTotal3 += tmp2 * tmp2;
}
sumTotal2 = (float)Math.Sqrt(sumTotal2);
sumTotal3 = (float)Math.Sqrt(sumTotal3);
return 1.0f - sumTotal1 / (sumTotal2 * sumTotal3);
}

/// <summary>
/// Gives the canberra distance between 2 n-th dimensional vectors
/// </summary>
/// <param name="_vector1">Vector 1</param>
/// <param name="_vector2">Vector 2</param>
/// <returns>Canberra distance else if error -1</returns>
public static float DistanceCanberra(float[] _vector1, float[] _vector2)
{
float sum = 0.0f;
if (_vector1 == null || _vector2 == null)
return -1.0f;
if (_vector1.Length != _vector2.Length)
return -1.0f;
for (int i = 0; i < _vector1.Length; i++)
sum += (float)Math.Abs(_vector1[i] - _vector2[i]) /
(float)(Math.Abs(_vector1[i]) + Math.Abs(_vector2[i]));
return sum;
}

/// <summary>
/// Gives the BrayCurtis distance between 2 n-th dimensional vectors
/// </summary>
/// <param name="_vector1">Vector 1</param>
/// <param name="_vector2">Vector 2</param>
/// <returns>BrayCurtis distance else if error -1</returns>
public static float DistanceBrayCurtis(float[] _vector1, float[] _vector2)
{
float sum1 = 0.0f, sum2 = 0.0f;
if (_vector1 == null || _vector2 == null)
return -1.0f;
if (_vector1.Length != _vector2.Length)
return -1.0f;
for (int i = 0; i < _vector1.Length; i++)
{
sum1 += (float)Math.Abs(_vector1[i] - _vector2[i]);
sum2 += (float)Math.Abs(_vector1[i] + _vector2[i]);
}
return sum1 / sum2;
}

/// <summary>
/// Gives the Chessboard distance between 2 n-th dimensional vectors
/// </summary>
/// <param name="_vector1">Vector 1</param>
/// <param name="_vector2">Vector 2</param>
/// <returns>Chessboard distance else if error -1</returns>
public static float DistanceChessboard(float[] _vector1, float[] _vector2)
{
float tmp = 0.0f;
if (_vector1 == null || _vector2 == null)
return -1.0f;
if (_vector1.Length != _vector2.Length)
return -1.0f;
for (int i = 0; i < _vector1.Length; i++)
tmp = (float)(Math.Max(Math.Abs(_vector1[i]-_vector2[2]), tmp));
return tmp;
}
}
}

``````