Wednesday, November 16, 2011

Metrics, Distances and all things pretty

Today I post a self contained block of code that defines many metrics and distances often used in optimization, machine learning and computer vision.

I don't usually show my code in public...
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.Runtime.InteropServices;
using System.Text;
using System.Linq;

namespace Emgu_Tracker_Advanced_V2.Source
{
    public class MathExtended
    {
        /// <summary>
        /// Defines the different metrics for distance 
        /// </summary>
        public enum DISTANCE_METRIC { 
            BRAY_CURTIS, CANBERRA, CHESSBOARD, CORRELATION, 
            EUCLIDEAN, NORMALIZED_SQUARED_EUCLIDEAN, COSINE_SIMILARITY,
            MANHATTAN, SQUARED_EUCLIDEAN};

        /// <summary>
        /// Measure the distance of 2 n-th dimensional vectors using any of the given metrics
        /// </summary>
        /// <param name="_vector1">Vector 1</param>
        /// <param name="_vector2">Vector 2</param>
        /// <param name="_metric">Distance metric</param>
        /// <returns>The distance between the two points</returns>
        public static float Distance(float[] _vector1, float[] _vector2, DISTANCE_METRIC _metric = DISTANCE_METRIC.EUCLIDEAN)
        {
            float result = -1.0f;
            switch (_metric)
            {
                case DISTANCE_METRIC.BRAY_CURTIS:
                    result = DistanceBrayCurtis(_vector1, _vector2);
                    break;
                case DISTANCE_METRIC.CANBERRA:
                    result = DistanceCanberra(_vector1, _vector2);
                    break;
                case DISTANCE_METRIC.CHESSBOARD:
                    result = DistanceChessboard(_vector1, _vector2);
                    break;
                case DISTANCE_METRIC.EUCLIDEAN:
                    result = DistanceEuclidean(_vector1, _vector2);
                    break;
                case DISTANCE_METRIC.NORMALIZED_SQUARED_EUCLIDEAN:
                    result = DistanceNormalizedSquaredEuclidean(_vector1, _vector2);
                    break;
                case DISTANCE_METRIC.COSINE_SIMILARITY:
                    result = DistanceCosineSimilarity(_vector1, _vector2);
                    break;
                case DISTANCE_METRIC.CORRELATION:
                    result = DistanceCorrelation(_vector1, _vector2);
                    break;
                case DISTANCE_METRIC.MANHATTAN:
                    result = DistanceManhattan(_vector1,_vector2);
                    break;
                case DISTANCE_METRIC.SQUARED_EUCLIDEAN:
                    result = DistanceSquaredEuclidean(_vector1, _vector2);
                    break;
                default:
                    result = -1;
                    break; 
            }
            return result;
        }

        /// <summary>
        /// Euclidean distance between 2 2-dimensional points
        /// </summary>
        /// <param name="_point1">Point 1</param>
        /// <param name="_point2">Point 2</param>
        /// <returns>Euclidean distance</returns>
        public static float Distance(Point _point1, Point _point2)
        {
            float xDistance = _point1.X - _point2.X;
            float yDistance = _point1.Y - _point2.Y;
            return (float)System.Math.Sqrt(xDistance * xDistance + yDistance * yDistance);
        }

        /// <summary>
        /// Euclidean distance between 2 2-dimensional points
        /// </summary>
        /// <param name="_point1">Point 1</param>
        /// <param name="_point2">Point 2</param>
        /// <returns>Euclidean distance</returns>
        public static float Distance(PointF _point1, PointF _point2)
        {
            float xDistance = _point1.X - _point2.X;
            float yDistance = _point1.Y - _point2.Y;
            return (float)System.Math.Sqrt(xDistance * xDistance + yDistance * yDistance);
        }

        /// <summary>
        /// Euclidean length of n-th dimensional vector
        /// </summary>
        /// <param name="_vector">Vector</param>
        /// <returns>Length of vector</returns>
        public static float Length(float[] _vector)
        {
            float sum = 0.0f;
            if (_vector == null) 
                return -1.0f;
            for (int i = 0; i < _vector.Length; i++)
                sum += _vector[i] * _vector[i];
            return (float)Math.Sqrt(sum);
        }

        /// <summary>
        /// Run through a list of points and find the closest one to the input vector
        /// The metric type is  euclidean distance
        /// </summary>
        /// <param name="point">The point to search for</param>
        /// <param name="pointList">The distance metric</param>
        /// <returns>The minimum distance point</returns>
        public static PointF NearestNeighbour(PointF point, PointF[] pointList)
        {
            return pointList.OrderBy(fromPoint => Distance(point, fromPoint)).First();
        }

        /// <summary>
        /// Run through an array of float vectors and find the nearest one 
        /// to the input vector, given the distance metric type
        /// </summary>
        /// <param name="_vector">The vector to search for</param>
        /// <param name="_vectorsArray">The array of vectors to find the minimum distance to</param>
        /// <param name="_metric">The distance metric</param>
        /// <returns>The minimum distance vector</returns>
        public static float[] NearestNeighbour(
            float[] _vector, 
            float[][] _vectorsArray,
            DISTANCE_METRIC _metric = DISTANCE_METRIC.EUCLIDEAN)
        {
            return _vectorsArray.OrderBy(fromVector => Distance(_vector, fromVector, _metric)).First();
        }

        /// <summary>
        /// Run through an array of float vectors and find the nearest k vectors 
        /// to the input vector, given the distance metric type
        /// </summary>
        /// <param name="_vector">The vector to search for</param>
        /// <param name="_vectorsArray">The array of vectors to find the minimum distance to</param>
        /// <param name="_metric">The distance metric</param>
        /// <returns>The nearest neighbours in (distance vector) key pairs</returns>
        public static KeyValuePair<float, float[]>[] KNearestNeighbour(
            float[] _vector,
            float[][] _vectorsArray,
            int k = 1,
            DISTANCE_METRIC _metric = DISTANCE_METRIC.EUCLIDEAN)
        {
            SortedList<float, float[]> sortedDistances = new SortedList<float, float[]>(_vectorsArray.Length);
            for (int i = 0; i < _vectorsArray.Length; i++)
                sortedDistances.Add(Distance(_vector, _vectorsArray[i], _metric), _vectorsArray[i]);
            return sortedDistances.Take(k).ToArray<KeyValuePair<float, float[]>>();
        }

        /// <summary>
        /// Run through an array of float vectors and find the nearest k vectors 
        /// to the input vector, given the distance metric type
        /// </summary>
        /// <param name="_vector">The vector to search for</param>
        /// <param name="_vectorsArray">The array of vectors to find the minimum distance to</param>
        /// <param name="_metric">The distance metric</param>
        /// <returns>The nearest neighbours in (distance index) key pairs</returns>
        public static KeyValuePair<float, int>[] KNearestNeighboursIndex(
            float[] _vector,
            float[][] _vectorsArray,
            int k = 1,
            DISTANCE_METRIC _metric = DISTANCE_METRIC.EUCLIDEAN)
        {
            SortedList<float, int> sortedDistances = new SortedList<float, int>(_vectorsArray.Length);
            for (int i = 0; i < _vectorsArray.Length; i++)
                sortedDistances.Add(Distance(_vector, _vectorsArray[i], _metric), i);
            return sortedDistances.Take(k).ToArray<KeyValuePair<float,int>>();
        }

        /// <summary>
        /// Sum of absolute differences of two n-th dimensional vectors
        /// </summary>
        /// <param name="_vector1">Vector 1</param>
        /// <param name="_vector2">Vector 2</param>
        /// <returns>Sum of absolute differences else if error -1</returns>
        public static float DistanceManhattan(float[] _vector1, float[] _vector2)
        {
            float sum = 0.0f;
            if (_vector1 == null || _vector2 == null)
                return -1.0f;
            if (_vector1.Length != _vector2.Length)
                return -1.0f;
            for (int i = 0; i < _vector1.Length; i++)
                sum += Math.Abs(_vector1[i] - _vector2[i]);
            return sum;
        }

        /// <summary>
        /// Sum of squared differences of two n-th dimensional vectors
        /// </summary>
        /// <param name="_vector1">Vector 1</param>
        /// <param name="_vector2">Vector 2</param>
        /// <returns>Sum of squared differences else if error -1</returns>
        public static float DistanceSquaredEuclidean(float[] _vector1, float[] _vector2)
        {
            float sum = 0.0f, tmp = 0.0f;
            if (_vector1 == null || _vector2 == null)
                return -1.0f;
            if (_vector1.Length != _vector2.Length)
                return -1.0f;
            for (int i = 0; i < _vector1.Length; i++)
            {
                tmp = (_vector1[i] - _vector2[i]);
                sum += tmp * tmp;
            }
            return sum;
        }

        /// <summary>
        /// Cosine similarity is a measure of similarity between 
        /// two n-th dimensional vectors vectors by measuring the cosine of the angle between them
        /// </summary>
        /// <param name="_vector1">Vector 1</param>
        /// <param name="_vector2">Vector 2</param>
        /// <returns>Cosine similarity else if error -1</returns>
        public static float DistanceCosineSimilarity(float[] _vector1, float[] _vector2)
        {
            float sum = 0.0f;
            if (_vector1 == null || _vector2 == null)
                return -1.0f;
            if (_vector1.Length != _vector2.Length)
                return -1.0f;
            for (int i = 0; i < _vector1.Length; i++)
                sum += (_vector1[i] * _vector2[i]);
            return sum / (MathExtended.Length(_vector1) * MathExtended.Length(_vector2));
        }

        /// <summary>
        /// Euclidean distance between 2 n-dimensional vectors
        /// </summary>
        /// <param name="_vector1">Vector 1</param>
        /// <param name="_vector2">Vector 2</param>
        /// <returns>Euclidean distance</returns>
        public static float DistanceEuclidean(float[] _vector1, float[] _vector2)
        {
            float tmp, sum = 0.0f;
            if (_vector1 == null || _vector2 == null)
                return -1.0f;
            if (_vector1.Length != _vector2.Length)
                return -1.0f;
            for (int i = 0; i < _vector1.Length; i++)
            {
                tmp = _vector1[i] - _vector2[i];
                sum += tmp * tmp;
            }
            return (float)Math.Sqrt(sum);
        }

        /// <summary>
        /// Normalized squared Euclidean distance between 2 n-th dimensional vectors
        /// </summary>
        /// <param name="_vector1">Vector 1</param>
        /// <param name="_vector2">Vector 2</param>
        /// <returns>Normalized squared Euclidean distance else if error -1</returns>
        public static float DistanceNormalizedSquaredEuclidean(float[] _vector1, float[] _vector2)
        {
            float sum1 = 0.0f, sum2 = 0.0f, sumTotal1 = 0.0f, sumTotal2 = 0.0f;
            if (_vector1 == null || _vector2 == null)
                return -1.0f;
            if (_vector1.Length != _vector2.Length)
                return -1.0f;
            for (int i = 0; i < _vector1.Length; i++)
            {
                sum1 += _vector1[i];
                sum2 += _vector2[i];
            }
            sum1 /= _vector1.Length;
            sum2 /= _vector1.Length;
            for (int i = 0; i < _vector1.Length; i++)
            {
                sumTotal1 += (float)Math.Pow(_vector1[i] - sum1 - _vector2[i] + sum2, 2);
                sumTotal2 += (float)Math.Pow(_vector1[i] - sum1, 2) + (float)Math.Pow(_vector2[i] - sum2, 2);
            }
            return sumTotal1 / (2.0f * sumTotal2);
        }

        /// <summary>
        /// Gives the correlation distance between 2 n-th dimensional vectors
        /// </summary>
        /// <param name="_vector1">Vector 1</param>
        /// <param name="_vector2">Vector 2</param>
        /// <returns>Correlation distance else if error -1</returns>
        public static float DistanceCorrelation(float[] _vector1, float[] _vector2)
        {
            float sum1 = 0.0f, sum2 = 0.0f, sumTotal1 = 0.0f, sumTotal2 = 0.0f, sumTotal3 = 0.0f, tmp1 = 0.0f, tmp2 = 0.0f;
            if (_vector1 == null || _vector2 == null)
                return -1.0f;
            if (_vector1.Length != _vector2.Length)
                return -1.0f;
            for (int i = 0; i < _vector1.Length; i++)
            {
                sum1 += _vector1[i];
                sum2 += _vector2[i];
            }
            sum1 /= _vector1.Length;
            sum2 /= _vector1.Length;
            for (int i = 0; i < _vector1.Length; i++)
            {
                tmp1 = (_vector1[i] - sum1);
                tmp2 = (_vector2[i] - sum2);
                sumTotal1 += tmp1 * tmp2;
                sumTotal2 += tmp1 * tmp1;
                sumTotal3 += tmp2 * tmp2;
            }
            sumTotal2 = (float)Math.Sqrt(sumTotal2);
            sumTotal3 = (float)Math.Sqrt(sumTotal3);
            return 1.0f - sumTotal1 / (sumTotal2 * sumTotal3);
        }

        /// <summary>
        /// Gives the canberra distance between 2 n-th dimensional vectors
        /// </summary>
        /// <param name="_vector1">Vector 1</param>
        /// <param name="_vector2">Vector 2</param>
        /// <returns>Canberra distance else if error -1</returns>
        public static float DistanceCanberra(float[] _vector1, float[] _vector2)
        {
            float sum = 0.0f;
            if (_vector1 == null || _vector2 == null)
                return -1.0f;
            if (_vector1.Length != _vector2.Length)
                return -1.0f;
            for (int i = 0; i < _vector1.Length; i++)
                sum += (float)Math.Abs(_vector1[i] - _vector2[i]) / 
                    (float)(Math.Abs(_vector1[i]) + Math.Abs(_vector2[i]));
            return sum;
        }

        /// <summary>
        /// Gives the BrayCurtis distance between 2 n-th dimensional vectors
        /// </summary>
        /// <param name="_vector1">Vector 1</param>
        /// <param name="_vector2">Vector 2</param>
        /// <returns>BrayCurtis distance else if error -1</returns>
        public static float DistanceBrayCurtis(float[] _vector1, float[] _vector2)
        {
            float sum1 = 0.0f, sum2 = 0.0f;
            if (_vector1 == null || _vector2 == null)
                return -1.0f;
            if (_vector1.Length != _vector2.Length)
                return -1.0f;
            for (int i = 0; i < _vector1.Length; i++)
            {
                sum1 += (float)Math.Abs(_vector1[i] - _vector2[i]);
                sum2 += (float)Math.Abs(_vector1[i] + _vector2[i]);
            }
            return sum1 / sum2;
        }

        /// <summary>
        /// Gives the Chessboard distance between 2 n-th dimensional vectors
        /// </summary>
        /// <param name="_vector1">Vector 1</param>
        /// <param name="_vector2">Vector 2</param>
        /// <returns>Chessboard distance else if error -1</returns>
        public static float DistanceChessboard(float[] _vector1, float[] _vector2)
        {
            float tmp = 0.0f;
            if (_vector1 == null || _vector2 == null)
                return -1.0f;
            if (_vector1.Length != _vector2.Length)
                return -1.0f;
            for (int i = 0; i < _vector1.Length; i++)
                tmp = (float)(Math.Max(Math.Abs(_vector1[i]-_vector2[2]), tmp));
            return tmp;
        }
    }
}