public class MathUtils extends Object
| Modifier and Type | Field and Description | 
|---|---|
| static double | log2The natural logarithm of 2. | 
| static double | SMALLThe small deviation allowed in double comparisons. | 
| Constructor and Description | 
|---|
| MathUtils() | 
| Modifier and Type | Method and Description | 
|---|---|
| static double | adjustedrSquared(double rSquared,
                int numRegressors,
                int numDataPoints)This calculates the adjusted r^2 including degrees of freedom. | 
| static double | bernoullis(double n,
          double k,
          double successProb)This will return the bernoulli trial for the given event. | 
| static int | binomial(org.apache.commons.math3.random.RandomGenerator rng,
        int n,
        double p)Generates a binomial distributed number using
 the given rng | 
| org.apache.commons.math3.linear.CholeskyDecomposition | choleskyFromMatrix(org.apache.commons.math3.linear.RealMatrix m)This will return the cholesky decomposition of
 the given matrix | 
| static int | clamp(int value,
     int min,
     int max)Clamps the value to a discrete value | 
| static double | combination(double n,
           double r)This returns the combination of n choose r | 
| static List<double[]> | coordSplit(double[] vector)This returns the coordinate split in a list of coordinates
 such that the values for ret[0] are the x values
 and ret[1] are the y values | 
| static List<double[]> | coordSplit(List<Double> vector)This returns the coordinate split in a list of coordinates
 such that the values for ret[0] are the x values
 and ret[1] are the y values | 
| static double | correlation(double[] residuals,
           double[] targetAttribute)Returns the correlation coefficient of two double vectors. | 
| static double | determinationCoefficient(double[] y1,
                        double[] y2,
                        int n)This returns the determination coefficient of two vectors given a length | 
| static int | discretize(double value,
          double min,
          double max,
          int binCount)Discretize the given value | 
| static int | distanceFinderZValue(double[] vector)This will translate a vector in to an equivalent integer | 
| static double | entropy(double[] vector)This returns the entropy (information gain, or uncertainty of a random variable). | 
| static double | errorFor(double actual,
        double prediction) | 
| static double | euclideanDistance(double[] p,
                 double[] q)This returns the distance of two vectors
 sum(i=1,n)   (q_i - p_i)^2 | 
| static double | euclideanDistance(float[] p,
                 float[] q)This returns the distance of two vectors
 sum(i=1,n)   (q_i - p_i)^2 | 
| static double | factorial(double n)This will return the factorial of the given number n. | 
| static double[] | fromString(String data,
          String separator)This will take a given string and separator and convert it to an equivalent
 double array. | 
| static double[] | generateUniform(int l)This will generate a series of l uniformly distributed
 numbers | 
| static boolean | gr(double a,
  double b)Tests if a is greater than b. | 
| static double | hypotenuse(double a,
          double b)sqrt(a^2 + b^2) without under/overflow. | 
| static double | idf(double totalDocs,
   double numTimesWordAppearedInADocument)Inverse document frequency: the total docs divided by the number of times the word
 appeared in a document | 
| static double | information(double[] probabilities)This returns the entropy for a given vector of probabilities. | 
| static int | kroneckerDelta(double i,
              double j)This returns the kronecker delta of two doubles. | 
| static double | log2(double a)Returns the logarithm of a for base 2. | 
| static double[] | logs2probs(double[] a)Converts an array containing the natural logarithms of
 probabilities stored in a vector back into probabilities. | 
| static double | manhattanDistance(double[] p,
                 double[] q)This will calculate the Manhattan distance between two sets of points. | 
| static double | max(double[] doubles) | 
| static int | maxIndex(double[] doubles)Returns index of maximum element in a given
 array of doubles. | 
| static double | mean(double[] vector)Computes the mean for an array of doubles. | 
| static double[] | mergeCoords(double[] x,
           double[] y)This will merge the coordinates of the given coordinate system. | 
| static List<Double> | mergeCoords(List<Double> x,
           List<Double> y)This will merge the coordinates of the given coordinate system. | 
| static double | min(double[] doubles) | 
| static long | nextPowOf2(long v)See: http://stackoverflow.com/questions/466204/rounding-off-to-nearest-power-of-2 | 
| static void | normalize(double[] doubles,
         double sum)Normalizes the doubles in the array using the given value. | 
| static double | normalize(double val,
         double min,
         double max)Normalize a value
 (val - min) / (max - min) | 
| static double[] | normalizeToOne(double[] doubles) | 
| static List<List<Double>> | partitionVariable(List<Double> arr,
                 int chunk)This will partition the given whole variable data set into the specified chunk number. | 
| static double | permutation(double n,
           double r)This returns the permutation of n choose r. | 
| static int | probRound(double value,
         Random rand)Rounds a double to the next nearest integer value in a probabilistic
 fashion (e.g. | 
| static double | probToLogOdds(double prob)Returns the log-odds for a given probability. | 
| static double | randomDoubleBetween(double begin,
                   double end) | 
| static float | randomFloatBetween(float begin,
                  float end) | 
| static int | randomNumberBetween(double begin,
                   double end)Generates a random integer between the specified numbers | 
| static int | randomNumberBetween(double begin,
                   double end,
                   org.apache.commons.math3.random.RandomGenerator rng)Generates a random integer between the specified numbers | 
| static double | rootMeansSquaredError(double[] real,
                     double[] predicted)This returns the root mean squared error of two data sets | 
| static int | round(double value)Rounds a double to the next nearest integer value. | 
| static double | roundDouble(double value,
           int afterDecimalPoint)Rounds a double to the given number of decimal places. | 
| static float | roundFloat(float value,
          int afterDecimalPoint)Rounds a float to the given number of decimal places. | 
| static double[] | sampleDoublesInInterval(double[][] doubles,
                       int l) | 
| static void | shuffleArray(int[] array,
            long rngSeed) | 
| static void | shuffleArray(int[] array,
            Random rng) | 
| static double | sigmoid(double x)1 / (1 + exp(-x)) | 
| double | slope(double x1,
     double x2,
     double y1,
     double y2)This returns the slope of the given points. | 
| static boolean | sm(double a,
  double b)Tests if a is smaller than b. | 
| static double | squaredLoss(double[] x,
           double[] y,
           double w_0,
           double w_1)This will return the squared loss of the given
 points | 
| static double | ssError(double[] predictedValues,
       double[] targetAttribute)How much of the variance is NOT explained by the regression | 
| static double | ssReg(double[] residuals,
     double[] targetAttribute)How much of the variance is explained by the regression | 
| static double | ssTotal(double[] residuals,
       double[] targetAttribute)Total variance in target attribute | 
| static double | stringSimilarity(String... strings)Calculate string similarity with tfidf weights relative to each character
 frequency and how many times a character appears in a given string | 
| static double | sum(double[] nums)This returns the sum of the given array. | 
| static double | sumOfMeanDifferences(double[] vector,
                    double[] vector2)Used for calculating top part of simple regression for
 beta 1 | 
| static double | sumOfMeanDifferencesOnePoint(double[] vector)Used for calculating top part of simple regression for
 beta 1 | 
| static double | sumOfProducts(double[]... nums)This returns the sum of products for the given
 numbers. | 
| static double | sumOfSquares(double[] vector)This returns the sum of squares for the given vector. | 
| static double | tf(int count,
  int documentLength)Term frequency: 1+ log10(count) | 
| static double | tfidf(double tf,
     double idf)Return tf * idf | 
| static double | times(double[] nums)This returns the product of all numbers in the given array. | 
| static int | toDecimal(String binary)This will convert the given binary string to a decimal based
 integer | 
| static double | uniform(Random rng,
       double min,
       double max)Generate a uniform random number from the given rng | 
| static double | variance(double[] vector) | 
| static double | vectorLength(double[] vector)Returns the vector length (sqrt(sum(x_i))) | 
| static double | w_0(double[] x,
   double[] y,
   int n) | 
| static double | w_1(double[] x,
   double[] y,
   int n) | 
| static double[] | weightsFor(double[] vector)This returns the minimized loss values for a given vector. | 
| static double[] | weightsFor(List<Double> vector)This returns the minimized loss values for a given vector. | 
| static double[] | xVals(double[] vector)This returns the x values of the given vector. | 
| static double[] | yVals(double[] vector)This returns the odd indexed values for the given vector | 
public static double log2
public static double SMALL
public static double normalize(double val,
                               double min,
                               double max)
val - value to normalizemax - max valuemin - min valuepublic static int clamp(int value,
                        int min,
                        int max)
value - the value to clampmin - min for the probability distributionmax - max for the probability distributionpublic static int discretize(double value,
                             double min,
                             double max,
                             int binCount)
value - the value to discretizemin - the min of the distributionmax - the max of the distributionbinCount - the number of binspublic static long nextPowOf2(long v)
v - the number to get the next power of 2 forpublic static int binomial(org.apache.commons.math3.random.RandomGenerator rng,
                           int n,
                           double p)
rng - n - p - public static double uniform(Random rng, double min, double max)
rng - the rng to usemin - the min nummax - the max numpublic static double correlation(double[] residuals,
                                 double[] targetAttribute)
residuals - residualstargetAttribute - target attribute vectorpublic static double sigmoid(double x)
x - public static double ssReg(double[] residuals,
                           double[] targetAttribute)
residuals - errortargetAttribute - data for target attributepublic static double ssError(double[] predictedValues,
                             double[] targetAttribute)
predictedValues - predicted valuestargetAttribute - data for target attributepublic static double stringSimilarity(String... strings)
strings - the strings to calculate similarity forpublic static double vectorLength(double[] vector)
vector - the vector to return the vector length forpublic static double idf(double totalDocs,
                         double numTimesWordAppearedInADocument)
totalDocs - the total documents for the data setnumTimesWordAppearedInADocument - the number of times the word occurred in a documentpublic static double tf(int count,
                        int documentLength)
count - the count of a word or character in a given string or documentpublic static double tfidf(double tf,
                           double idf)
tf - the term frequency (assumed calculated)idf - inverse document frequency (assumed calculated)public static double ssTotal(double[] residuals,
                             double[] targetAttribute)
residuals - errortargetAttribute - data for target attributepublic static double sum(double[] nums)
nums - the array of numbers to sumpublic static double[] mergeCoords(double[] x,
                                   double[] y)
x - the x coordinatesy - the y coordinatespublic static List<Double> mergeCoords(List<Double> x, List<Double> y)
x - the x coordinatesy - the y coordinatespublic static double[] weightsFor(List<Double> vector)
vector - the vector of numbers to get the weights forpublic static double squaredLoss(double[] x,
                                 double[] y,
                                 double w_0,
                                 double w_1)
x - the x coordinates to usey - the y coordinates to usew_0 - the first weightw_1 - the second weightpublic static double w_1(double[] x,
                         double[] y,
                         int n)
public static double w_0(double[] x,
                         double[] y,
                         int n)
public static double[] weightsFor(double[] vector)
vector - the vector of numbers to get the weights forpublic static double errorFor(double actual,
                              double prediction)
public static double sumOfMeanDifferences(double[] vector,
                                          double[] vector2)
vector - the x coordinatesvector2 - the y coordinatespublic static double sumOfMeanDifferencesOnePoint(double[] vector)
vector - the x coordinatespublic static double variance(double[] vector)
public static double times(double[] nums)
nums - the numbers to multiply overpublic static double sumOfProducts(double[]... nums)
nums - the sum of products for the given numberspublic static List<double[]> coordSplit(double[] vector)
vector - the vector to split with x and y values/public static List<List<Double>> partitionVariable(List<Double> arr, int chunk)
arr - the data set to pass inchunk - the number to separate bypublic static List<double[]> coordSplit(List<Double> vector)
vector - the vector to split with x and y values
 Note that the list will be more stable due to the size operator.
 The array version will have extraneous values if not monitored
 properly.public static double[] xVals(double[] vector)
vector - the vector to get the values forpublic static double[] yVals(double[] vector)
vector - the odd indexed values of the given vectorpublic static double sumOfSquares(double[] vector)
vector - the vector to obtain the sum of squares forpublic static double determinationCoefficient(double[] y1,
                                              double[] y2,
                                              int n)
y1 - the first vectory2 - the second vectorn - the length of both vectorspublic static double log2(double a)
a - a doublepublic double slope(double x1,
                    double x2,
                    double y1,
                    double y2)
x1 - the first x to usex2 - the end x to usey1 - the begin y to usey2 - the end y to usepublic static double rootMeansSquaredError(double[] real,
                                           double[] predicted)
real - the real valuespredicted - the predicted valuespublic static double entropy(double[] vector)
vector - the vector of values to get the entropy forpublic static int kroneckerDelta(double i,
                                 double j)
i - the first number to comparej - the second number to comparepublic static double adjustedrSquared(double rSquared,
                                      int numRegressors,
                                      int numDataPoints)
rSquared - the r squared value to calculatenumRegressors - number of variablesnumDataPoints - size of the data setpublic static double[] normalizeToOne(double[] doubles)
public static double min(double[] doubles)
public static double max(double[] doubles)
public static void normalize(double[] doubles,
                             double sum)
doubles - the array of doublesum - the value by which the doubles are to be normalizedIllegalArgumentException - if sum is zero or NaNpublic static double[] logs2probs(double[] a)
a - an array holding the natural logarithms of the probabilitiespublic static double information(double[] probabilities)
probabilities - the probabilities to get the entropy forpublic static int maxIndex(double[] doubles)
doubles - the array of doublespublic static double factorial(double n)
n - the number to get the factorial forpublic static double probToLogOdds(double prob)
prob - the probabilitypublic static int round(double value)
value - the double valuepublic static double permutation(double n,
                                 double r)
n - the n to chooser - the number of elements to choosepublic static double combination(double n,
                                 double r)
n - the number of elements overallr - the number of elements to choosepublic static double hypotenuse(double a,
                                double b)
public static int probRound(double value,
                            Random rand)
value - the double valuerand - the random number generatorpublic static double roundDouble(double value,
                                 int afterDecimalPoint)
value - the double valueafterDecimalPoint - the number of digits after the decimal pointpublic static float roundFloat(float value,
                               int afterDecimalPoint)
value - the float valueafterDecimalPoint - the number of digits after the decimal pointpublic static double bernoullis(double n,
                                double k,
                                double successProb)
n - the number of trialsk - the number of times the target event occurssuccessProb - the probability of the event happeningpublic static boolean sm(double a,
                         double b)
a - a doubleb - a doublepublic static boolean gr(double a,
                         double b)
a - a doubleb - a doublepublic static double[] fromString(String data, String separator)
data - the data to separateseparator - the separator to usepublic static double mean(double[] vector)
vector - the arraypublic org.apache.commons.math3.linear.CholeskyDecomposition choleskyFromMatrix(org.apache.commons.math3.linear.RealMatrix m)
                                                                         throws Exception
m - the matrix to convertorg.apache.commons.math3.linear.NonSquareMatrixExceptionExceptionpublic static int toDecimal(String binary)
binary - the binary string to convertpublic static int distanceFinderZValue(double[] vector)
vector - the vector to translatepublic static double euclideanDistance(double[] p,
                                       double[] q)
p - the first vectorq - the second vectorpublic static double euclideanDistance(float[] p,
                                       float[] q)
p - the first vectorq - the second vectorpublic static double[] generateUniform(int l)
l - the number of numbers to generatepublic static double manhattanDistance(double[] p,
                                       double[] q)
p - the first point vectorq - the second point vectorpublic static double[] sampleDoublesInInterval(double[][] doubles,
                                               int l)
public static int randomNumberBetween(double begin,
                                      double end)
begin - the begin of the intervalend - the end of the intervalpublic static int randomNumberBetween(double begin,
                                      double end,
                                      org.apache.commons.math3.random.RandomGenerator rng)
begin - the begin of the intervalend - the end of the intervalpublic static float randomFloatBetween(float begin,
                                       float end)
public static double randomDoubleBetween(double begin,
                                         double end)
public static void shuffleArray(int[] array,
                                long rngSeed)
public static void shuffleArray(int[] array,
                                Random rng)
Copyright © 2016. All Rights Reserved.