public class MathUtils extends Object
| Modifier and Type | Field and Description |
|---|---|
static double |
log2
The natural logarithm of 2.
|
static double |
SMALL
The small deviation allowed in double comparisons.
|
| Constructor and Description |
|---|
MathUtils() |
| Modifier and Type | Method and Description |
|---|---|
static double |
adjustedrSquared(double rSquared,
int numRegressors,
int numDataPoints)
This calculates the adjusted r^2 including degrees of freedom.
|
static double |
bernoullis(double n,
double k,
double successProb)
This will return the bernoulli trial for the given event.
|
static int |
binomial(org.apache.commons.math3.random.RandomGenerator rng,
int n,
double p)
Generates a binomial distributed number using
the given rng
|
org.apache.commons.math3.linear.CholeskyDecomposition |
choleskyFromMatrix(org.apache.commons.math3.linear.RealMatrix m)
This will return the cholesky decomposition of
the given matrix
|
static int |
clamp(int value,
int min,
int max)
Clamps the value to a discrete value
|
static double |
combination(double n,
double r)
This returns the combination of n choose r
|
static List<double[]> |
coordSplit(double[] vector)
This returns the coordinate split in a list of coordinates
such that the values for ret[0] are the x values
and ret[1] are the y values
|
static List<double[]> |
coordSplit(List<Double> vector)
This returns the coordinate split in a list of coordinates
such that the values for ret[0] are the x values
and ret[1] are the y values
|
static double |
correlation(double[] residuals,
double[] targetAttribute)
Returns the correlation coefficient of two double vectors.
|
static double |
determinationCoefficient(double[] y1,
double[] y2,
int n)
This returns the determination coefficient of two vectors given a length
|
static int |
discretize(double value,
double min,
double max,
int binCount)
Discretize the given value
|
static int |
distanceFinderZValue(double[] vector)
This will translate a vector in to an equivalent integer
|
static double |
entropy(double[] vector)
This returns the entropy (information gain, or uncertainty of a random variable).
|
static double |
errorFor(double actual,
double prediction) |
static double |
euclideanDistance(double[] p,
double[] q)
This returns the distance of two vectors
sum(i=1,n) (q_i - p_i)^2
|
static double |
euclideanDistance(float[] p,
float[] q)
This returns the distance of two vectors
sum(i=1,n) (q_i - p_i)^2
|
static double |
factorial(double n)
This will return the factorial of the given number n.
|
static double[] |
fromString(String data,
String separator)
This will take a given string and separator and convert it to an equivalent
double array.
|
static double[] |
generateUniform(int l)
This will generate a series of l uniformly distributed
numbers
|
static boolean |
gr(double a,
double b)
Tests if a is greater than b.
|
static double |
hypotenuse(double a,
double b)
sqrt(a^2 + b^2) without under/overflow.
|
static double |
idf(double totalDocs,
double numTimesWordAppearedInADocument)
Inverse document frequency: the total docs divided by the number of times the word
appeared in a document
|
static double |
information(double[] probabilities)
This returns the entropy for a given vector of probabilities.
|
static int |
kroneckerDelta(double i,
double j)
This returns the kronecker delta of two doubles.
|
static double |
log2(double a)
Returns the logarithm of a for base 2.
|
static double[] |
logs2probs(double[] a)
Converts an array containing the natural logarithms of
probabilities stored in a vector back into probabilities.
|
static double |
manhattanDistance(double[] p,
double[] q)
This will calculate the Manhattan distance between two sets of points.
|
static double |
max(double[] doubles) |
static int |
maxIndex(double[] doubles)
Returns index of maximum element in a given
array of doubles.
|
static double |
mean(double[] vector)
Computes the mean for an array of doubles.
|
static double[] |
mergeCoords(double[] x,
double[] y)
This will merge the coordinates of the given coordinate system.
|
static List<Double> |
mergeCoords(List<Double> x,
List<Double> y)
This will merge the coordinates of the given coordinate system.
|
static double |
min(double[] doubles) |
static long |
nextPowOf2(long v)
See: http://stackoverflow.com/questions/466204/rounding-off-to-nearest-power-of-2
|
static void |
normalize(double[] doubles,
double sum)
Normalizes the doubles in the array using the given value.
|
static double |
normalize(double val,
double min,
double max)
Normalize a value
(val - min) / (max - min)
|
static double[] |
normalizeToOne(double[] doubles) |
static List<List<Double>> |
partitionVariable(List<Double> arr,
int chunk)
This will partition the given whole variable data set into the specified chunk number.
|
static double |
permutation(double n,
double r)
This returns the permutation of n choose r.
|
static int |
probRound(double value,
Random rand)
Rounds a double to the next nearest integer value in a probabilistic
fashion (e.g. 0.8 has a 20% chance of being rounded down to 0 and an
80% chance of being rounded up to 1).
|
static double |
probToLogOdds(double prob)
Returns the log-odds for a given probability.
|
static double |
randomDoubleBetween(double begin,
double end) |
static float |
randomFloatBetween(float begin,
float end) |
static int |
randomNumberBetween(double begin,
double end)
Generates a random integer between the specified numbers
|
static int |
randomNumberBetween(double begin,
double end,
org.apache.commons.math3.random.RandomGenerator rng)
Generates a random integer between the specified numbers
|
static double |
rootMeansSquaredError(double[] real,
double[] predicted)
This returns the root mean squared error of two data sets
|
static int |
round(double value)
Rounds a double to the next nearest integer value.
|
static double |
roundDouble(double value,
int afterDecimalPoint)
Rounds a double to the given number of decimal places.
|
static float |
roundFloat(float value,
int afterDecimalPoint)
Rounds a float to the given number of decimal places.
|
static double[] |
sampleDoublesInInterval(double[][] doubles,
int l) |
static void |
shuffleArray(int[] array,
long rngSeed) |
static void |
shuffleArray(int[] array,
Random rng) |
static double |
sigmoid(double x)
1 / (1 + exp(-x))
|
double |
slope(double x1,
double x2,
double y1,
double y2)
This returns the slope of the given points.
|
static boolean |
sm(double a,
double b)
Tests if a is smaller than b.
|
static double |
squaredLoss(double[] x,
double[] y,
double w_0,
double w_1)
This will return the squared loss of the given
points
|
static double |
ssError(double[] predictedValues,
double[] targetAttribute)
How much of the variance is NOT explained by the regression
|
static double |
ssReg(double[] residuals,
double[] targetAttribute)
How much of the variance is explained by the regression
|
static double |
ssTotal(double[] residuals,
double[] targetAttribute)
Total variance in target attribute
|
static double |
stringSimilarity(String... strings)
Calculate string similarity with tfidf weights relative to each character
frequency and how many times a character appears in a given string
|
static double |
sum(double[] nums)
This returns the sum of the given array.
|
static double |
sumOfMeanDifferences(double[] vector,
double[] vector2)
Used for calculating top part of simple regression for
beta 1
|
static double |
sumOfMeanDifferencesOnePoint(double[] vector)
Used for calculating top part of simple regression for
beta 1
|
static double |
sumOfProducts(double[]... nums)
This returns the sum of products for the given
numbers.
|
static double |
sumOfSquares(double[] vector)
This returns the sum of squares for the given vector.
|
static double |
tf(int count,
int documentLength)
Term frequency: 1 + log10(count)
|
static double |
tfidf(double tf,
double idf)
Return tf * idf
|
static double |
times(double[] nums)
This returns the product of all numbers in the given array.
|
static int |
toDecimal(String binary)
This will convert the given binary string to a decimal based
integer
|
static double |
uniform(Random rng,
double min,
double max)
Generate a uniform random number from the given rng
|
static double |
variance(double[] vector) |
static double |
vectorLength(double[] vector)
Returns the vector length (sqrt(sum(x_i^2)))
|
static double |
w_0(double[] x,
double[] y,
int n) |
static double |
w_1(double[] x,
double[] y,
int n) |
static double[] |
weightsFor(double[] vector)
This returns the minimized loss values for a given vector.
|
static double[] |
weightsFor(List<Double> vector)
This returns the minimized loss values for a given vector.
|
static double[] |
xVals(double[] vector)
This returns the x values of the given vector.
|
static double[] |
yVals(double[] vector)
This returns the odd indexed values for the given vector
|
public static double log2
public static double SMALL
public static double normalize(double val,
double min,
double max)
val - value to normalize
max - max value
min - min value
public static int clamp(int value,
int min,
int max)
value - the value to clamp
min - min for the probability distribution
max - max for the probability distribution
public static int discretize(double value,
double min,
double max,
int binCount)
value - the value to discretize
min - the min of the distribution
max - the max of the distribution
binCount - the number of bins
public static long nextPowOf2(long v)
v - the number to get the next power of 2 for
public static int binomial(org.apache.commons.math3.random.RandomGenerator rng,
int n,
double p)
rng -
n -
p -
public static double uniform(Random rng, double min, double max)
rng - the rng to use
min - the min num
max - the max num
public static double correlation(double[] residuals,
double[] targetAttribute)
residuals - residuals
targetAttribute - target attribute vector
public static double sigmoid(double x)
x -
public static double ssReg(double[] residuals,
double[] targetAttribute)
residuals - error
targetAttribute - data for target attribute
public static double ssError(double[] predictedValues,
double[] targetAttribute)
predictedValues - predicted values
targetAttribute - data for target attribute
public static double stringSimilarity(String... strings)
strings - the strings to calculate similarity for
public static double vectorLength(double[] vector)
vector - the vector to return the vector length for
public static double idf(double totalDocs,
double numTimesWordAppearedInADocument)
totalDocs - the total documents for the data set
numTimesWordAppearedInADocument - the number of times the word occurred in a document
public static double tf(int count,
int documentLength)
count - the count of a word or character in a given string or document
public static double tfidf(double tf,
double idf)
tf - the term frequency (assumed calculated)
idf - inverse document frequency (assumed calculated)
public static double ssTotal(double[] residuals,
double[] targetAttribute)
residuals - error
targetAttribute - data for target attribute
public static double sum(double[] nums)
nums - the array of numbers to sum
public static double[] mergeCoords(double[] x,
double[] y)
x - the x coordinates
y - the y coordinates
public static List<Double> mergeCoords(List<Double> x, List<Double> y)
x - the x coordinates
y - the y coordinates
public static double[] weightsFor(List<Double> vector)
vector - the vector of numbers to get the weights for
public static double squaredLoss(double[] x,
double[] y,
double w_0,
double w_1)
x - the x coordinates to use
y - the y coordinates to use
w_0 - the first weight
w_1 - the second weight
public static double w_1(double[] x,
double[] y,
int n)
public static double w_0(double[] x,
double[] y,
int n)
public static double[] weightsFor(double[] vector)
vector - the vector of numbers to get the weights for
public static double errorFor(double actual,
double prediction)
public static double sumOfMeanDifferences(double[] vector,
double[] vector2)
vector - the x coordinates
vector2 - the y coordinates
public static double sumOfMeanDifferencesOnePoint(double[] vector)
vector - the x coordinates
public static double variance(double[] vector)
public static double times(double[] nums)
nums - the numbers to multiply over
public static double sumOfProducts(double[]... nums)
nums - the sum of products for the given numbers
public static List<double[]> coordSplit(double[] vector)
vector - the vector to split with x and y values
public static List<List<Double>> partitionVariable(List<Double> arr, int chunk)
arr - the data set to pass in
chunk - the number to separate by
public static List<double[]> coordSplit(List<Double> vector)
vector - the vector to split with x and y values
Note that the list will be more stable due to the size operator.
The array version will have extraneous values if not monitored
properly.
public static double[] xVals(double[] vector)
vector - the vector to get the values for
public static double[] yVals(double[] vector)
vector - the odd indexed values of the given vector
public static double sumOfSquares(double[] vector)
vector - the vector to obtain the sum of squares for
public static double determinationCoefficient(double[] y1,
double[] y2,
int n)
y1 - the first vector
y2 - the second vector
n - the length of both vectors
public static double log2(double a)
a - a double
public double slope(double x1,
double x2,
double y1,
double y2)
x1 - the first x to use
x2 - the end x to use
y1 - the begin y to use
y2 - the end y to use
public static double rootMeansSquaredError(double[] real,
double[] predicted)
real - the real values
predicted - the predicted values
public static double entropy(double[] vector)
vector - the vector of values to get the entropy for
public static int kroneckerDelta(double i,
double j)
i - the first number to compare
j - the second number to compare
public static double adjustedrSquared(double rSquared,
int numRegressors,
int numDataPoints)
rSquared - the r squared value to calculate
numRegressors - number of variables
numDataPoints - size of the data set
public static double[] normalizeToOne(double[] doubles)
public static double min(double[] doubles)
public static double max(double[] doubles)
public static void normalize(double[] doubles,
double sum)
doubles - the array of doubles
sum - the value by which the doubles are to be normalized
IllegalArgumentException - if sum is zero or NaN
public static double[] logs2probs(double[] a)
a - an array holding the natural logarithms of the probabilities
public static double information(double[] probabilities)
probabilities - the probabilities to get the entropy for
public static int maxIndex(double[] doubles)
doubles - the array of doubles
public static double factorial(double n)
n - the number to get the factorial for
public static double probToLogOdds(double prob)
prob - the probability
public static int round(double value)
value - the double value
public static double permutation(double n,
double r)
n - the n to choose
r - the number of elements to choose
public static double combination(double n,
double r)
n - the number of elements overall
r - the number of elements to choose
public static double hypotenuse(double a,
double b)
public static int probRound(double value,
Random rand)
value - the double value
rand - the random number generator
public static double roundDouble(double value,
int afterDecimalPoint)
value - the double value
afterDecimalPoint - the number of digits after the decimal point
public static float roundFloat(float value,
int afterDecimalPoint)
value - the float value
afterDecimalPoint - the number of digits after the decimal point
public static double bernoullis(double n,
double k,
double successProb)
n - the number of trials
k - the number of times the target event occurs
successProb - the probability of the event happening
public static boolean sm(double a,
double b)
a - a double
b - a double
public static boolean gr(double a,
double b)
a - a double
b - a double
public static double[] fromString(String data, String separator)
data - the data to separate
separator - the separator to use
public static double mean(double[] vector)
vector - the array
public org.apache.commons.math3.linear.CholeskyDecomposition choleskyFromMatrix(org.apache.commons.math3.linear.RealMatrix m)
throws Exception
m - the matrix to convert
org.apache.commons.math3.linear.NonSquareMatrixException
Exception
public static int toDecimal(String binary)
binary - the binary string to convert
public static int distanceFinderZValue(double[] vector)
vector - the vector to translate
public static double euclideanDistance(double[] p,
double[] q)
p - the first vector
q - the second vector
public static double euclideanDistance(float[] p,
float[] q)
p - the first vector
q - the second vector
public static double[] generateUniform(int l)
l - the number of numbers to generate
public static double manhattanDistance(double[] p,
double[] q)
p - the first point vector
q - the second point vector
public static double[] sampleDoublesInInterval(double[][] doubles,
int l)
public static int randomNumberBetween(double begin,
double end)
begin - the begin of the interval
end - the end of the interval
public static int randomNumberBetween(double begin,
double end,
org.apache.commons.math3.random.RandomGenerator rng)
begin - the begin of the interval
end - the end of the interval
public static float randomFloatBetween(float begin,
float end)
public static double randomDoubleBetween(double begin,
double end)
public static void shuffleArray(int[] array,
long rngSeed)
public static void shuffleArray(int[] array,
Random rng)
Copyright © 2016. All Rights Reserved.