public class Descriptive extends Object
Modifier | Constructor and Description |
---|---|
protected |
Descriptive()
Makes this class non-instantiable, but still lets others inherit from it.
|
Modifier and Type | Method and Description |
---|---|
static double |
autoCorrelation(DoubleArrayList data,
int lag,
double mean,
double variance)
Returns the auto-correlation of a data sequence.
|
protected static void |
checkRangeFromTo(int from,
int to,
int theSize)
Checks if the given range is within the contained array's bounds.
|
static double |
correlation(DoubleArrayList data1,
double standardDev1,
DoubleArrayList data2,
double standardDev2)
Returns the correlation of two data sequences.
|
static double |
covariance(DoubleArrayList data1,
DoubleArrayList data2)
Returns the covariance of two data sequences, which is
cov(x,y) = (1/(size()-1)) * Sum((x[i]-mean(x)) * (y[i]-mean(y))).
|
static double |
durbinWatson(DoubleArrayList data)
Durbin-Watson computation.
|
static void |
frequencies(DoubleArrayList sortedData,
DoubleArrayList distinctValues,
IntArrayList frequencies)
Computes the frequency (number of occurrences, count) of each distinct value in the given sorted data.
|
static double |
geometricMean(DoubleArrayList data)
Returns the geometric mean of a data sequence.
|
static double |
geometricMean(int size,
double sumOfLogarithms)
Returns the geometric mean of a data sequence.
|
static double |
harmonicMean(int size,
double sumOfInversions)
Returns the harmonic mean of a data sequence.
|
static void |
incrementalUpdate(DoubleArrayList data,
int from,
int to,
double[] inOut)
Incrementally maintains and updates minimum, maximum, sum and sum of squares of a data sequence.
|
static void |
incrementalUpdateSumsOfPowers(DoubleArrayList data,
int from,
int to,
int fromSumIndex,
int toSumIndex,
double[] sumOfPowers)
Incrementally maintains and updates various sums of powers of the form Sum( data[i]^k ).
|
static void |
incrementalWeightedUpdate(DoubleArrayList data,
DoubleArrayList weights,
int from,
int to,
double[] inOut)
Incrementally maintains and updates sum and sum of squares of a weighted data sequence.
|
static double |
kurtosis(DoubleArrayList data,
double mean,
double standardDeviation)
Returns the kurtosis (aka excess) of a data sequence, which is -3 + moment(data,4,mean) / standardDeviation^4.
|
static double |
kurtosis(double moment4,
double standardDeviation)
Returns the kurtosis (aka excess) of a data sequence.
|
static double |
lag1(DoubleArrayList data,
double mean)
Returns the lag-1 autocorrelation of a dataset.
Note that this method has semantics different from autoCorrelation(..., 1).
|
static double |
max(DoubleArrayList data)
Returns the largest member of a data sequence.
|
static double |
mean(DoubleArrayList data)
Returns the arithmetic mean of a data sequence;
that is, Sum( data[i] ) / data.size().
|
static double |
meanDeviation(DoubleArrayList data,
double mean)
Returns the mean deviation of a dataset.
|
static double |
median(DoubleArrayList sortedData)
Returns the median of a sorted data sequence.
|
static double |
min(DoubleArrayList data)
Returns the smallest member of a data sequence.
|
static double |
moment(DoubleArrayList data,
int k,
double c)
Returns the moment of k-th order with constant c of a data sequence,
which is Sum( (data[i]-c)^k ) / data.size().
|
static double |
moment(int k,
double c,
int size,
double[] sumOfPowers)
Returns the moment of k-th order with constant c of a data sequence,
which is Sum( (data[i]-c)^k ) / data.size().
|
static double |
pooledMean(int size1,
double mean1,
int size2,
double mean2)
Returns the pooled mean of two data sequences.
|
static double |
pooledVariance(int size1,
double variance1,
int size2,
double variance2)
Returns the pooled variance of two data sequences.
|
static double |
product(DoubleArrayList data)
Returns the product of a data sequence, which is Prod( data[i] ).
|
static double |
product(int size,
double sumOfLogarithms)
Returns the product, which is Prod( data[i] ).
|
static double |
quantile(DoubleArrayList sortedData,
double phi)
Returns the phi-quantile; that is, an element elem for which holds that phi percent of data elements are less than elem.
|
static double |
quantileInverse(DoubleArrayList sortedList,
double element)
Returns how many percent of the elements contained in the receiver are <= element.
|
static DoubleArrayList |
quantiles(DoubleArrayList sortedData,
DoubleArrayList percentages)
Returns the quantiles of the specified percentages.
|
static double |
rankInterpolated(DoubleArrayList sortedList,
double element)
Returns the linearly interpolated number of elements in a list less or equal to a given element.
|
static double |
rms(int size,
double sumOfSquares)
Returns the RMS (Root-Mean-Square) of a data sequence.
|
static double |
sampleKurtosis(DoubleArrayList data,
double mean,
double sampleVariance)
Returns the sample kurtosis (aka excess) of a data sequence.
|
static double |
sampleKurtosis(int size,
double moment4,
double sampleVariance)
Returns the sample kurtosis (aka excess) of a data sequence.
|
static double |
sampleKurtosisStandardError(int size)
Returns the standard error of the sample kurtosis.
|
static double |
sampleSkew(DoubleArrayList data,
double mean,
double sampleVariance)
Returns the sample skew of a data sequence.
|
static double |
sampleSkew(int size,
double moment3,
double sampleVariance)
Returns the sample skew of a data sequence.
|
static double |
sampleSkewStandardError(int size)
Returns the standard error of the sample skew.
|
static double |
sampleStandardDeviation(int size,
double sampleVariance)
Returns the sample standard deviation.
|
static double |
sampleVariance(DoubleArrayList data,
double mean)
Returns the sample variance of a data sequence.
|
static double |
sampleVariance(int size,
double sum,
double sumOfSquares)
Returns the sample variance of a data sequence.
|
static double |
sampleWeightedVariance(double sumOfWeights,
double sumOfProducts,
double sumOfSquaredProducts)
Returns the sample weighted variance of a data sequence.
|
static double |
skew(DoubleArrayList data,
double mean,
double standardDeviation)
Returns the skew of a data sequence, which is moment(data,3,mean) / standardDeviation^3.
|
static double |
skew(double moment3,
double standardDeviation)
Returns the skew of a data sequence.
|
static DoubleArrayList[] |
split(DoubleArrayList sortedList,
DoubleArrayList splitters)
Splits (partitions) a list into sublists such that each sublist contains the elements within a given range.
|
static double |
standardDeviation(double variance)
Returns the standard deviation from a variance.
|
static double |
standardError(int size,
double variance)
Returns the standard error of a data sequence.
|
static void |
standardize(DoubleArrayList data,
double mean,
double standardDeviation)
Modifies a data sequence to be standardized.
|
static double |
sum(DoubleArrayList data)
Returns the sum of a data sequence.
|
static double |
sumOfInversions(DoubleArrayList data,
int from,
int to)
Returns the sum of inversions of a data sequence,
which is Sum( 1.0 / data[i]).
|
static double |
sumOfLogarithms(DoubleArrayList data,
int from,
int to)
Returns the sum of logarithms of a data sequence, which is Sum( Log(data[i]) ).
|
static double |
sumOfPowerDeviations(DoubleArrayList data,
int k,
double c)
Returns Sum( (data[i]-c)^k ); optimized for common parameters like c == 0.0 and/or k == -2 ..
|
static double |
sumOfPowerDeviations(DoubleArrayList data,
int k,
double c,
int from,
int to)
Returns Sum( (data[i]-c)^k ) for all i = from .. to.
|
static double |
sumOfPowers(DoubleArrayList data,
int k)
Returns the sum of powers of a data sequence, which is Sum( data[i]^k ).
|
static double |
sumOfSquaredDeviations(int size,
double variance)
Returns the sum of squared mean deviation of a data sequence.
|
static double |
sumOfSquares(DoubleArrayList data)
Returns the sum of squares of a data sequence.
|
static double |
trimmedMean(DoubleArrayList sortedData,
double mean,
int left,
int right)
Returns the trimmed mean of a sorted data sequence.
|
static double |
variance(double standardDeviation)
Returns the variance from a standard deviation.
|
static double |
variance(int size,
double sum,
double sumOfSquares)
Returns the variance of a data sequence.
|
static double |
weightedMean(DoubleArrayList data,
DoubleArrayList weights)
Returns the weighted mean of a data sequence.
|
static double |
weightedRMS(double sumOfProducts,
double sumOfSquaredProducts)
Returns the weighted RMS (Root-Mean-Square) of a data sequence.
|
static double |
winsorizedMean(DoubleArrayList sortedData,
double mean,
int left,
int right)
Returns the winsorized mean of a sorted data sequence.
|
protected Descriptive()
public static double autoCorrelation(DoubleArrayList data, int lag, double mean, double variance)
protected static void checkRangeFromTo(int from, int to, int theSize)
IndexOutOfBoundsException
- if to!=from-1 && (from<0 || from>to || to>=theSize).
public static double correlation(DoubleArrayList data1, double standardDev1, DoubleArrayList data2, double standardDev2)
public static double covariance(DoubleArrayList data1, DoubleArrayList data2)
public static double durbinWatson(DoubleArrayList data)
public static void frequencies(DoubleArrayList sortedData, DoubleArrayList distinctValues, IntArrayList frequencies)
Distinct values are filled into distinctValues, starting at index 0.
The frequency of each distinct value is filled into frequencies, starting at index 0.
As a result, the smallest distinct value (and its frequency) can be found at index 0, the second smallest distinct value (and its frequency) at index 1, ..., the largest distinct value (and its frequency) at index distinctValues.size()-1.
Example:
elements = (5,6,6,7,8,8) --> distinctValues = (5,6,7,8), frequencies = (1,2,1,2)
sortedData
- the data; must be sorted ascending.
distinctValues
- a list to be filled with the distinct values; can have any size.
frequencies
- a list to be filled with the frequencies; can have any size; set this parameter to null to ignore it.
public static double geometricMean(int size, double sumOfLogarithms)
public static double geometricMean(DoubleArrayList data)
public static double harmonicMean(int size, double sumOfInversions)
size
- the number of elements in the data sequence.
sumOfInversions
- Sum( 1.0 / data[i]).
public static void incrementalUpdate(DoubleArrayList data, int from, int to, double[] inOut)
This method computes those updated values without needing to know the already recorded elements. This is interesting for interactive online monitoring and/or applications that cannot keep the entire huge data sequence in memory.
Definition of sumOfSquares: sumOfSquares(n) = Sum ( data[i] * data[i] ).
data
- the additional elements to be incorporated into min, max, etc.
from
- the index of the first element within data to consider.
to
- the index of the last element within data to consider.
The method incorporates elements data[from], ..., data[to].
inOut
- the old values in the following format:
public static void incrementalUpdateSumsOfPowers(DoubleArrayList data, int from, int to, int fromSumIndex, int toSumIndex, double[] sumOfPowers)
This method computes those updated values without needing to know the already recorded elements. This is interesting for interactive online monitoring and/or applications that cannot keep the entire huge data sequence in memory. For example, the incremental computation of moments is based upon such sums of powers:
The moment of k-th order with constant c of a data sequence is given by Sum( (data[i]-c)^k ) / data.size(). It can incrementally be computed by using the equivalent formula
moment(k,c) = m(k,c) / data.size() where
m(k,c) = Sum( (-1)^i * b(k,i) * c^i * sumOfPowers(k-i) ) for i = 0 .. k and
b(k,i) = binomial(k,i)
and
sumOfPowers(k) = Sum( data[i]^k ).
data
- the additional elements to be incorporated into the sums of powers.
from
- the index of the first element within data to consider.
to
- the index of the last element within data to consider.
The method incorporates elements data[from], ..., data[to].
sumOfPowers
- the old values of the sums in the following format:
public static void incrementalWeightedUpdate(DoubleArrayList data, DoubleArrayList weights, int from, int to, double[] inOut)
This method computes those updated values without needing to know the already recorded elements. This is interesting for interactive online monitoring and/or applications that cannot keep the entire huge data sequence in memory.
Definition of sum: sum = Sum ( data[i] * weights[i] ).
Definition of sumOfSquares: sumOfSquares = Sum ( data[i] * data[i] * weights[i]).
data
- the additional elements to be incorporated into sum and sumOfSquares.
weights
- the weight of each element within data.
from
- the index of the first element within data (and weights) to consider.
to
- the index of the last element within data (and weights) to consider.
The method incorporates elements data[from], ..., data[to].
inOut
- the old values in the following format:
public static double kurtosis(double moment4, double standardDeviation)
moment4
- the fourth central moment, which is moment(data,4,mean).
standardDeviation
- the standard deviation.
public static double kurtosis(DoubleArrayList data, double mean, double standardDeviation)
public static double lag1(DoubleArrayList data, double mean)
public static double max(DoubleArrayList data)
public static double mean(DoubleArrayList data)
public static double meanDeviation(DoubleArrayList data, double mean)
public static double median(DoubleArrayList sortedData)
sortedData
- the data sequence; must be sorted ascending.
public static double min(DoubleArrayList data)
public static double moment(int k, double c, int size, double[] sumOfPowers)
sumOfPowers
- sumOfPowers[m] == Sum( data[i]^m ) for m = 0,1,..,k as returned by method incrementalUpdateSumsOfPowers(DoubleArrayList,int,int,int,int,double[]).
In particular there must hold sumOfPowers.length == k+1.
size
- the number of elements of the data sequence.
public static double moment(DoubleArrayList data, int k, double c)
public static double pooledMean(int size1, double mean1, int size2, double mean2)
size1
- the number of elements in data sequence 1.
mean1
- the mean of data sequence 1.
size2
- the number of elements in data sequence 2.
mean2
- the mean of data sequence 2.
public static double pooledVariance(int size1, double variance1, int size2, double variance2)
size1
- the number of elements in data sequence 1.
variance1
- the variance of data sequence 1.
size2
- the number of elements in data sequence 2.
variance2
- the variance of data sequence 2.
public static double product(int size, double sumOfLogarithms)
public static double product(DoubleArrayList data)
public static double quantile(DoubleArrayList sortedData, double phi)
sortedData
- the data sequence; must be sorted ascending.
phi
- the percentage; must satisfy 0 <= phi <= 1.
public static double quantileInverse(DoubleArrayList sortedList, double element)
sortedList
- the list to be searched (must be sorted ascending).
element
- the element to search for.
public static DoubleArrayList quantiles(DoubleArrayList sortedData, DoubleArrayList percentages)
sortedData
- the data sequence; must be sorted ascending.
percentages
- the percentages for which quantiles are to be computed.
Each percentage must be in the interval [0.0,1.0].
public static double rankInterpolated(DoubleArrayList sortedList, double element)
sortedList
- the list to be searched (must be sorted ascending).
element
- the element to search for.
public static double rms(int size, double sumOfSquares)
sumOfSquares
- sumOfSquares(data) == Sum( data[i]*data[i] ) of the data sequence.
size
- the number of elements in the data sequence.
public static double sampleKurtosis(int size, double moment4, double sampleVariance)
size
- the number of elements of the data sequence.
moment4
- the fourth central moment, which is moment(data,4,mean).
sampleVariance
- the sample variance.
public static double sampleKurtosis(DoubleArrayList data, double mean, double sampleVariance)
public static double sampleKurtosisStandardError(int size)
size
- the number of elements of the data sequence.
public static double sampleSkew(int size, double moment3, double sampleVariance)
size
- the number of elements of the data sequence.
moment3
- the third central moment, which is moment(data,3,mean).
sampleVariance
- the sample variance.
public static double sampleSkew(DoubleArrayList data, double mean, double sampleVariance)
public static double sampleSkewStandardError(int size)
size
- the number of elements of the data sequence.
public static double sampleStandardDeviation(int size, double sampleVariance)
size
- the number of elements of the data sequence.
sampleVariance
- the sample variance.
public static double sampleVariance(int size, double sum, double sumOfSquares)
size
- the number of elements of the data sequence.
sum
- == Sum( data[i] ).
sumOfSquares
- == Sum( data[i]*data[i] ).
public static double sampleVariance(DoubleArrayList data, double mean)
public static double sampleWeightedVariance(double sumOfWeights, double sumOfProducts, double sumOfSquaredProducts)
sumOfWeights
- == Sum( weights[i] ).
sumOfProducts
- == Sum( data[i] * weights[i] ).
sumOfSquaredProducts
- == Sum( data[i] * data[i] * weights[i] ).
public static double skew(double moment3, double standardDeviation)
moment3
- the third central moment, which is moment(data,3,mean).
standardDeviation
- the standard deviation.
public static double skew(DoubleArrayList data, double mean, double standardDeviation)
public static DoubleArrayList[] split(DoubleArrayList sortedList, DoubleArrayList splitters)
Examples:
sortedList
- the list to be partitioned (must be sorted ascending).
splitters
- the points at which the list shall be partitioned (must be sorted ascending).
public static double standardDeviation(double variance)
public static double standardError(int size, double variance)
size
- the number of elements in the data sequence.
variance
- the variance of the data sequence.
public static void standardize(DoubleArrayList data, double mean, double standardDeviation)
public static double sum(DoubleArrayList data)
public static double sumOfInversions(DoubleArrayList data, int from, int to)
data
- the data sequence.
from
- the index of the first data element (inclusive).
to
- the index of the last data element (inclusive).
public static double sumOfLogarithms(DoubleArrayList data, int from, int to)
data
- the data sequence.
from
- the index of the first data element (inclusive).
to
- the index of the last data element (inclusive).
public static double sumOfPowerDeviations(DoubleArrayList data, int k, double c)
public static double sumOfPowerDeviations(DoubleArrayList data, int k, double c, int from, int to)
public static double sumOfPowers(DoubleArrayList data, int k)
public static double sumOfSquaredDeviations(int size, double variance)
size
- the number of elements of the data sequence.
variance
- the variance of the data sequence.
public static double sumOfSquares(DoubleArrayList data)
public static double trimmedMean(DoubleArrayList sortedData, double mean, int left, int right)
sortedData
- the data sequence; must be sorted ascending.
mean
- the mean of the (full) sorted data sequence.
public static double variance(double standardDeviation)
public static double variance(int size, double sum, double sumOfSquares)
size
- the number of elements of the data sequence.
sum
- == Sum( data[i] ).
sumOfSquares
- == Sum( data[i]*data[i] ).
public static double weightedMean(DoubleArrayList data, DoubleArrayList weights)
public static double weightedRMS(double sumOfProducts, double sumOfSquaredProducts)
sumOfProducts
- == Sum( data[i] * weights[i] ).
sumOfSquaredProducts
- == Sum( data[i] * data[i] * weights[i] ).
public static double winsorizedMean(DoubleArrayList sortedData, double mean, int left, int right)
sortedData
- the data sequence; must be sorted ascending.
mean
- the mean of the (full) sorted data sequence.
Copyright © 2006–2019 SYSTAP, LLC DBA Blazegraph. All rights reserved.