Module ukat.utils.arraystats
This module implements the ArrayStats class which calculates several
descriptive statistics of 2, 3 or 4D input arrays. That is, if the input array
is a 4D image (rows, columns, slices, "time"), it calculates statistics for
each 2D slice, each 3D volume and the entire 4D volume. See docstring of the
calculate
method for the list of the calculated statistical measures.
Classes
class ArrayStats (image, roi=None)
-
Class to calculate array statistics (optionally within a mask)
Parameters
image
:np.ndarray
- array with 2, 3 or 4 dimensions
roi
:np.ndarray (dtype: bool)
, optional- region of interest, same dimensions as
image
Attributes
image
:see above (parameters)
roi
:see above (parameters)
image_shape
:tuple
- shape (length of each dimension of image)
image_ndims
:int
- number of dimensions of image
Init method: see class docstring for parameters/attributes
Expand source code
class ArrayStats(): """Class to calculate array statistics (optionally within a mask) Parameters ---------- image : np.ndarray array with 2, 3 or 4 dimensions roi : np.ndarray (dtype: bool), optional region of interest, same dimensions as `image` Attributes ---------- image : see above (parameters) roi : see above (parameters) image_shape : tuple shape (length of each dimension of image) image_ndims: int number of dimensions of image """ def __init__(self, image, roi=None): """ Init method: see class docstring for parameters/attributes """ image_shape = image.shape image_ndims = len(image_shape) # Error checks if image_ndims < 2 or image_ndims > 4: raise ValueError("`image` must be [2, 3, 4]D") # Initialise unspecified input arguments if roi is None: roi = np.ones(image_shape, dtype=bool) elif isinstance(roi, np.ndarray) and roi.dtype == bool: if roi.shape != image_shape: raise ValueError("`roi.shape` must match `image.shape`") else: raise TypeError("`roi` must None or a numpy array with dtype=bool") self.image = image self.roi = roi self.image_shape = image_shape self.image_ndims = image_ndims def calculate(self): """Calculate array statistics Returns ------- dict dictionary where the keys are the calculated statistical measures: - 'n' : number of array elements - 'mean' - 'median' - 'min' - 'max' - 'std' : standard deviation - 'cv' : coefficient of variation (std/mean) - 'skewness' - 'kurtosis' - 'entropy' Each of the statistical measures is a dictionary in itself, where keys are the dimensions over which the calculation was performed. For example: - statistics['mean']['2D']: mean of each 2D slice - statistics['mean']['3D']: mean of each 3D volume - statistics['mean']['4D']: mean of each 4D volume """ # Init attribute that may need to be modified (i.e. add new axis) image = self.image roi = self.roi # Add new axis to `image` and `roi` if needed if self.image_ndims == 2: image = image[:, :, np.newaxis, np.newaxis] roi = roi[:, :, np.newaxis, np.newaxis] elif self.image_ndims == 3: image = image[:, :, :, np.newaxis] roi = roi[:, :, :, np.newaxis] # Get number of slices and time points of expanded `image` (i.e. after # np.newaxis). Note the `image_shape` attribute initialised in __init__ # refers to the shape before adding new axis (_, _, nz, nt) = image.shape # Pre-allocate 2D tmp2 = np.full((nz, nt), np.nan) n2 = np.copy(tmp2) mean2 = np.copy(tmp2) median2 = np.copy(tmp2) min2 = np.copy(tmp2) max2 = np.copy(tmp2) std2 = np.copy(tmp2) cv2 = np.copy(tmp2) skewness2 = np.copy(tmp2) kurtosis2 = np.copy(tmp2) entropy2 = np.copy(tmp2) # Pre-allocate 3D tmp3 = np.full(nt, np.nan) n3 = np.copy(tmp3) mean3 = np.copy(tmp3) median3 = np.copy(tmp3) min3 = np.copy(tmp3) max3 = np.copy(tmp3) std3 = np.copy(tmp3) cv3 = np.copy(tmp3) skewness3 = np.copy(tmp3) kurtosis3 = np.copy(tmp3) entropy3 = np.copy(tmp3) # Pre-allocate 4D tmp4 = np.full(1, np.nan) n4 = np.copy(tmp4) mean4 = np.copy(tmp4) median4 = np.copy(tmp4) min4 = np.copy(tmp4) max4 = np.copy(tmp4) std4 = np.copy(tmp4) cv4 = np.copy(tmp4) skewness4 = np.copy(tmp4) kurtosis4 = np.copy(tmp4) entropy4 = np.copy(tmp4) # Calculate statistics of the 4D volume intensities = image[roi] array_stats4 = FlatStats(intensities).calculate() n4 = array_stats4.n mean4 = array_stats4.mean median4 = array_stats4.median min4 = array_stats4.min max4 = array_stats4.max std4 = array_stats4.std cv4 = array_stats4.cv skewness4 = array_stats4.skewness kurtosis4 = array_stats4.kurtosis entropy4 = array_stats4.entropy for it in range(nt): # Calculate statistics of each 3D volume it_intensities = image[:, :, :, it][roi[:, :, :, it]] array_stats3 = FlatStats(it_intensities).calculate() n3[it] = array_stats3.n mean3[it] = array_stats3.mean median3[it] = array_stats3.median min3[it] = array_stats3.min max3[it] = array_stats3.max std3[it] = array_stats3.std cv3[it] = array_stats3.cv skewness3[it] = array_stats3.skewness kurtosis3[it] = array_stats3.kurtosis entropy3[it] = array_stats3.entropy for iz in range(nz): # Calculate statistics of each 2D slice iz_intensities = image[:, :, iz, it][roi[:, :, iz, it]] array_stats2 = FlatStats(iz_intensities).calculate() n2[iz, it] = array_stats2.n mean2[iz, it] = array_stats2.mean median2[iz, it] = array_stats2.median min2[iz, it] = array_stats2.min max2[iz, it] = array_stats2.max std2[iz, it] = array_stats2.std cv2[iz, it] = array_stats2.cv skewness2[iz, it] = array_stats2.skewness kurtosis2[iz, it] = array_stats2.kurtosis entropy2[iz, it] = array_stats2.entropy if self.image_ndims == 4: # Init dict for each dimension of each statistic n = { '2D': n2, # number of voxels in each 2D slice '3D': n3, # number of voxels in each 3D volume '4D': n4 # number of voxels in 4D volume } mean = { '2D': mean2, # mean of each 2D slice '3D': mean3, # mean of each 3D volume '4D': mean4 # mean of the 4D volume } median = { '2D': median2, '3D': median3, '4D': median4 } minimum = { '2D': min2, '3D': min3, '4D': min4 } maximum = { '2D': max2, '3D': max3, '4D': max4 } std = { '2D': std2, '3D': std3, '4D': std4 } cv = { '2D': cv2, '3D': cv3, '4D': cv4 } skewness = { '2D': skewness2, '3D': skewness3, '4D': skewness4 } kurtosis = { '2D': kurtosis2, '3D': kurtosis3, '4D': kurtosis4 } entropy = { '2D': entropy2, '3D': entropy3, '4D': entropy4 } elif self.image_ndims == 3: n = { '2D': n2.transpose()[0], '3D': n4, # n4 because {statistic}4 always returns the result } # over the entire array, which here is 3D mean = { '2D': mean2.transpose()[0], '3D': mean4, } median = { '2D': median2.transpose()[0], '3D': median4, } minimum = { '2D': min2.transpose()[0], '3D': min4, } maximum = { '2D': max2.transpose()[0], '3D': max4, } std = { '2D': std2.transpose()[0], '3D': std4, } cv = { '2D': cv2.transpose()[0], '3D': cv4, } skewness = { '2D': skewness2.transpose()[0], '3D': skewness4, } kurtosis = { '2D': kurtosis2.transpose()[0], '3D': kurtosis4, } entropy = { '2D': entropy2.transpose()[0], '3D': entropy4, } else: n = n4 # n4 because {statistic}4 always returns the mean = mean4 # result over the entire array, which here is 2D median = median4 minimum = min4 maximum = max4 std = std4 cv = cv4 skewness = skewness4 kurtosis = kurtosis4 entropy = entropy4 # Init statistics "wrapper" dictionary statistics = { 'n': n, 'mean': mean, 'median': median, 'min': minimum, 'max': maximum, 'std': std, 'cv': cv, 'skewness': skewness, 'kurtosis': kurtosis, 'entropy': entropy } return statistics
Methods
def calculate(self)
-
Calculate array statistics
Returns
dict
- dictionary where the keys are the calculated statistical measures: - 'n' : number of array elements - 'mean' - 'median' - 'min' - 'max' - 'std' : standard deviation - 'cv' : coefficient of variation (std/mean) - 'skewness' - 'kurtosis' - 'entropy' Each of the statistical measures is a dictionary in itself, where keys are the dimensions over which the calculation was performed. For example: - statistics['mean']['2D']: mean of each 2D slice - statistics['mean']['3D']: mean of each 3D volume - statistics['mean']['4D']: mean of each 4D volume
class FlatStats (x)
-
Helper class that calculates statistics of a flat (1D) array
Parameters
x
:np.ndarray
- flat array (1 dimension)
Attributes
x
:see above (parameters)
n
:float
- number of array elements
mean
:float
median
:float
min
:float
max
:float
std
:float
- standard deviation
cv
:float
- coefficient of variation (std/mean)
skewness
:float
kurtosis
:float
entropy
:float
Init method: see class documentation for parameters/attributes
Expand source code
class FlatStats(): """Helper class that calculates statistics of a flat (1D) array Parameters ---------- x : np.ndarray flat array (1 dimension) Attributes ---------- x : see above (parameters) n : float number of array elements mean : float median : float min : float max : float std : float standard deviation cv : float coefficient of variation (std/mean) skewness : float kurtosis : float entropy : float """ def __init__(self, x): """ Init method: see class documentation for parameters/attributes """ if np.ndim(x) != 1: raise ValueError("`x` should be a flat (1D) array") self.x = x # Init statistics (calculated in calculate()) self.n = NOT_CALCULATED_MSG self.mean = NOT_CALCULATED_MSG self.median = NOT_CALCULATED_MSG self.min = NOT_CALCULATED_MSG self.max = NOT_CALCULATED_MSG self.std = NOT_CALCULATED_MSG self.cv = NOT_CALCULATED_MSG self.skewness = NOT_CALCULATED_MSG self.kurtosis = NOT_CALCULATED_MSG self.entropy = NOT_CALCULATED_MSG def calculate(self): """Calculate flat array statistics Returns ------- FlatStats object with calculated statistics """ if self.x is None or self.x.size == 0: n = 0 mean = np.nan median = np.nan minimum = np.nan maximum = np.nan std = np.nan cv = np.nan skewness = np.nan kurtosis = np.nan entropy = np.nan elif np.isnan(self.x).any(): n = len(self.x) mean = np.nan median = np.nan minimum = np.nan maximum = np.nan std = np.nan cv = np.nan skewness = np.nan kurtosis = np.nan entropy = np.nan else: n = len(self.x) mean = np.mean(self.x) median = np.median(self.x) minimum = np.min(self.x) maximum = np.max(self.x) std = np.std(self.x) if mean == 0: cv = np.nan else: cv = std/mean # Don't need to deal with `nan_policy` as we catch arrays with nans # above and assign nans to the resulting statistical metrics skewness = stats.skew(self.x, bias=True) kurtosis = stats.kurtosis(self.x, fisher=True, bias=True) entropy = stats.entropy(self.x) self.n = n self.mean = mean self.median = median self.min = minimum self.max = maximum self.std = std self.cv = cv self.skewness = skewness self.kurtosis = kurtosis self.entropy = entropy return self
Methods
def calculate(self)