Source code for scilpy.stats.matrix_stats

# -*- coding: utf-8 -*-
import logging

import bct

import numpy as np
from scipy.stats import t as stats_t
from statsmodels.stats.multitest import multipletests


def _ttest_stat_only(x, y, tail):
    t = np.mean(x) - np.mean(y)
    n1, n2 = len(x), len(y)
    s = np.sqrt(((n1 - 1) * np.var(x, ddof=1) + (n2 - 1)
                 * np.var(y, ddof=1)) / (n1 + n2 - 2))
    denom = s * np.sqrt(1 / n1 + 1 / n2)
    if denom == 0:
        return 0
    if tail == 'both':
        return np.abs(t / denom)
    if tail == 'left':
        return -t / denom
    else:
        return t / denom


def _ttest_paired_stat_only(x, y, tail):
    n = len(x - y)
    sample_ss = np.sum((x - y)**2) - np.sum(x - y)**2 / n
    unbiased_std = np.sqrt(sample_ss / (n - 1))

    z = np.mean(x - y) / unbiased_std
    t = z * np.sqrt(n)
    if tail == 'both':
        return np.abs(t)
    if tail == 'left':
        return -t
    else:
        return t



[docs]
def ttest_two_matrices(matrices_g1, matrices_g2, paired, tail, fdr,
                       bonferroni):
    """
    Parameters
    ----------
    matrices_g1: np.ndarray of shape ?  (toDO)
    matrices_g2: np.ndarray of shape ?
    paired: bool
        Use paired sample t-test instead of population t-test. The two matrices
        must be ordered the same way.
    tail: str.
        One of ['left', 'right', 'both'].
    fdr: bool
        Perform a false discovery rate (FDR) correction for the p-values. Uses
        the number of non-zero edges as number of tests (value between 0.01 and
        0.1).
    bonferroni: bool
        Perform a Bonferroni correction for the p-values. Uses the number of
        non-zero edges as number of tests.
    """
    matrix_shape = matrices_g1.shape[0:2]
    nb_group_g1 = matrices_g1.shape[2]
    nb_group_g2 = matrices_g2.shape[2]

    # Todo better reshape, more simple
    sum_both_groups = np.sum(matrices_g1, axis=2) + np.sum(matrices_g2, axis=2)
    nbr_non_zeros = np.count_nonzero(np.triu(sum_both_groups))

    logging.info('The provided matrices contain {} non zeros elements.'
                 .format(nbr_non_zeros))

    matrices_g1 = matrices_g1.reshape((np.prod(matrix_shape), nb_group_g1))
    matrices_g2 = matrices_g2.reshape((np.prod(matrix_shape), nb_group_g2))
    # Negative epsilon, to differentiate from null p-values
    matrix_pval = np.ones(np.prod(matrix_shape)) * -0.000001

    text = ' paired' if paired else ''
    logging.info('Performing{} t-test with "{}" hypothesis.'
                 .format(text, tail))
    logging.info('Data has dimensions {}x{} with {} and {} observations.'
                 .format(matrix_shape[0], matrix_shape[1],
                          nb_group_g1, nb_group_g2))

    # For conversion to p-values
    if paired:
        dof = nb_group_g1 - 1
    else:
        dof = nb_group_g1 + nb_group_g2 - 2

    for ind in range(np.prod(matrix_shape)):
        # Skip edges with no data, leaves a negative epsilon instead
        if not matrices_g1[ind].any() and not matrices_g2[ind].any():
            continue

        if paired:
            t_stat = (_ttest_paired_stat_only(
                matrices_g1[ind], matrices_g2[ind], tail))
        else:
            t_stat = _ttest_stat_only(
                matrices_g1[ind], matrices_g2[ind], tail)

        pval = stats_t.sf(t_stat, dof)
        matrix_pval[ind] = pval if tail == 'both' else pval / 2.0

    corr_matrix_pval = matrix_pval.reshape(matrix_shape)
    if fdr:
        logging.info('Using FDR, the results will be q-values.')
        corr_matrix_pval = np.triu(corr_matrix_pval)
        corr_matrix_pval[corr_matrix_pval > 0] = multipletests(
            corr_matrix_pval[corr_matrix_pval > 0], 0, method='fdr_bh')[1]

        # Symmetrize  the matrix
        matrix_pval = corr_matrix_pval + corr_matrix_pval.T - \
            np.diag(corr_matrix_pval.diagonal())
    elif bonferroni:
        corr_matrix_pval = np.triu(corr_matrix_pval)
        corr_matrix_pval[corr_matrix_pval > 0] = multipletests(
            corr_matrix_pval[corr_matrix_pval > 0], 0, method='bonferroni')[1]

        # Symmetrize  the matrix
        matrix_pval = corr_matrix_pval + corr_matrix_pval.T - \
            np.diag(corr_matrix_pval.diagonal())
    else:
        matrix_pval = matrix_pval.reshape(matrix_shape)

    return matrix_pval




[docs]
def omega_sigma(matrix):
    """Returns the small-world coefficients (omega & sigma) of a graph.
    Omega ranges between -1 and 1. Values close to 0 mean the matrix
    features small-world characteristics.
    Values close to -1 mean the network has a lattice structure and values
    close to 1 mean G is a random network.

    A network is commonly classified as small-world if sigma > 1.

    Parameters
    ----------
    matrix : numpy.ndarray
        A weighted undirected graph.
    Returns
    -------
    smallworld : tuple of float
        The small-work coefficients (omega & sigma).
    Notes
    -----
    The implementation is adapted from the algorithm by Telesford et al. [1]_.
    References
    ----------
    .. [1] Telesford, Joyce, Hayasaka, Burdette, and Laurienti (2011).
           "The Ubiquity of Small-World Networks".
           Brain Connectivity. 1 (0038): 367-75.  PMC 3604768. PMID 22432451.
           doi:10.1089/brain.2011.0038.
    """
    transitivity_rand_list = []
    transitivity_latt_list = []
    path_length_rand_list = []
    for i in range(10):
        logging.info('Generating random and lattice matrices, '
                     'iteration #{}.'.format(i))
        random = bct.randmio_und(matrix, 10)[0]
        lattice = bct.latmio_und(matrix, 10)[1]

        transitivity_rand_list.append(bct.transitivity_wu(random))
        transitivity_latt_list.append(bct.transitivity_wu(lattice))
        path_length_rand_list.append(
            float(np.average(bct.distance_wei(random)[0])))

    transitivity = bct.transitivity_wu(matrix)
    path_length = float(np.average(bct.distance_wei(matrix)[0]))
    transitivity_rand = np.mean(transitivity_rand_list)
    transitivity_latt = np.mean(transitivity_latt_list)
    path_length_rand = np.mean(path_length_rand_list)

    omega = (path_length_rand / path_length) - \
        (transitivity / transitivity_latt)
    sigma = (transitivity / transitivity_rand) / \
        (path_length / path_length_rand)

    return float(omega), float(sigma)