import numpy as np
import pandas as pd
import scipy.stats as stats
from . import _mannwhitneyu
[docs]
def fishers_exact(
    df,
    alternative=['greater','less']
):
    """\
    Perform Fisher's exact test. Tests all columns in a 1 VS rest scheme.

    For every combination of a column and a row of `df`, the table is
    collapsed to a 2x2 contingency table (this column VS all other columns,
    this row VS all other rows), which is then passed to
    :func:`scipy.stats.fisher_exact`.

    Parameters
    ----------
    df
        A contingency table as :class:`~pandas.DataFrame`, with groups in the
        rows and values in the columns.
    alternative
        The alternative for the test. Can also be a list of alternatives.
        Available are:

        - 'greater'
        - 'less'
        - 'two-sided'

    Returns
    -------
    An :class:`~pandas.DataFrame` containing the enrichment p-values. It has\
    one row per combination of value, group, and alternative, in the columns\
    'value', 'group', 'alternative', and 'p'.

    Raises
    ------
    ValueError
        If an alternative other than the ones listed above is requested.
    """
    if isinstance(alternative, str):
        alternative = [alternative]

    for a in alternative:
        if a not in ['greater','less','two-sided']:
            raise ValueError(f'`alternative` can only be "greater","less","two-sided", but got {a}!')

    # Everything below that depends only on the group (row) is computed once
    # here instead of once per column inside the nested loops.
    counts = df.to_numpy()
    collapsed_groups = []
    for groups_val in df.index:
        not_groups_val = df.index != groups_val
        _groups_val = ~not_groups_val
        groups_mat = np.array([_groups_val, not_groups_val])
        # 2 x n_values: counts of this group (row 0) and of all other groups
        # (row 1), still resolved per column.
        collapsed_groups.append((groups_val, groups_mat @ counts))

    res = []
    for values_val in df.columns:
        not_values_val = df.columns != values_val
        _values_val = ~not_values_val
        values_mat = np.array([_values_val, not_values_val])

        for groups_val, group_counts in collapsed_groups:
            # After the transpose the rows are (value, other values) and the
            # columns are (group, other groups).
            table = (group_counts @ values_mat.T).T
            for a in alternative:
                p = stats.fisher_exact(table, alternative=a)[1]
                res.append((values_val, groups_val, a, p))

    res_df = pd.DataFrame(res, columns=['value', 'group', 'alternative', 'p'])
    res_df['group'] = res_df['group'].astype(df.index.dtype) # get same dtype
    res_df['value'] = res_df['value'].astype(df.columns.dtype) # get same dtype
    res_df['alternative'] = res_df['alternative'].astype('category') # use reasonable dtype

    return res_df
def _wrap_test(
    test_function,
    df,
    alternative=['greater','less'],
):
    """\
    Wraps test functions in a convenient dataframe compatible format.

    Every column of `df` is tested in a 1 VS rest scheme: the samples of one
    group are compared against the samples of all other groups.

    Parameters
    ----------
    test_function
        The test function to wrap. It must return the p value and have the
        signature `f(samples0, sample1, alternative)`, where `samples0` and
        `sample1` are the two samples to test for equality, and `alternative`
        is one of the strings given below. `alternative` is passed by keyword.
    df
        A table of samples as :class:`~pandas.DataFrame`, with groups in the
        rows and values in the columns. If the index is a multilevel index, the
        second level is interpreted as sample annotation.
    alternative
        The alternative for the test. Can also be a list of alternatives.
        Available are:

        - 'greater'
        - 'less'
        - 'two-sided'

    Returns
    -------
    An :class:`~pandas.DataFrame` containing the enrichment p-values in the\
    columns 'value', 'group', 'alternative', and 'p'. Every group is tested\
    once per column. Combinations are left out if the group has no samples\
    or if all samples of the column share a single value.

    Raises
    ------
    ValueError
        If an alternative other than the ones listed above is requested.
    """
    if isinstance(alternative, str):
        alternative = [alternative]

    for a in alternative:
        if a not in ['greater','less','two-sided']:
            raise ValueError(f'`alternative` can only be "greater","less","two-sided", but got {a}!')

    if isinstance(df.index, pd.MultiIndex):
        df_index = df.index.get_level_values(0)
        group_dtype = df.index.levels[0].dtype
        candidate_groups = df.index.levels[0]
    else:
        df_index = df.index
        group_dtype = df.index.dtype
        # A group usually owns several rows; iterating the raw index would
        # repeat the identical test once per sample of the group.
        candidate_groups = df.index.unique()

    _groups_vals = pd.get_dummies(df_index).astype(bool)

    # The levels of a MultiIndex may still list groups which have no rows left
    # (e.g. after slicing); those have no indicator column and are skipped.
    group_masks = [
        (groups_val, _groups_vals[groups_val].to_numpy())
        for groups_val in candidate_groups
        if groups_val in _groups_vals.columns
    ]

    res = []
    for values_val in df.columns:
        column = df[values_val].to_numpy()
        for groups_val, _groups_val in group_masks:
            samples0 = column[_groups_val]
            samples1 = column[~_groups_val]

            # Nothing to test for an empty group or for constant data.
            if len(samples0) == 0 or ((samples0 == samples0[0]).all() and (samples1 == samples0[0]).all()):
                continue

            for a in alternative:
                p = test_function(samples0, samples1, alternative=a)
                res.append((values_val, groups_val, a, p))

    res_df = pd.DataFrame(res, columns=['value', 'group', 'alternative', 'p'])
    res_df['group'] = res_df['group'].astype(group_dtype) # get same dtype
    res_df['value'] = res_df['value'].astype(df.columns.dtype) # get same dtype
    res_df['alternative'] = res_df['alternative'].astype('category') # use reasonable dtype

    return res_df
[docs]
def mannwhitneyu(
    df,
    alternative=['greater','less'],
):
    """\
    Perform Mann-Whitney-U test. Tests all columns.

    Parameters
    ----------
    df
        A table of samples as :class:`~pandas.DataFrame`, with groups in the
        rows and values in the columns. If the index is a multilevel index, the
        second level is interpreted as sample annotation.
    alternative
        The alternative for the test. Can also be a list of alternatives.
        Available are:

        - 'greater'
        - 'less'
        - 'two-sided'

    Returns
    -------
    An :class:`~pandas.DataFrame` containing the enrichment p-values.
    """
    def _exact_u_pvalue(samples0, samples1, alternative):
        # The exact variant of the test is requested; the second entry of
        # the result is what gets reported as p-value.
        outcome = _mannwhitneyu.mannwhitneyu(
            samples0,
            samples1,
            alternative=alternative,
            exact=True,
        )
        return outcome[1]

    return _wrap_test(_exact_u_pvalue, df, alternative=alternative)
[docs]
def studentttest(
    df,
    alternative=['greater','less'],
    n_boot=0,
):
    """\
    Perform Student's t test. Tests all columns.

    Parameters
    ----------
    df
        A table of samples as :class:`~pandas.DataFrame`, with groups in the
        rows and values in the columns. If the index is a multilevel index, the
        second level is interpreted as sample annotation.
    alternative
        The alternative for the test. Can also be a list of alternatives.
        Available are:

        - 'greater'
        - 'less'
        - 'two-sided'
    n_boot
        The number of bootstrap samples which are included in addition to the
        real samples. The number of observations is divided by `1+n_boot`
        and the standard deviation uses `1+n_boot` delta degrees of freedom.

    Returns
    -------
    An :class:`~pandas.DataFrame` containing the enrichment p-values.
    """
    def _pooled_t_pvalue(samples0, samples1, alternative):
        copies = 1 + n_boot

        # Collect (mean, std, nobs) for the first and then the second sample,
        # which is the argument order of `ttest_ind_from_stats`.
        summary = []
        for samples in (samples0, samples1):
            summary.append(np.mean(samples, axis=0))
            summary.append(np.std(samples, axis=0, ddof=copies))
            summary.append(len(samples) / copies)

        outcome = stats.ttest_ind_from_stats(
            *summary,
            equal_var=True,
            alternative=alternative,
        )
        return outcome[1]

    return _wrap_test(_pooled_t_pvalue, df, alternative=alternative)
[docs]
def welchttest(
    df,
    alternative=['greater','less'],
    n_boot=0,
):
    """\
    Perform Welch's t test. Tests all columns.

    Parameters
    ----------
    df
        A table of samples as :class:`~pandas.DataFrame`, with groups in the
        rows and values in the columns. If the index is a multilevel index, the
        second level is interpreted as sample annotation.
    alternative
        The alternative for the test. Can also be a list of alternatives.
        Available are:

        - 'greater'
        - 'less'
        - 'two-sided'
    n_boot
        The number of bootstrap samples which are included in addition to the
        real samples. The number of observations is divided by `1+n_boot`
        and the standard deviation uses `1+n_boot` delta degrees of freedom.

    Returns
    -------
    An :class:`~pandas.DataFrame` containing the enrichment p-values.
    """
    def _summarize(samples):
        # (mean, std, nobs) of one sample, corrected for the bootstrap copies
        copies = 1 + n_boot
        return (
            np.mean(samples, axis=0),
            np.std(samples, axis=0, ddof=copies),
            len(samples) / copies,
        )

    def _welch_pvalue(samples0, samples1, alternative):
        first = _summarize(samples0)
        second = _summarize(samples1)
        # equal_var=False selects the unequal-variance form of the test
        outcome = stats.ttest_ind_from_stats(
            *first,
            *second,
            equal_var=False,
            alternative=alternative,
        )
        return outcome[1]

    return _wrap_test(_welch_pvalue, df, alternative=alternative)