Source code for tacco.plots._plots

import pandas as pd
import numpy as np
import anndata as ad
import scanpy as sc
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
from matplotlib.colors import to_rgb, to_rgba, rgb_to_hsv, hsv_to_rgb, to_rgba_array, PowerNorm, LinearSegmentedColormap, to_hex, ListedColormap, LogNorm, Normalize
from matplotlib.cm import ScalarMappable
import seaborn as sns
import scipy.stats
import scipy.cluster
from ..tools._enrichments import get_compositions, get_contributions, enrichments
from .. import utils
from .. import tools
from .. import get
from .. import preprocessing
import joblib
import gc
from numba import njit

def get_min_max(vector, log=True):
    if log:
        maximum = np.log(np.nanmax(vector))
        minimum = np.log(np.nanmin(vector[vector > 0]))
        delta = (maximum - minimum) * 0.1
        maximum += delta
        minimum -= delta
        maximum = np.exp(maximum)
        minimum = np.exp(minimum)
    else:
        maximum = np.nanmax(vector)
        minimum = np.nanmin(vector)
        #delta = (maximum - minimum) * 0.1
        #maximum += delta
        #minimum -= delta
    return minimum, maximum
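
# Example (added for illustration, not part of the original module): `get_min_max`
# pads the range by 10% in log space, which is useful for log-scaled axis limits.
def _example_get_min_max():
    values = np.array([0.0, 1.0, 10.0, 100.0])
    lo, hi = get_min_max(values, log=True)  # zeros are ignored for the log minimum
    return lo, hi  # approximately (0.63, 158.5): 10% padding beyond [1, 100] in log space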
        
def _correlations(x, y, log=False):
    if log:
        x, y = np.log(x), np.log(y)
    x_y_finite = np.isfinite(x) & np.isfinite(y)
    x, y = x[x_y_finite], y[x_y_finite]
    nDOF = len(x) - 1
    # mean-offset-adjusted chi-square per degree of freedom: squared deviation of x
    # from y after removing the mean difference, normalized by the variance of y
    x2 = np.sum((x-y-x.mean()+y.mean())**2) / (np.var(y) * nDOF)
    pc = scipy.stats.pearsonr(x,y)[0]
    return pc, x2
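
# Example (added for illustration, not part of the original module): the helper
# returns Pearson's r together with the mean-adjusted chi-square statistic that
# is annotated as $\chi^2_m$ in the cross-scatter plots.
def _example_correlations():
    rng = np.random.default_rng(0)
    x = rng.lognormal(size=100)
    y = x * np.exp(rng.normal(scale=0.1, size=100))  # y agrees with x up to small log-normal noise
    pc, x2 = _correlations(x, y, log=True)  # r close to 1, chi2 small
    return pc, x2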

def _scatter_plot(ax,x,y,sizes=None,colors=None,alpha=1.0,marker='o',log=False):
    ax.scatter(x=x,y=y,s=sizes,c=colors,alpha=alpha,marker=marker)
    pc, x2 = _correlations(x, y, log=log)
    ax.annotate('r=%.2f'%(pc), (0.05,0.95), xycoords='axes fraction', horizontalalignment='left', verticalalignment='top')
    ax.annotate('$\\chi^2_m$=%.2f'%(x2), (0.95,0.00), xycoords='axes fraction', horizontalalignment='right', verticalalignment='bottom')
    if log:
        ax.set_xscale('log')
        ax.set_yscale('log')

def _composition_bar(compositions, colors, horizontal=True, ax=None, legend=True):
    n_freqs = len(compositions.index)
    
    x = np.arange(compositions.shape[0])
    fig = None
    if horizontal:
        if ax is None:
            fig, ax = plt.subplots(figsize=(11,1*(n_freqs+2)))
        bottom = np.full_like(x,0)
        for t in colors.index[::1]:
            if t in compositions:
                ax.barh(x, compositions[t], height=0.55, label=t, left=bottom, color=colors[t])
                bottom = bottom + np.array(compositions[t])
            else:
                ax.barh(x, np.zeros(compositions.shape[0]), height=0.55, label=t, left=bottom, color=colors[t])

        ax.set_xlim([0, 1])
        ax.set_xlabel('composition')
        ax.set_ylim(x.min()-0.75,x.max()+0.75)
        ax.set_yticks(x)
        ax.set_yticklabels(compositions.index, ha='right')
        if legend:
            ax.legend(bbox_to_anchor=(-0.0, 1.0), loc='lower left', ncol=6)
    else:
        if ax is None:
            fig, ax = plt.subplots(figsize=(1*(n_freqs+2),8))
        bottom = np.ones(len(x))
        for t in colors.index[::-1]:
            if t in compositions:
                bottom = bottom - np.array(compositions[t])
                ax.bar(x, compositions[t], width=0.55, label=t, bottom=bottom, color=colors[t])
            else:
                ax.bar(x, np.zeros(compositions.shape[0]), width=0.55, label=t, bottom=bottom, color=colors[t])

        ax.set_ylim([0, 1])
        ax.set_ylabel('composition')
        ax.set_xlim(x.min()-0.75,x.max()+0.75)
        ax.set_xticks(x)
        ax.set_xticklabels(compositions.index, rotation=30,va='top',ha='right')
        if legend:
            ax.legend(bbox_to_anchor=(1, 1), loc='upper left', ncol=1)

    if fig is not None:
        fig.tight_layout()
    
    return fig
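
# Example (added for illustration, not part of the original module): stacked
# composition bars for two samples over three categories, with the colors given
# as a pandas Series indexed by category.
def _example_composition_bar():
    compositions = pd.DataFrame(
        {'A': [0.2, 0.5], 'B': [0.3, 0.25], 'C': [0.5, 0.25]},
        index=['sample1', 'sample2'],
    )
    colors = pd.Series({'A': 'tab:red', 'B': 'tab:green', 'C': 'tab:blue'})
    return _composition_bar(compositions, colors, horizontal=True)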

def _complement_color(r,g,b):
    h, s, v = rgb_to_hsv((r,g,b))
    _r, _g, _b = hsv_to_rgb(((h+0.5)%1,s,v))
    return _r, _g, _b

def _scatter_frame(df, colors, ax_, id_=None, cmap=None, cmap_vmin_vmax=None, out_min_max=None, joint=True, point_size=3, grid=False, margin=0.25, scale=None, title='', rasterized=True):
    
    pie = False
    if not isinstance(rasterized, bool):
        if rasterized != 'pie':
            raise ValueError(f'`rasterized` has to be boolean or "pie"!')
        if cmap is not None:
            raise ValueError(f'`rasterized=="pie"` cannot use a `cmap`!')
        pie = True
        rasterized = False
        
        color_array = np.array([colors[c] for c in df.columns.difference(['x','y'])])

        dpi = ax_[0].get_figure().get_dpi()
        point_radius = np.sqrt(point_size / np.pi) / 2 * dpi
        
        # convert from pixel to axes units
        point_radius = (ax_[0].transAxes.inverted().transform((point_radius, 0)) - ax_[0].transAxes.inverted().transform((0, 0)))[0]
    
    if id_ is None:
        n_cols = len(colors)
        id_ = np.full(fill_value=-1,shape=(n_cols,2),dtype=object)
    
    n_types = len(colors)
        
    for ax in ax_:
        ax.set_aspect(1)
        ax.grid(grid)
        
    if joint is None:
        _ax_ = ax_[1:]
        _id_ = id_[1:]
    elif joint:
        _ax_ = [None] * n_types
        _id_ = [None] * n_types
    else:
        _ax_ = ax_
        _id_ = id_
    
    axsize = ax_[0].get_window_extent().transformed(ax_[0].get_figure().dpi_scale_trans.inverted()).size
    canvas_size = axsize
    
    coords = df[['x','y']].to_numpy()
    
    # use the largest possible amount of space of the axes
    coords_min = coords.min(axis=0) if len(coords) > 0 else np.nan
    _coords = coords - coords_min
    coords_range = _coords.max(axis=0) if len(coords) > 0 else np.nan
    scale_0 = (canvas_size[0]-2*margin) / coords_range[0]
    scale_1 = (canvas_size[1]-2*margin) / coords_range[1]
    if scale is None:
        scale = min(scale_0, scale_1)
    # center the coords
    offset_0 = (canvas_size[0] - coords_range[0] * scale) / 2
    offset_1 = (canvas_size[1] - coords_range[1] * scale) / 2

    # coords_min is mapped to (offset_0,offset_1)
    # => coords_min-(offset_0,offset_1)/scale is mapped to (0,0)
    # canvas_size / scale is the range
    # => extent_min = coords_min - (offset_0,offset_1) / scale
    # => extent_max = extent_min + canvas_size / scale
    extent_min = coords_min - np.array((offset_0,offset_1)) / scale
    extent_max = extent_min + canvas_size / scale
    extent = [extent_min[0], extent_max[0], extent_min[1], extent_max[1]]
    
    x, y = df['x'], df['y']
    for (t,c),ax,id in zip(colors.items(),_ax_,_id_):
        if pie:
            only_color_array = color_array.copy()
            only_color_array[only_color_array != c] = '#fff0' # transparent white
            
            weights = df[df.columns.difference(['x','y'])].to_numpy()
            
            def plotit(ax, id, color_array):
                for _weights,_x,_y in zip(weights, x, y):
                    ax.pie(_weights,colors=color_array,radius=point_radius,center=(_x,_y), frame=True)

                ax.set_xlim(extent[0:2])
                ax.set_ylim(extent[2:4])

                if out_min_max is not None: # get values for backpropagation of the value range
                    out_min_max[id[0],id[1]] = weights.min(), weights.max() # the pie branch has no per-type `vals`; the raw weights define the value range

            if joint is None or not joint:
                plotit(ax, id, only_color_array)
                ax.set_title(f'{title}: {t}' if title is not None and title != '' else f'{t}')
            if joint is None or joint:
                if (t,c) == list(colors.items())[0]: # plot full pies only once
                    plotit(ax_[0], id_[0],color_array)
            
            
        elif cmap is None:
            #scale = 200.0 * np.random.rand(n)
            r, g, b = to_rgb(c)
            _r, _g, _b = _complement_color(r,g,b) # complement colors for negative values
            if t in df.columns:
                vals = np.maximum(np.minimum(df[t].to_numpy(),1),-1)
                # don't plot invisible points
                visible = vals != 0
                vals, _x, _y = vals[visible], x[visible], y[visible]
                # separately plot efficient cases
                which1p = vals >= +1
                vals1p, _x1p, _y1p = vals[which1p], _x[which1p], _y[which1p]
                which1m = vals <= -1
                vals1m, _x1m, _y1m = vals[which1m], _x[which1m], _y[which1m]
                # and the rest
                remaining = (vals > -1) & (vals < 1)
                vals, _x, _y = vals[remaining], _x[remaining], _y[remaining]
                color1p = [r,g,b]
                color1m = [_r,_g,_b]
                color = np.hstack([np.array([[r,_r],[g,_g],[b,_b]]).T[(1-np.sign(vals).astype(np.int8))//2],np.abs(vals)[:,None]])
                def plotit(ax, id):
                    if len(_x) > 0:
                        ax.scatter(_x, _y, color=color, s=point_size, edgecolors='none', rasterized=rasterized)
                    if len(_x1p) > 0:
                        ax.scatter(_x1p, _y1p, color=color1p, s=point_size, edgecolors='none', rasterized=rasterized)
                    if len(_x1m) > 0:
                        ax.scatter(_x1m, _y1m, color=color1m, s=point_size, edgecolors='none', rasterized=rasterized)
                    
                    ax.set_xlim(extent[0:2])
                    ax.set_ylim(extent[2:4])

                    if out_min_max is not None: # get values for backpropagation of the value range
                        out_min_max[id[0],id[1]] = vals.min(), vals.max()

                if joint is None or not joint:
                    plotit(ax, id)
                    ax.set_title(f'{title}: {t}' if title is not None and title != '' else f'{t}')
                if joint is None or joint:
                    plotit(ax_[0], id_[0])
        else:
            if t in df.columns:
                vals = df[t].to_numpy()
                def plotit(ax, id):

                    if cmap_vmin_vmax is not None:
                        vmin, vmax = cmap_vmin_vmax
                        norm = Normalize(vmin=vmin, vmax=vmax)
                    else:
                        vmin, vmax = vals.min(), vals.max()
                        norm = None
                    
                    ax.scatter(x, y, c=vals, s=point_size, edgecolors='none', cmap=cmap, norm=norm, rasterized=rasterized)
                    
                    ax.set_xlim(extent[0:2])
                    ax.set_ylim(extent[2:4])

                    if out_min_max is not None: # get values for backpropagation of the value range
                        out_min_max[id[0],id[1]] = vmin, vmax

                if joint is None or not joint:
                    plotit(ax, id)
                    ax.set_title(f'{title}: {t}' if title is not None and title != '' else f'{t}')
                if joint is None or joint:
                    plotit(ax_[0], id_[0])

@njit(cache=True)
def _make_stencil(point_radius):
    extra_size = int(np.ceil(point_radius - 0.5))
    stencil_size = 1 + 2 * extra_size
    stencil = np.zeros((stencil_size,stencil_size))
    # monte carlo evaluate density profile of a disc... There have to be better options, but maybe not more straightforward ones...
    # for one quadrant
    np.random.seed(42)
    point_radius2 = point_radius**2
    n_samples = int(point_radius2*10000)
    samples = np.random.uniform(0,1,size=(n_samples,2)) * point_radius
    quadrant = np.zeros((1+extra_size,1+extra_size))
    for sample in samples:
        d2 = (sample**2).sum()
        if d2 < point_radius2:
            _sample = sample+0.5
            quadrant[int(_sample[0]),int(_sample[1])] += 1
    # symmetrize octant
    quadrant = quadrant + quadrant.T # using += with numba and matrices gives wrong results: https://github.com/numba/numba/issues/6949 
    # replicate to other quadrants
    stencil[:-extra_size,:-extra_size] += quadrant[::-1,::-1]
    stencil[extra_size:,:-extra_size] += quadrant[::,::-1]
    stencil[:-extra_size,extra_size:] += quadrant[::-1,::]
    stencil[extra_size:,extra_size:] += quadrant[::,::]
    # normalize
    stencil /= stencil.sum()
    return stencil
@njit(cache=True)
def _draw_weights(canvas_size, weights, data, offset_x, offset_y, stencil):
    extra_size = (stencil.shape[0] - 1) // 2
    canvas = np.zeros(canvas_size)
    for w,(x,y) in zip(weights,data):
        xo,yo = int(x+offset_x),int(y+offset_y)
        canvas[(xo-extra_size):(xo+extra_size+1),(yo-extra_size):(yo+extra_size+1)] += w * stencil
    return canvas
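
# Example (added for illustration, not part of the original module): rasterize
# three weighted points onto a small canvas with a disc stencil of radius 2 pixels.
def _example_draw_weights():
    stencil = _make_stencil(2.0)  # (5, 5) disc density profile, normalized to sum to 1
    data = np.array([[3.0, 3.0], [10.0, 10.0], [10.0, 11.0]])
    weights = np.array([1.0, 0.5, 0.5])
    canvas = _draw_weights((20, 20), weights, data, 0.0, 0.0, stencil)
    return canvas  # the overlapping points at (10,10) and (10,11) add up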
                    
def _render_frame(df, colors, ax_, id_=None, cmap=None, cmap_vmin_vmax=None, out_min_max=None, joint=True, point_size=3, grid=False, margin=0.25, scale=None, title='', color_mix_mode='xyv'):
    
    typing = df[df.columns.intersection(colors.index)]
    coords = df[['x','y']].to_numpy()
    
    coords_min = coords.min(axis=0) if len(coords) > 0 else np.nan
    coords = coords - coords_min
    coords_range = coords.max(axis=0) if len(coords) > 0 else np.nan
    
    if cmap is None: # empty dots don't get rendered
        nonzero = np.abs(typing.to_numpy()).sum(axis=1) > 0
        typing = typing.loc[nonzero]
        coords = coords[nonzero]
    
    n_types = len(typing.columns)
    weights = typing
    
    dpi = ax_[0].get_figure().get_dpi()
    
    if id_ is None:
        n_cols = len(colors)
        id_ = np.full(fill_value=-1,shape=(n_cols,2),dtype=object)
        
    for ax in ax_:
        ax.grid(grid)
        
    if joint is None:
        _ax_ = ax_[1:]
        _id_ = id_[1:]
    elif joint:
        _ax_ = [None] * n_types
        _id_ = [None] * n_types
    else:
        _ax_ = ax_
        _id_ = id_
    
    point_radius = np.sqrt(point_size / np.pi) / 72 * dpi
    
    margin = int(np.ceil(margin * dpi + point_radius))
    axsize = ax_[0].get_window_extent().transformed(ax_[0].get_figure().dpi_scale_trans.inverted()).size
    canvas_size = ((axsize)*dpi).astype(int)
    
    # use the largest possible amount of space of the axes
    scale_0 = (canvas_size[0]-1-2*margin) / coords_range[0]
    scale_1 = (canvas_size[1]-1-2*margin) / coords_range[1]
    if scale is None:
        scale = min(scale_0, scale_1)
    else:
        scale = scale * dpi
    # center the coords
    offset_0 = (canvas_size[0]-1 - coords_range[0] * scale) / 2
    offset_1 = (canvas_size[1]-1 - coords_range[1] * scale) / 2

    # coords_min is mapped to (offset_0,offset_1)
    # => coords_min-(offset_0,offset_1)/scale is mapped to (0,0)
    # canvas_size / scale is the range
    # => extent_min = coords_min - (offset_0,offset_1) / scale
    # => extent_max = extent_min + canvas_size / scale
    extent_min = coords_min - np.array((offset_0,offset_1)) / scale
    extent_max = extent_min + canvas_size / scale
    extent = [extent_min[0], extent_max[0], extent_min[1], extent_max[1]]
    
    coords = coords * scale
    
    stencil = _make_stencil(point_radius)
    
    canvases = { c: _draw_weights((canvas_size[0],canvas_size[1]), weights[c].to_numpy(), coords, offset_0, offset_1, stencil) for c in weights.columns}
    
    sum_canvas = sum(canvases.values())
    finite_sum = sum_canvas!=0
    
    colors = {i:np.array(to_rgb(colors[i])) for i in colors.index}
    
    # alpha just tells us whether there is data on the pixel or not
    def get_alpha(norm_canvas, stencil):
        alpha = np.log1p(np.abs(norm_canvas)) / np.log1p(stencil.max())
        alpha[alpha>1] = 1 # cut off values with too high alpha
        return alpha
    def add_alpha(canvas, norm_canvas, stencil):
        canvasA = np.zeros_like(canvas, shape=(*canvas.shape[:-1],4))
        canvasA[...,:-1] = canvas
        canvasA[...,-1] = get_alpha(norm_canvas, stencil)
        return canvasA

    if cmap is None:
        #norm_canvas = _draw_weights((canvas_size[0],canvas_size[1]), np.ones(shape=len(weights)), coords, offset_0, offset_1, stencil)
        norm_canvas = sum([np.abs(canvas) for canvas in canvases.values()])
        #finite_norm = norm_canvas!=0
        
        if joint is None or joint:
            canvas = mix_base_colors(np.stack([canvases[t] for t in canvases],axis=-1), np.array([colors[t] for t in canvases]), mode=color_mix_mode)
            for i in range(3): # remove zero weight colors
                canvas[...,i][~finite_sum] = 1
            canvas[canvas>1] = 1 # numerical accuracy issues

            canvasA = add_alpha(canvas, norm_canvas, stencil)

            ax_[0].imshow(canvasA.swapaxes(0,1), origin='lower', extent=extent)
            
            if out_min_max is not None: # get values for backpropagation of the value range
                out_min_max[id_[0][0],id_[0][1]] = sum_canvas.min()/stencil.max(), sum_canvas.max()/stencil.max()
        
        if joint is None or not joint:
            for (t,c),ax,id in zip(colors.items(),_ax_,_id_):
                if t in canvases:
                    canvas  = canvases[t][...,None] * colors[t]
                    finite_t = canvases[t]!=0
                    for i in range(3): # normalize the colors by the weights
                        canvas[...,i][finite_t] = canvas[...,i][finite_t] / np.abs(canvases[t][finite_t])
                    canvas[canvas>1] = 1 # numerical accuracy issues

                    canvasA = add_alpha(canvas, np.abs(canvases[t]), stencil)

                    ax.imshow(canvasA.swapaxes(0,1), origin='lower', extent=extent)
                    ax.set_title(f'{title}: {t}' if title is not None and title != '' else f'{t}')

                    if out_min_max is not None: # get values for backpropagation of the value range
                        out_min_max[id[0],id[1]] = canvases[t].min()/stencil.max(), canvases[t].max()/stencil.max()
    else:
        norm_canvas = _draw_weights((canvas_size[0],canvas_size[1]), np.ones(shape=len(weights)), coords, offset_0, offset_1, stencil)
        finite_norm = norm_canvas!=0
        
        if joint is None or joint:
            canvas = sum_canvas.copy()
            canvas[finite_norm] = canvas[finite_norm] / norm_canvas[finite_norm] # normalize the values by the weights

            alpha = get_alpha(norm_canvas, stencil)

            if cmap_vmin_vmax is not None:
                vmin, vmax = cmap_vmin_vmax
            else:
                vmin, vmax = canvas[finite_norm].min(), canvas[finite_norm].max()
            norm = Normalize(vmin=vmin, vmax=vmax)

            ax_[0].imshow(canvas.swapaxes(0,1), alpha=alpha.swapaxes(0,1), origin='lower', extent=extent, cmap=cmap, norm=norm)
            
            if out_min_max is not None: # get values for backpropagation of the value range
                out_min_max[id_[0][0],id_[0][1]] = vmin, vmax
        
        if joint is None or not joint:
            for (t,c),ax,id in zip(colors.items(),_ax_,_id_):
                if t in canvases:
                    canvas = canvases[t].copy()
                    canvas[finite_norm] = canvas[finite_norm] / norm_canvas[finite_norm] # normalize the values by the weights

                    alpha = get_alpha(norm_canvas, stencil)

                    if cmap_vmin_vmax is not None:
                        vmin, vmax = cmap_vmin_vmax
                        norm = Normalize(vmin=vmin, vmax=vmax)
                    else:
                        vmin, vmax = canvas[finite_norm].min(), canvas[finite_norm].max()
                        norm = None

                    ax.imshow(canvas.swapaxes(0,1), alpha=alpha.swapaxes(0,1), origin='lower', extent=extent, cmap=cmap, norm=norm)
                    ax.set_title(f'{title}: {t}' if title is not None and title != '' else f'{t}')

                    if out_min_max is not None: # get values for backpropagation of the value range
                        out_min_max[id[0],id[1]] = vmin, vmax

def _set_axes_labels(ax, axes_labels):
    if axes_labels is not None:
        if not pd.api.types.is_list_like(axes_labels) or len(axes_labels) != 2:
            raise ValueError(f'`axes_labels` {axes_labels!r} is not a list-like of 2 elements!')
        ax.set_xlabel(axes_labels[0])
        ax.set_ylabel(axes_labels[1])
                        
def spatial_distribution_plot(typing_data, coords, colors, n_cols=1, axs=None, joint=None, normalize=True, point_size=3, cmap=None, cmap_vmin_vmax=None, out_min_max=None, scale=None, grid=False, margin=0.25, render=False, rasterized=True, noticks=False, axes_labels=None, on_data_legend=None):
    n_types = len(colors.index)
    if joint is None:
        n_y = (n_types + n_cols) // n_cols
    elif joint:
        n_cols = 1
        n_y = 1
    else:
        n_y = (n_types + n_cols - 1) // n_cols
    n_solutions = len(typing_data)
    if axs is None:
        fig, axs = plt.subplots(n_y,n_solutions*n_cols,figsize=(7*n_solutions*n_cols,7*n_y), squeeze=False)
    else:
        fig = None
        if axs.shape != (n_y,n_solutions*n_cols):
            raise Exception('spatial_distribution_plot got axs of wrong dimensions: need %s, got %s' % ((n_y,n_solutions*n_cols),axs.shape))
    _axs = np.full(fill_value=None,shape=(n_solutions,n_cols*n_y),dtype=object)
    _idx = np.full(fill_value=-1,shape=(n_solutions,n_cols*n_y,2),dtype=object)
    counter = 0
    for i in range(n_cols):
        for j in range(n_y):
            for c in range(n_solutions):
                _axs[c,counter] = axs[j,i*n_solutions+c]
                _idx[c,counter,:] = np.array([j,i*n_solutions+c])
            counter += 1
    
    for id_,ax_,df,title in zip(_idx,_axs,typing_data,typing_data.index):
        df = df.copy() # don't change the original df
        df = df[colors.index.intersection(df.columns)]
        if normalize: # normalize values per column to give alphas between 0 and 1
            df = df - df.min(axis=0).to_numpy()
            df = df / df.max(axis=0).to_numpy()
            
        df[['x','y']] = coords
        
        df = df.loc[:,~df.isna().all(axis=0)] # remove all-nan columns
        df = df.loc[~df.isna().any(axis=1)] # remove any-nan rows
        
        if isinstance(render, bool) and not render:
            _scatter_frame(df, colors, ax_, id_=id_, cmap=cmap, cmap_vmin_vmax=cmap_vmin_vmax, out_min_max=out_min_max, joint=joint, point_size=point_size, grid=grid, margin=margin, scale=scale, title=title, rasterized=rasterized)
        else:
            if not rasterized:
                raise ValueError(f'`render!=False` only works when `rasterized==True`')
            if isinstance(render, bool):
                color_mix_mode = 'xyv'
            else:
                color_mix_mode = render
            _render_frame (df, colors, ax_, id_=id_, cmap=cmap, cmap_vmin_vmax=cmap_vmin_vmax, out_min_max=out_min_max, joint=joint, point_size=point_size, grid=grid, margin=margin, scale=scale, title=title, color_mix_mode=color_mix_mode)
        
        for ax in ax_:
            
            _set_axes_labels(ax, axes_labels)

            if noticks:
                ax.set_xticks([])
                ax.set_yticks([])
                    
        if joint is None or joint:
            ax_[0].set_title(title)
        
        if on_data_legend is not None:
            def weighted_median(df, value_col, weights_col):
                df = df[[value_col, weights_col]].sort_values(value_col)
                return df[value_col][df[weights_col].cumsum() >= 0.5 * df[weights_col].sum()].iloc[0]
            def find_closest_point(center, coords, weights):
                coords = coords.loc[weights >= weights.max() * 0.9].to_numpy() # only consider points with weights above 90% of the max weight
                dists = utils.cdist(np.array(center).reshape((1,2)),coords)
                return coords[np.argmin(dists)]
            
            for annotation in [c for c in df.columns if c not in ['x','y']]:
                medians = []
                for direction in ['x','y']:
                    medians.append(weighted_median(df, direction, annotation))
                closest = find_closest_point(medians, df[['x','y']], df[annotation])
                for ax in ax_:
                    ax.text(*closest, on_data_legend[annotation] if annotation in on_data_legend else annotation, ha='center', va='center')
    
    return fig

def write_results_to_adata(adata, typing_data, pfn_factors=None, pfn_key='platform_normalization_factors'):
    if pfn_key in adata.varm:
        pfns = adata.varm[pfn_key]
    else:
        pfns = pd.DataFrame(index=adata.var.index)
    for typing, data in typing_data.items():
        adata.obsm[typing] = data.reindex(adata.obs.index)
        if pfn_factors is not None and typing in pfn_factors and pfn_factors[typing] is not None:
            pfns[typing] = pfn_factors[typing]
    if len(pfns.columns) > 0:
        adata.varm[pfn_key] = pfns

def get_default_colors(n, offset=0):
    """\
    Chooses default colors.

    This is a convenience wrapper around :func:`seaborn.color_palette` which
    provides a quasi-periodicity of 10, i.e. every 10 colors, the colors are
    related.

    Parameters
    ----------
    n
        Number of colors to choose OR a list of keys to choose colors for.
    offset
        The number of chosen colors to skip before starting to use them. This
        is useful for generating different sets of colors.

    Returns
    -------
    If `n` is a number, returns a list of colors. If `n` is a list-like,\\
    returns a mapping of the elements of `n` to colors.

    """
    if pd.api.types.is_list_like(n):
        default_colors = get_default_colors(len(n), offset)
        return {name:color for name,color in zip(n,default_colors)}
    else:
        default_colors = [to_hex(c) for c in [
            *sns.color_palette("bright"),
            *sns.color_palette("deep"),
            *sns.color_palette("dark"),
            *sns.color_palette("pastel"),
        ]]
        default_colors *= (n+offset) // len(default_colors) + 1
        return default_colors[offset:(n+offset)]
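
# Example (added for illustration, not part of the original module):
def _example_get_default_colors():
    five_colors = get_default_colors(5)  # a list of 5 hex colors
    keyed_colors = get_default_colors(['A', 'B', 'C'], offset=5)  # a mapping from keys to colors, skipping the first 5
    return five_colors, keyed_colors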

def mix_base_colors(weights, base_colors_rgb, mode='xyv'):
    """\
    Mix colors "additively".

    In contrast to weighted averages over "rgb" values (which results in
    quite dark colors), the average can be done in "xyv" space, which is
    "hsv" with the "hs" part converted from polar to cartesian coordinates.

    Parameters
    ----------
    weights
        A weight tensor with last dimension `n_base` describing mixtures of
        `n_base` colors; this can be a :class:`~numpy.ndarray` or a
        :class:`~pandas.DataFrame`.
    base_colors_rgb
        An `n_base X 3` matrix defining the base colors in rgb space; this
        must be a :class:`~numpy.ndarray`.
    mode
        The mixing mode; available are:

        - 'rgb': average the rgb values in the rgb cube
        - 'xyv': average the xyv values in the hsv cylinder

    Returns
    -------
    Returns the color mixtures depending on the type of `weights` either as\\
    :class:`~numpy.ndarray` or :class:`~pandas.DataFrame`.

    """
    weights_index = None
    if hasattr(weights, 'to_numpy'):
        weights_index = weights.index
        weights = weights.to_numpy()
    weights = weights / weights.sum(axis=-1)[...,None]
    if mode == 'xyv':
        base_colors_hsv = np.array([matplotlib.colors.rgb_to_hsv(base_color_rgb[:3]) for base_color_rgb in base_colors_rgb])
        base_colors_xyv = np.array([
            np.cos(base_colors_hsv[:,0] * 2 * np.pi) * base_colors_hsv[:,1],
            np.sin(base_colors_hsv[:,0] * 2 * np.pi) * base_colors_hsv[:,1],
            base_colors_hsv[:,2],
        ]).T
        mixed_colors_xyv = weights @ base_colors_xyv
        mixed_colors_hsv = np.stack([
            np.arctan2(mixed_colors_xyv[...,1], mixed_colors_xyv[...,0]) / (2 * np.pi),
            np.sqrt(mixed_colors_xyv[...,0]**2 + mixed_colors_xyv[...,1]**2),
            mixed_colors_xyv[...,2],
        ], axis=-1)
        mixed_colors_hsv[mixed_colors_hsv[...,0]<0,0] += 1
        mixed_colors_rgb = matplotlib.colors.hsv_to_rgb(mixed_colors_hsv)
        if weights_index is not None:
            mixed_colors_rgb = pd.DataFrame(mixed_colors_rgb, index=weights_index)
    elif mode == 'rgb':
        mixed_colors_rgb = weights @ base_colors_rgb
    else:
        raise ValueError(f'The mode "{mode}" is not implemented!')
    return mixed_colors_rgb
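
# Example (added for illustration, not part of the original module): mixing pure
# red and pure blue half-and-half in "xyv" space keeps the value (brightness),
# unlike a plain "rgb" average.
def _example_mix_base_colors():
    base_colors_rgb = np.array([to_rgb('red'), to_rgb('blue')])
    weights = np.array([[0.5, 0.5]])
    mixed_xyv = mix_base_colors(weights, base_colors_rgb, mode='xyv')
    mixed_rgb = mix_base_colors(weights, base_colors_rgb, mode='rgb')  # (0.5, 0, 0.5): darker
    return mixed_xyv, mixed_rgb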

def _filter_types(typing_data, types, colors, show_only):
    if show_only is not None:
        show_only = pd.Index(show_only)
        if show_only.isin(types).all():
            colors = colors[show_only]
            types = colors.index
        else:
            raise Exception('Not all selected types %s are available in the data %s!' % (show_only, types))
        typing_data = typing_data.map(lambda data: data.reindex(columns=types))
    return typing_data, types, colors

def _get_colors(colors, types):
    if colors is not None:
        colors = pd.Series(colors)
        if types.isin(colors.index).all():
            #colors = colors[colors.index.intersection(types)]
            types = colors.index
        else:
            raise Exception('Not all types %s are given colors with %s!' % (types, colors))
    else:
        colors = pd.Series(get_default_colors(len(types)), index=types)
    return colors, types

def _get_adatas(adata):
    if isinstance(adata, ad.AnnData):
        adatas = pd.Series(index=[''],dtype=object)
        adatas[''] = adata
    elif isinstance(adata, pd.DataFrame):
        adatas = pd.Series(index=[''],dtype=object)
        adatas[''] = utils.dataframe2anndata(adata, None, None)
    elif isinstance(adata, dict):
        #adatas = pd.Series(adata) # it could be so simple - but it does not work for adatas...
        adatas = pd.Series(index=adata.keys(),dtype=object)
        for k,v in adata.items():
            if isinstance(v, pd.DataFrame):
                v = utils.dataframe2anndata(v, None, None)
            adatas[k] = v
    else:
        adatas = adata
    if (adatas.index.value_counts() != 1).any():
        raise ValueError('The series of adatas has non-unique indices: %s!' % adatas.index)
    return adatas

def _validate_args(adata, keys, colors, show_only, reads=False, method_labels=None, counts_location=None, compositional=True):
    adatas = _get_adatas(adata)

    if isinstance(keys, str):
        methods = [keys]
    else:
        methods = keys
    try:
        # check basically whether methods is a dict or pd.Series (having an items method), mapping sample names to methods
        _methods = {}
        for k,v in methods.items():
            if isinstance(v,str):
                v = [v]
            _methods[k] = v
        methods = pd.Series(_methods)
    except:
        methods = pd.Series([methods]*len(adatas), index=adatas.index) # assume same methods available for all samples

    typing_data = []
    types = pd.Index([],dtype=object)
    for sample, adata in adatas.items():
        for method in methods[sample]:
            if pd.api.types.is_list_like(method):
                data = pd.DataFrame(index=adata.obs.index)
                for element in method:
                    if element in adata.obs:
                        data[element] = adata.obs[element]
                        if not pd.api.types.is_numeric_dtype(data[element]):
                            raise Exception(f'The adata obs key {element} did contain non-numeric data%s!' % ('' if sample == '' else (' for sample "%s"' % sample)))
                    elif element in adata.var.index:
                        _data = adata[:,element].X
                        if scipy.sparse.issparse(_data):
                            _data = _data.A
                        data[element] = _data.flatten()
                    else:
                        raise Exception(f'The key {element} is neither in obs.columns nor in var.index%s!' % ('' if sample == '' else (' for sample "%s"' % sample)))
            elif method in adata.obsm:
                data = adata.obsm[method].copy()
            elif method in adata.obs:
                iscat = hasattr(adata.obs[method], 'cat')
                number = pd.api.types.is_numeric_dtype(adata.obs[method])
                if iscat or not number:
                    data = pd.get_dummies(adata.obs[method])
                    if data.shape[1] > 100:
                        print(f'More than 100 categories were discovered in column `.obs[{method}]`, which probably leads to slow plotting!\n(This message can be removed, if the column is made a categorical column).')
                else:
                    data = pd.DataFrame({method:adata.obs[method]})
            else:
                raise Exception(f'The method key {method} is neither obs(m) key nor a list-like%s!' % ('' if sample == '' else (' for sample "%s"' % sample)))

            if compositional:
                data *= (data>=0).to_numpy() # some methods may export negative values... (RCTD did that once)
                data = (data / data.fillna(0).sum(axis=1).to_numpy()[:,None]).fillna(0)
            if reads: # scale by read count
                counts = get.counts(adata, counts_location=counts_location)[adata.obs.index]
                if counts.shape[1] != 0: # ...but only if there is data for that
                    data *= np.array(counts.X.sum(axis=1)).flatten()[:,None]
            if compositional == 'catsize':
                sizes = get_cellsize(adata, data)
                data /= sizes[data.columns].to_numpy()
                data = (data / data.fillna(0).sum(axis=1).to_numpy()[:,None]).fillna(0)

            types = types.union(data.columns,sort=False)

            method_label = method if ((method_labels is None) or (method not in method_labels)) else method_labels[method]
            if sample == '' and method_label == '':
                name = ''
            elif sample == '':
                name = method_label
            elif isinstance(method_label, str) and method_label == '':
                name = sample
            else:
                name = f'{sample}; {method_label}'
            typing_data.append((name, sample, method, data))
    typing_data = pd.DataFrame(typing_data, columns=['name','sample','method','data']).set_index('name')

    colors, types = _get_colors(colors, types)

    if compositional and len(types) < 2:
        print(f'`compositional==True`, but there were less than 2 categories: {types!r}')

    typing_data['data'], types, colors = _filter_types(typing_data['data'], types, colors, show_only)

    return typing_data, adatas, methods, types, colors

def _validate_scatter_args(adata, position_key, keys, colors, show_only, method_labels=None, counts_location=None, compositional=True):
    typing_data, adatas, methods, types, colors = _validate_args(adata, keys, colors, show_only, method_labels=method_labels, counts_location=counts_location, compositional=compositional)

    coords = {}
    for sample, adata in adatas.items():
        _coords = get.positions(adata, position_key)
        _coords.columns = ['x','y']
        coords[sample] = _coords #.rename(columns={_coords.columns[0]:'x',_coords.columns[1]:'y'})

    return typing_data, adatas, methods, types, colors, coords

def subplots(
    n_x=1,
    n_y=1,
    axsize=(5,5),
    hspace=0.15,
    wspace=0.15,
    x_padding=None,
    y_padding=None,
    title=None,
    sharex='none',
    sharey='none',
    width_ratios=None,
    height_ratios=None,
    x_shifts=None,
    y_shifts=None,
    dpi=None,
    **kwargs,
):
    """\
    Creates a new figure with a grid of subplots.

    This is a convenience wrapper around :func:`matplotlib.pyplot.subplots`
    with parameters for axis instead of figure and absolute instead of
    relative units.

    Parameters
    ----------
    n_x
        Number of plots in horizontal/x direction
    n_y
        Number of plots in vertical/y direction
    axsize
        Size of a single axis in the plot
    hspace
        Relative vertical spacing between plots
    wspace
        Relative horizontal spacing between plots
    x_padding
        Absolute horizontal spacing between plots; this setting overrides
        `wspace`; if `None`, use the value from `wspace`
    y_padding
        Absolute vertical spacing between plots; this setting overrides
        `hspace`; if `None`, use the value from `hspace`
    title
        Sets the figure suptitle
    sharex
        Parameter for sharing the x-axes between the subplots; see the
        documentation of :func:`matplotlib.pyplot.subplots`
    sharey
        Parameter for sharing the y-axes between the subplots; see the
        documentation of :func:`matplotlib.pyplot.subplots`
    width_ratios
        Sets the ratios of widths for the columns of the subplot grid keeping
        the width of the widest column; if `None`, all columns have the same
        width
    height_ratios
        Sets the ratios of heights for the rows of the subplot grid keeping
        the height of the highest row; if `None`, all rows have the same
        height
    x_shifts
        The absolute shifts in position in horizontal/x direction per column
        of subplots; if `None`, the columns are not shifted
    y_shifts
        The absolute shifts in position in vertical/y direction per row of
        subplots; if `None`, the rows are not shifted
    dpi
        The dpi setting to use for this figure
    **kwargs
        Extra keyword arguments are forwarded to
        :func:`matplotlib.pyplot.subplots`

    Returns
    -------
    A pair of the created :class:`~matplotlib.figure.Figure` and an 2d\\
    :class:`~numpy.ndarray` of :class:`~matplotlib.axes.Axes`.

    """
    if x_padding is not None:
        wspace = x_padding / axsize[0]
    if y_padding is not None:
        hspace = y_padding / axsize[1]

    if width_ratios is None:
        effective_n_x = n_x
        effective_wspace = wspace
    else:
        effective_n_x = sum(width_ratios) / max(width_ratios)
        effective_wspace = wspace * n_x / effective_n_x
    if height_ratios is None:
        effective_n_y = n_y
        effective_hspace = hspace
    else:
        effective_n_y = sum(height_ratios) / max(height_ratios)
        effective_hspace = hspace * n_y / effective_n_y

    # find the values of the figure size which will keep the axis size identical for all numbers of columns and rows...
    fig_height = axsize[1] * (effective_n_y + hspace * (n_y - 1))
    fig_width = axsize[0] * (effective_n_x + wspace * (n_x - 1))
    top = 1.0
    if title is not None:
        title_space = 0.75
        fig_height += title_space
        top = 1 - title_space / fig_height
    if dpi is not None:
        kwargs = {**kwargs}
        kwargs['dpi'] = dpi
    fig, axs = plt.subplots(n_y, n_x, figsize=(fig_width, fig_height), squeeze=False, sharex=sharex, sharey=sharey, gridspec_kw={'wspace':effective_wspace,'hspace':effective_hspace,'left':0,'right':1,'top':top,'bottom':0,'width_ratios':width_ratios,'height_ratios':height_ratios}, **kwargs)
    if title is not None:
        fig.suptitle(title, fontsize=16, y=1)

    if x_shifts is not None or y_shifts is not None:
        # fill in zero shifts for whichever direction was not specified
        if x_shifts is None:
            x_shifts = [0.0] * n_x
        if y_shifts is None:
            y_shifts = [0.0] * n_y
        for i_x in range(n_x):
            for i_y in range(n_y):
                [left,bottom,width,height] = axs[i_y,i_x].get_position().bounds
                axs[i_y,i_x].set_position([left+x_shifts[i_x],bottom+y_shifts[i_y],width,height])

    return fig, axs
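
# Example (added for illustration, not part of the original module): a 3x2 grid
# of 4x3 inch axes with 0.3 inch absolute horizontal padding and a common title.
def _example_subplots():
    fig, axs = subplots(n_x=3, n_y=2, axsize=(4, 3), x_padding=0.3, title='overview')
    axs[0, 0].plot([0, 1], [0, 1])
    return fig, axs  # axs has shape (2, 3): rows first, columns second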

def _add_legend_or_colorbars(fig, axs, colors, cmap=None, min_max=None, scale_legend=1.0):
    if cmap is None:
        axs[0,-1].legend(handles=[mpatches.Patch(color=color, label=ind) for (ind, color) in colors.items()], bbox_to_anchor=(1, 1), loc='upper left', ncol=1)
    else:
        rel_dpi_factor = fig.get_dpi() / 72
        height_pxl = 200 * rel_dpi_factor * scale_legend
        width_pxl = 15 * rel_dpi_factor * scale_legend
        offset_top_pxl = 0 * rel_dpi_factor * scale_legend
        offset_left_pxl = 10 * rel_dpi_factor * scale_legend
        for irow in range(axs.shape[0]):
            for jcol in range(axs.shape[1]):
                ax = axs[irow,jcol]
                left,bottom = fig.transFigure.inverted().transform(ax.transAxes.transform((1,1))+np.array([offset_left_pxl,-offset_top_pxl-height_pxl]))
                width,height = fig.transFigure.inverted().transform(fig.transFigure.transform((0,0))+np.array([width_pxl,height_pxl]))
                cax = fig.add_axes((left, bottom, width, height))
                norm = Normalize(vmin=min_max[irow,jcol,0], vmax=min_max[irow,jcol,1])
                fig.colorbar(ScalarMappable(norm=norm, cmap=cmap), cax=cax)

def scatter(
    adata,
    keys,
    position_key=('x','y'),
    group_key=None,
    colors=None,
    show_only=None,
    axsize=(5,5),
    padding=0.5,
    margin=0.0,
    sharex=False,
    sharey=False,
    share_scaling=True,
    n_cols=1,
    joint=True,
    method_labels=None,
    counts_location=None,
    compositional=False,
    normalize=False,
    point_size=3,
    cmap=None,
    cmap_vmin_vmax=None,
    legend=True,
    on_data_legend=None,
    title=None,
    render=True,
    rasterized=True,
    background_color=None,
    grid=False,
    noticks=False,
    axes_labels=None,
    ax=None,
):
    """\
    Scatter plots of annotation.

    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` including annotation in `.obs` and/or
        `.obsm`. Can also be a mapping of labels to :class:`~anndata.AnnData`
        to specify multiple datasets. The :class:`~anndata.AnnData` instances
        can be replaced also by :class:`~pandas.DataFrame`, which are then
        treated like the `.obs` of an :class:`~anndata.AnnData`.
    keys
        The `.obs`/`.obsm` annotation keys to compare. Can be a single
        string, or a list of strings, or a mapping of the labels of `adata`
        to strings or lists of strings. In the list or mapping variant,
        categorical `.obs` keys can be replaced by list-likes of numerical
        `.obs` keys or gene names can be used.
    position_key
        The `.obsm` key or array-like of `.obs` keys with the position space
        coordinates.
    group_key
        An `.obs` key with categorical group information to split `adata`
        prior to plotting. This works only if `adata` is a single
        :class:`~anndata.AnnData` instance.
    colors
        A mapping of annotation values to colors. If `None`, default colors
        are used.
    show_only
        A subset of annotation values to restrict the plotting to.
    axsize
        Tuple of width and height of a single axis. If one of them is `None`,
        it is determined from the aspect ratio of the data. If it is a single
        scalar value, then this is interpreted as a conversion factor from
        data units to axsize units and `share_scaling` is ignored.
    padding
        The absolute padding between the plots.
    margin
        The absolute margin between the outermost data points and the
        boundary of the plot
    sharex
        Whether to use common x axis for all axes.
    sharey
        Whether to use common y axis for all axes.
    share_scaling
        Whether to have the units in all plots be of the same size in pixels
    n_cols
        Number of "columns" to plot: If larger than 1 splits columns of plots
        into `n_cols` columns.
    joint
        Whether to plot only one scatter plot with all annotation categories
        or only the scatter plots with one annotation category per plot. If
        `None`, plot both.
    method_labels
        A mapping from the strings in `keys` and `basis_keys` to (shorter)
        names to show in the plot.
    counts_location
        A string or tuple specifying where the count matrix is stored, e.g.
        `'X'`, `('raw','X')`, `('raw','obsm','my_counts_key')`,
        `('layer','my_counts_key')`, ... For details see
        :func:`~tacco.get.counts`.
    compositional
        Whether the annotation is to be interpreted as compositional data or
        as arbitrary numbers. Compositional data is normalized to sum to 1
        per observation. Can also be 'catsize', which rescales the
        compositions by the average observed size of an annotation category
        in terms of the contents of `.X`, e.g. reads.
    normalize
        Whether to shift the data to non-negative values and normalize them
        by their maximum.
    point_size
        The size of the points in the plot. Like in matplotlib, this is a
        measure of point area and provided in units of "squared points"
        corresponding to (1/72)^2 inch^2 = (25.4/72)^2 mm^2.
    cmap
        A string/colormap to override the `colors` with. Makes sense mostly
        for numeric data.
    cmap_vmin_vmax
        A tuple giving the range of values for the colormap.
    legend
        Whether to plot a legend
    on_data_legend
        A mapping from annotation values to (shortened) versions of the
        labels to use for labels on the plot at the center of the annotation;
        annotations not occurring in the mapping are used as is; if `None`,
        no annotation is plotted on the data.
    title
        The title of the figure
    render
        Whether the scatter plot should be custom rendered using the dpi
        setting from matplotlib or plotted using matplotlib's scatter plot.
        If `True`, the different annotations from the same (and overlapping)
        positions are added up symmetrically; if `False`, they are plotted on
        top of each other using an alpha channel proportional to the weight.
        `True` also has the advantage that only the scatter part of the
        figure will be exported as a pixelated version if the plot is
        exported as a vector graphic, with the rest like labels and axes
        being exported as a vector graphic. This parameter provides control
        over the type of color averaging in the process of the rendering by
        specifying one of the modes available in
        :func:`~tacco.plots.mix_base_colors`, e.g. "xyv" or "rgb", with "xyv"
        being equivalent to setting `True`.
    rasterized
        Whether to rasterize the interior of the plot, even when exported
        later as a vector graphic. This leads to much smaller plots for many
        (data) points. `rasterized==False` is incompatible with
        `render==True` or string. This parameter provides experimental
        support for plotting pie charts per dot via `rasterized=="pie"` and
        `render==False`. This is much slower, so only usable for very few
        points.
    background_color
        The background color to draw the points on.
    grid
        Whether to draw a grid
    noticks
        Whether to switch off ticks on the axes.
    axes_labels
        Labels to write on the axes as a list-like of the two labels.
    ax
        The 2d array of :class:`~matplotlib.axes.Axes` instances to plot on.
        The array dimensions have to agree with the number of axes which
        would be created automatically if `ax` was not supplied. If it is a
        single instance it is treated as a 1x1 array.

    Returns
    -------
    A :class:`~matplotlib.figure.Figure`.

    """
    if group_key is not None:
        if not isinstance(adata, ad.AnnData):
            raise ValueError(f'The `group_key` {group_key!r} is not `None`, but `adata` is not a single `AnnData` instance!')
        if group_key not in adata.obs:
            raise ValueError(f'The `group_key` {group_key!r} is not available in `adata.obs`!')
        adata = { c: adata[df.index] for c, df in adata.obs.groupby(group_key) if len(df) > 0 }

    typing_data, adatas, methods, types, colors, coords = _validate_scatter_args(adata, position_key, keys, colors, show_only, method_labels=method_labels, counts_location=counts_location, compositional=compositional)

    n_solutions, n_samples, n_types = len(typing_data), len(adatas), len(types)

    if joint is None:
        n_types = n_types + 1
    elif joint:
        n_types = 1
        n_cols = 1
    n_x = n_solutions*n_cols
    n_y = (n_types + n_cols - 1) // n_cols

    axsize = np.array(axsize)
    scale = None
    # special treatment of various semi-automatic axsizes
    if len(axsize.shape) == 0 or axsize[0] is None or axsize[1] is None or ax is not None or share_scaling:
        minxs, maxxs = [], []
        minys, maxys = [], []
        for i_sample, sample in enumerate(typing_data['sample'].unique()):
            minx,maxx = get_min_max(coords[sample].iloc[:,0], log=False)
            miny,maxy = get_min_max(coords[sample].iloc[:,1], log=False)
            minxs.append(minx), maxxs.append(maxx)
            minys.append(miny), maxys.append(maxy)
        minxs, maxxs = np.array(minxs), np.array(maxxs)
        minys, maxys = np.array(minys), np.array(maxys)
        sizexs = maxxs-minxs
        sizeys = maxys-minys
        maxsizex = sizexs.max()
        maxsizey = sizeys.max()
        minx, maxx = minxs.min(), maxxs.max()
        miny, maxy = minys.min(), maxys.max()
        sizex = maxx-minx
        sizey = maxy-miny

    if ax is not None: # If `ax` is given, use it. This also ensures that axs is always defined below
        if isinstance(ax, matplotlib.axes.Axes):
            axs = np.array([[ax]])
        else:
            axs = ax
        if axs.shape != (n_y,n_x):
            raise ValueError(f'The `ax` argument got the wrong shape of axes: needed is {(n_y,n_x)!r} supplied was {axs.shape!r}!')
        axsize = axs[0,0].get_window_extent().transformed(axs[0,0].get_figure().dpi_scale_trans.inverted()).size
    elif len(axsize.shape) == 0:
        # if it is a single scalar value, use that as a global scale
        # this also implies that all axes have to have a common scaling
        if not (sharex or sharey):
            share_scaling = True
        if sharex:
            axsizex = sizex * axsize
        else:
            axsizex = maxsizex * axsize
        if sharey:
            axsizey = sizey * axsize
        else:
            axsizey = maxsizey * axsize
        axsize = np.array([axsizex, axsizey])
    elif axsize[0] is None or axsize[1] is None:
        # determine the missing axsize
        if axsize[0] is None and axsize[1] is None:
            raise ValueError(f'The parameter `axsize` got `(None, None)`, while only one of the entries of the tuple can be `None`!')
        aspect_ratio = (sizey if sharey else sizeys) / (sizex if sharex else sizexs)
        if not (sharex and sharey): # if aspect_ratio is not yet fixed and still a vector
            #aspect_ratio = max(aspect_ratio)
            #aspect_ratio = min(aspect_ratio)
            #np.argmax(np.abs(aspect_ratio - 1))
            if sharex: # x is fixed: make y as large as needed
                aspect_ratio = max(aspect_ratio)
            elif sharey: # y is fixed: make x as large as needed
                aspect_ratio = min(aspect_ratio)
            else:
                aspect_ratio = aspect_ratio.mean() # we are free to take the one which fits best - in some arbitrary sense...
        if axsize[0] is None:
            axsize[0] = axsize[1] / aspect_ratio
        else: # axsize[1] is None
            axsize[1] = axsize[0] * aspect_ratio

    if share_scaling:
        scale_0 = (axsize[0]-2*margin) / maxsizex
        scale_1 = (axsize[1]-2*margin) / maxsizey
        scale = min(scale_0, scale_1)

    aspect_ratio = axsize[1] / axsize[0]

    wspace, hspace = padding / axsize
    if ax is None:
        fig, axs = subplots(n_x,n_y,axsize=axsize,sharex=sharex,sharey=sharey,wspace=wspace,hspace=hspace,title=title)
    else:
        fig = axs[0,0].get_figure()

    min_max = np.zeros((*axs.shape,2))
    for i_sample, sample in enumerate(typing_data['sample'].unique()):
        sub = typing_data[typing_data['sample']==sample]
        n_methods = len(sub)
        ax = axs[:,(i_sample*n_methods*n_cols):((i_sample+1)*n_methods*n_cols)]
        _min_max = None if cmap is None else min_max[:,(i_sample*n_methods*n_cols):((i_sample+1)*n_methods*n_cols)]
        spatial_distribution_plot(sub['data'], coords[sample], colors, axs=ax, n_cols=n_cols, joint=joint, normalize=normalize, point_size=point_size, cmap=cmap, cmap_vmin_vmax=cmap_vmin_vmax, out_min_max=_min_max, scale=scale, grid=grid, margin=margin, render=render, rasterized=rasterized, noticks=noticks, axes_labels=axes_labels, on_data_legend=on_data_legend)
        if background_color is not None:
            for _ax in ax.flatten():
                _ax.set_facecolor(background_color)

    if share_scaling:
        fig.canvas.draw() # make the transformations have well defined values
        ax_heights = []
        ax_widths = []
        for _ax in axs.flatten():
            ax_x_low,ax_y_low = _ax.transData.inverted().transform(_ax.transAxes.transform((0,0)))
            ax_x_high,ax_y_high = _ax.transData.inverted().transform(_ax.transAxes.transform((1,1)))
            ax_height,ax_width = (ax_y_high-ax_y_low),(ax_x_high-ax_x_low)
            ax_heights.append(ax_height)
            ax_widths.append(ax_width)
        max_height = max(ax_heights)
        max_width = max(ax_widths)
        for _ax in axs.flatten():
            ax_x_low,ax_y_low = _ax.transData.inverted().transform(_ax.transAxes.transform((0,0)))
            ax_x_high,ax_y_high = _ax.transData.inverted().transform(_ax.transAxes.transform((1,1)))
            delta = max_height - (ax_y_high - ax_y_low)
            ax_y_high += delta / 2
            ax_y_low -= delta / 2
            _ax.set_ylim((ax_y_low,ax_y_high))
            delta = max_width - (ax_x_high - ax_x_low)
            ax_x_high += delta / 2
            ax_x_low -= delta / 2
            _ax.set_xlim((ax_x_low,ax_x_high))

    fig.canvas.draw() # make the transformations have well defined values
    # make axes use the full axsize
    for _ax in axs.flatten():
        ax_x_low,ax_y_low = _ax.transData.inverted().transform(_ax.transAxes.transform((0,0)))
        ax_x_high,ax_y_high = _ax.transData.inverted().transform(_ax.transAxes.transform((1,1)))
        axes_ratio = (ax_y_high-ax_y_low)/(ax_x_high-ax_x_low)
        if aspect_ratio > axes_ratio: # increase height
            delta = (aspect_ratio / axes_ratio - 1) * (ax_y_high - ax_y_low)
            ax_y_high += delta / 2
            ax_y_low -= delta / 2
            _ax.set_ylim((ax_y_low,ax_y_high))
        else: # increase width
            delta = (axes_ratio / aspect_ratio - 1) * (ax_x_high - ax_x_low)
            ax_x_high += delta / 2
            ax_x_low -= delta / 2
            _ax.set_xlim((ax_x_low,ax_x_high))

    if legend:
        _add_legend_or_colorbars(fig, axs, colors, cmap=cmap, min_max=min_max)

    return fig
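
# Example (added for illustration, not part of the original module): `sdata` is
# a hypothetical AnnData with categorical annotation in `.obs['type']` and 2d
# positions in `.obsm['x_y']`.
def _example_scatter(sdata):
    fig = scatter(sdata, keys='type', position_key='x_y', joint=None,
                  n_cols=2, point_size=5, noticks=True)
    return fig  # one joint panel plus one panel per category, split over 2 columns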

def get_cellsize(adata, key='OTT', reference_adata=None, reference_key='OTT', pfn_key=None, counts_location=None):
    if isinstance(key, pd.DataFrame):
        solution = key
    elif key in adata.obsm:
        solution = adata.obsm[key]
    else:
        solution = pd.get_dummies(adata.obs[key])
    if reference_adata is not None:
        counts = get.counts(adata, counts_location=counts_location)[adata.obs.index,adata.var.index]
        pfn = adata.var.index.isin(reference_adata.var.index).astype(float) # basic level of rescaling: using common genes only
        bare_read_count = pd.Series(np.array(counts.X @ pfn).flatten(), index=adata.obs.index)
        #print('get_cell_size0:',pfn.sum())
        if pfn_key is not None:
            if pfn_key in adata.varm and key in adata.varm[pfn_key]:
                pfn = pfn * adata.varm[pfn_key][key].to_numpy()
                #print('multiplying', adata.varm[pfn_key][key].to_numpy())
            if pfn_key in reference_adata.varm and reference_key in reference_adata.varm[pfn_key]:
                pfn = pfn / reference_adata.varm[pfn_key][reference_key].reindex(adata.var.index).to_numpy()
                #print('dividing', reference_adata.varm[pfn_key][reference_key].reindex(adata.var.index).to_numpy())
        #print('get_cell_size1:',pfn.sum())
        pfn = np.nan_to_num(pfn).astype(float)
        #print('get_cell_size2:',pfn.sum())
        read_count = pd.Series(np.array(counts.X @ pfn).flatten(), index=adata.obs.index)
        read_count *= bare_read_count.sum() / read_count.sum()
    else:
        counts = get.counts(adata, counts_location=counts_location)[adata.obs.index]
        read_count = pd.Series(np.array(counts.X.sum(axis=1)).flatten(), index=adata.obs.index)
    by_cells = solution / solution.sum(axis=1).to_numpy()[:,None]
    by_reads = by_cells * read_count[solution.index].to_numpy()[:,None]
    cellsize = by_reads.sum(axis=0) / by_cells.sum(axis=0).to_numpy()
    return cellsize

def _validate_cross_args(adata, keys, colors, show_only, basis_adata, basis_keys, reads, method_labels, counts_location=None):
    typing_data, adatas, methods, types, colors = _validate_args(adata, keys, colors, show_only, reads=reads, method_labels=method_labels, counts_location=counts_location)
    if basis_adata is None:
        basis_adata = adata
    if basis_keys is None:
        basis_keys = keys
    basis_typing_data, basis_adatas, basis_methods, basis_types, basis_colors = _validate_args(basis_adata, basis_keys, colors, show_only, reads=reads, method_labels=method_labels, counts_location=counts_location)
    if len(types.intersection(basis_types)) != len(types):
        raise Exception("The available types provided in adata %s and basis adata %s don't agree!" % (types, basis_types))
    return typing_data, adatas, methods, types, colors, basis_typing_data, basis_adatas, basis_methods

def _cross_scatter(data, axs, colors, marker='o'):
    if data.shape != axs.shape:
        raise Exception("The shapes of the data and axs arrays %s and %s don't agree!" % (data.shape, axs.shape))
    n_y, n_x = data.shape
    for x in range(n_x):
        for y in range(n_y):
            x_data,y_data = data.iloc[y,x]
            ax = axs[y,x]
            ax.grid(True)
            common = colors.index
            common = common.intersection(x_data[x_data > 0].index)
            common = common.intersection(y_data[y_data > 0].index)
            _scatter_plot(ax,x=x_data[common],y=y_data[common], colors=colors[common], marker=marker, log=True)
            if y == n_y-1:
                ax.set_xlabel(data.columns[x],rotation=15,va='top',ha='right')
                ax.tick_params(axis='x', rotation=45, which='both')
            if x == 0:
                ax.set_ylabel(data.index[y],rotation='horizontal',ha='right')
    axs[0,n_x-1].legend(handles=[mpatches.Patch(color=color, label=ind) for (ind, color) in colors.items()], bbox_to_anchor=(1, 1), loc='upper left', ncol=1)

def cellsize(
    adata,
    keys,
    colors=None,
    show_only=None,
    axsize=(1.5,1.5),
    basis_adata=None,
    basis_keys=None,
    pfn_key=None,
    use_reference=False,
    method_labels=None,
    counts_location=None,
):
    """\
    Scatter plots of average cell sizes in an annotation category in the
    whole dataset against some reference dataset. These cell sizes are given
    as the average number of counts per unit cell.

    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` including annotation in `.obs` and/or
        `.obsm`. Can also be a mapping of labels to :class:`~anndata.AnnData`
        to specify multiple datasets.
    keys
        The `.obs`/`.obsm` annotation keys to compare. Can be a single
        string, or a list of strings, or a mapping of the labels of `adata`
        to strings or lists of strings.
    colors
        A mapping of annotation values to colors. If `None`, default colors
        are used.
    show_only
        A subset of annotation values to restrict the plotting to.
    axsize
        Tuple of width and height of a single axis.
    basis_adata
        like `adata`, but for reference data.
    basis_keys
        like `keys`, but for reference data.
    pfn_key
        A `.varm` key containing platform normalization factors. Ignored if
        platform normalization is not requested via `use_reference`.
    use_reference
        Whether and how to use reference data for the determination of cell
        sizes in non-reference data. Can be a single choice or an array of
        choices. Possible choices are:

        - `False`: Don't use the reference
        - `True`: Determine the cell sizes on the set of common genes with
          the reference data
        - 'pfn': Like `True`, but use additionally platform normalization
          factors.
        - `None`: Use all settings which are available, i.e. includes `True`,
          `False`, and if `pfn_key!=None` also 'pfn'.
    method_labels
        A mapping from the strings in `keys` and `basis_keys` to (shorter)
        names to show in the plot.
    counts_location
        A string or tuple specifying where the count matrix is stored, e.g.
        `'X'`, `('raw','X')`, `('raw','obsm','my_counts_key')`,
        `('layer','my_counts_key')`, ... For details see
        :func:`~tacco.get.counts`.

    Returns
    -------
    A :class:`~matplotlib.figure.Figure`.

    """
    typing_data, adatas, methods, types, colors, basis_typing_data, basis_adatas, basis_methods = _validate_cross_args(adata, keys, colors, show_only, basis_adata, basis_keys, reads=False, method_labels=method_labels, counts_location=counts_location)

    n_solutions, n_samples, n_types = len(typing_data), len(adatas), len(types)
    n_solutions_basis, n_samples_basis = len(basis_typing_data), len(basis_adatas)

    possible_use_refs = [False,True,'pfn']
    if isinstance(use_reference, (str,bool)):
        use_reference = [use_reference]
    elif use_reference is None:
        if pfn_key is None:
            use_reference = [False,True]
        else:
            use_reference = possible_use_refs
    if 'pfn' in use_reference and pfn_key is None:
        raise Exception('If platform normalization is desired (as indicated by specifying "pfn" in the "use_reference" argument), the argument "pfn_key" has to be specified!')
    use_ref_labels = []
    for use_ref in use_reference:
        if use_ref not in possible_use_refs:
            raise Exception('The argument use_reference got the unknown value "%s"! Only %s are possible.' % (use_ref, possible_use_refs))
        if isinstance(use_ref,str):
            if use_ref == 'pfn':
                use_ref_label = ' - platform corrected'
        elif use_ref: # ignore pfn for cellsize estimation
            use_ref_label = ' - common genes'
        else: # ignore reference for cellsize estimation
            use_ref_label = ''
        use_ref_labels.append(use_ref_label)

    n_x = n_solutions
    n_y = n_solutions_basis * len(use_reference)
    fig, axs = subplots(n_x,n_y,axsize=axsize,sharex='all',sharey='all',wspace=0,hspace=0)

    data = pd.DataFrame(dtype=object,columns=typing_data.index,index=[basis_typing_data.index[y % n_solutions_basis] + use_ref_labels[y // n_solutions_basis] for y in range(n_y)])
    for x in range(n_x):
        for y in range(n_y):
            x_key = typing_data['method'].iloc[x]
            x_adata = adatas[typing_data['sample'].iloc[x]]
            y_key = basis_typing_data['method'].iloc[y % n_solutions_basis]
            y_adata = basis_adatas[basis_typing_data['sample'].iloc[y % n_solutions_basis]]
            use_ref = use_reference[y // n_solutions_basis]
            if isinstance(use_ref,str):
                if use_ref == 'pfn':
                    cell_sizes_x = get_cellsize(x_adata, x_key, reference_adata=y_adata, reference_key=y_key, counts_location=counts_location)#, pfn_key=None) # no platform normalization effects, only on common genes
                    cell_sizes_y = get_cellsize(y_adata, y_key, reference_adata=x_adata, reference_key=x_key, counts_location=counts_location, pfn_key=pfn_key) # include platform normalization effects for both adatas in these size estimations
            elif use_ref: # common genes only, ignore pfn for cellsize estimation
                cell_sizes_x = get_cellsize(x_adata, x_key, reference_adata=y_adata, counts_location=counts_location)
                cell_sizes_y = get_cellsize(y_adata, y_key, reference_adata=x_adata, counts_location=counts_location)
            else: # ignore reference for cellsize estimation
                cell_sizes_x = get_cellsize(x_adata, x_key, counts_location=counts_location)
                cell_sizes_y = get_cellsize(y_adata, y_key, counts_location=counts_location)
            data.iloc[y,x] = (cell_sizes_x,cell_sizes_y)

    _cross_scatter(data, axs, colors, marker='x')

    return fig
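
# Example (added for illustration, not part of the original module): compare
# per-category cell sizes of a hypothetical spatial dataset `sdata` (annotation
# in `.obsm['OT']`) against a single-cell reference `refdata` (labels in
# `.obs['type']`), assuming counts are available in `.X` of both.
def _example_cellsize(sdata, refdata):
    fig = cellsize(sdata, keys='OT', basis_adata=refdata, basis_keys='type',
                   use_reference=None)  # without pfn_key, this uses [False, True]
    return fig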
def _validate_frequency_args(adata, keys, colors, show_only, basis_adata, basis_keys, reads, method_labels=None, counts_location=None):
    typing_data, adatas, methods, types, _colors = _validate_args(adata, keys, colors, show_only, reads, method_labels=method_labels, counts_location=counts_location)
    if basis_adata is not None:
        basis_typing_data, basis_adatas, basis_methods, basis_types, _colors = _validate_args(basis_adata, basis_keys, colors, show_only, reads, method_labels=method_labels, counts_location=counts_location)
        if len(types.intersection(basis_types)) != len(types):
            raise Exception('The available types provided in adata %s and basis adata %s do not agree!' % (types, basis_types))
        typing_data = pd.concat([basis_typing_data, typing_data])
    return typing_data, adatas, methods, types, _colors

[docs]
def frequency_bar(
    adata,
    keys,
    colors=None,
    show_only=None,
    axsize=None,
    basis_adata=None,
    basis_keys=None,
    horizontal=True,
    reads=False,
    method_labels=None,
    counts_location=None,
    ax=None,
):
    """\
    Bar plots of the total frequency of annotation in the whole dataset
    against some reference dataset.

    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` including annotation in `.obs` and/or
        `.obsm`. Can also be a mapping of labels to :class:`~anndata.AnnData`
        to specify multiple datasets. The :class:`~anndata.AnnData` instances
        can also be replaced by :class:`~pandas.DataFrame`, which are then
        treated like the `.obs` of an :class:`~anndata.AnnData`.
    keys
        The `.obs`/`.obsm` annotation keys to compare. Can be a single string,
        or a list of strings, or a mapping of the labels of `adata` to strings
        or lists of strings.
    colors
        A mapping of annotation values to colors. If `None`, default colors
        are used.
    show_only
        A subset of annotation values to restrict the plotting to.
    axsize
        Tuple of width and height of a single axis. If `None`, a default size
        is chosen automatically.
    basis_adata
        Like `adata`, but for reference data.
    basis_keys
        Like `keys`, but for reference data.
    horizontal
        Whether to draw the bars horizontally.
    reads
        Whether to work with read or cell count fractions.
    method_labels
        A mapping from the strings in `keys` and `basis_keys` to (shorter)
        names to show in the plot.
    counts_location
        A string or tuple specifying where the count matrix is stored, e.g.
        `'X'`, `('raw','X')`, `('raw','obsm','my_counts_key')`,
        `('layer','my_counts_key')`, ... For details see
        :func:`~tacco.get.counts`.
    ax
        The :class:`~matplotlib.axes.Axes` to plot on. If `None`, creates a
        fresh figure for plotting.

    Returns
    -------
    The :class:`~matplotlib.figure.Figure` containing the plot.

    """

    typing_data, adatas, methods, types, colors = _validate_frequency_args(adata, keys, colors, show_only, basis_adata, basis_keys, reads, method_labels=method_labels, counts_location=counts_location)
    n_solutions = len(typing_data)

    if ax is None:
        if axsize is None:
            if horizontal:
                axsize = (9.1, 0.7 * n_solutions)
            else:
                axsize = (0.7 * n_solutions, 5)
        fig, axs = subplots(axsize=axsize)
        ax = axs[0,0]
    else:
        fig = ax.figure

    type_freqs = pd.DataFrame({ method: data.sum(axis=0) for method, data in typing_data['data'].items() })
    norm_tf = (type_freqs / type_freqs.sum(axis=0)).T
    norm_tf = norm_tf.fillna(0)

    _composition_bar(norm_tf, colors, horizontal=horizontal, ax=ax)

    return fig

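# --- Illustrative usage sketch (not part of the original module) -------------
# A hedged example of calling `frequency_bar` on plain DataFrames, which the
# docstring above states are accepted in place of AnnData; the key 'type' and
# the dataset labels are hypothetical.
def _example_frequency_bar():  # pragma: no cover
    rng = np.random.default_rng(0)
    obs_a = pd.DataFrame({'type': rng.choice(['A', 'B', 'C'], size=200)})
    obs_b = pd.DataFrame({'type': rng.choice(['A', 'B', 'C'], size=150, p=[0.6, 0.3, 0.1])})
    return frequency_bar({'dataset 1': obs_a, 'dataset 2': obs_b}, keys='type')
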
[docs]
def frequency(
    adata,
    keys,
    colors=None,
    show_only=None,
    axsize=(1.5,1.5),
    basis_adata=None,
    basis_keys=None,
    reads=False,
    method_labels=None,
    counts_location=None,
):
    """\
    Scatter plots of the total frequency of annotation in the whole dataset
    against some reference dataset.

    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` including annotation in `.obs` and/or
        `.obsm`. Can also be a mapping of labels to :class:`~anndata.AnnData`
        to specify multiple datasets. The :class:`~anndata.AnnData` instances
        can also be replaced by :class:`~pandas.DataFrame`, which are then
        treated like the `.obs` of an :class:`~anndata.AnnData`.
    keys
        The `.obs`/`.obsm` annotation keys to compare. Can be a single string,
        or a list of strings, or a mapping of the labels of `adata` to strings
        or lists of strings.
    colors
        A mapping of annotation values to colors. If `None`, default colors
        are used.
    show_only
        A subset of annotation values to restrict the plotting to.
    axsize
        Tuple of width and height of a single axis.
    basis_adata
        Like `adata`, but for reference data.
    basis_keys
        Like `keys`, but for reference data.
    reads
        Whether to work with read or cell count fractions.
    method_labels
        A mapping from the strings in `keys` and `basis_keys` to (shorter)
        names to show in the plot.
    counts_location
        A string or tuple specifying where the count matrix is stored, e.g.
        `'X'`, `('raw','X')`, `('raw','obsm','my_counts_key')`,
        `('layer','my_counts_key')`, ... For details see
        :func:`~tacco.get.counts`.

    Returns
    -------
    A :class:`~matplotlib.figure.Figure`.

    """

    typing_data, adatas, methods, types, colors, basis_typing_data, basis_adatas, basis_methods = _validate_cross_args(adata, keys, colors, show_only, basis_adata, basis_keys, reads=reads, method_labels=method_labels, counts_location=counts_location)

    n_solutions, n_samples, n_types = len(typing_data), len(adatas), len(types)
    n_solutions_basis, n_samples_basis = len(basis_typing_data), len(basis_adatas)

    n_x = n_solutions
    n_y = n_solutions_basis

    fig, axs = subplots(n_x, n_y, axsize=axsize, sharex='all', sharey='all', wspace=0, hspace=0)

    type_freqs = pd.DataFrame({ method: data.sum(axis=0) for method, data in typing_data['data'].items() })
    type_freqs = type_freqs / type_freqs.sum(axis=0) # normalize freqs
    basis_type_freqs = pd.DataFrame({ method: data.sum(axis=0) for method, data in basis_typing_data['data'].items() })
    basis_type_freqs = basis_type_freqs / basis_type_freqs.sum(axis=0) # normalize freqs

    data = pd.DataFrame(dtype=object, columns=typing_data.index, index=basis_typing_data.index)
    for x in range(n_x):
        for y in range(n_y):
            x_data = type_freqs.iloc[:,x]
            y_data = basis_type_freqs.iloc[:,y]
            data.iloc[y, x] = (x_data, y_data)

    _cross_scatter(data, axs, colors, marker='o')

    return fig

[docs]
def comparison(
    adata,
    keys,
    colors=None,
    show_only=None,
    axsize=(2.0,2.0),
    basis_adata=None,
    basis_keys=None,
    method_labels=None,
    counts_location=None,
    point_size=2,
    joint=None,
):
    """\
    Scatter plots of the annotation fractions of different annotations, e.g.
    of different annotation methods or of methods and a ground truth.

    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` including annotation in `.obs` and/or
        `.obsm`. Can also be a mapping of labels to :class:`~anndata.AnnData`
        to specify multiple datasets.
    keys
        The `.obs`/`.obsm` annotation keys to compare. Can be a single string,
        or a list of strings, or a mapping of the labels of `adata` to strings
        or lists of strings.
    colors
        A mapping of annotation values to colors. If `None`, default colors
        are used.
    show_only
        A subset of annotation values to restrict the plotting to.
    axsize
        Tuple of width and height of a single axis.
    basis_adata
        Like `adata`, but for reference data.
    basis_keys
        Like `keys`, but for reference data.
    method_labels
        A mapping from the strings in `keys` and `basis_keys` to (shorter)
        names to show in the plot.
    counts_location
        A string or tuple specifying where the count matrix is stored, e.g.
        `'X'`, `('raw','X')`, `('raw','obsm','my_counts_key')`,
        `('layer','my_counts_key')`, ... For details see
        :func:`~tacco.get.counts`.
    point_size
        The size of the points in the plot.
    joint
        Whether to plot only one scatter plot with all annotation categories
        or only the scatter plots with one annotation category per plot. If
        `None`, plot both.

    Returns
    -------
    A :class:`~matplotlib.figure.Figure`.

    """

    typing_data, adatas, methods, types, colors, basis_typing_data, basis_adatas, basis_methods = _validate_cross_args(adata, keys, colors, show_only, basis_adata, basis_keys, reads=False, method_labels=method_labels, counts_location=counts_location)

    n_solutions, n_samples, n_types = len(typing_data), len(adatas), len(types)
    n_solutions_basis, n_samples_basis = len(basis_typing_data), len(basis_adatas)

    n_types = len(colors.index)

    n_x = n_solutions
    if joint is None:
        n_y = n_types + 1
    elif joint:
        n_y = 1
    else:
        n_y = n_types
    n_y = n_solutions_basis * n_y

    fig, axs = subplots(n_x, n_y, axsize=axsize, sharex='all', sharey='all', wspace=0.25, hspace=0.25)

    for i, basis_name in enumerate(basis_typing_data.index):
        basis_sample, basis_type_freqs = basis_typing_data.loc[basis_name, ['sample', 'data']]
        basis_type_freqs /= basis_type_freqs.sum(axis=1).to_numpy()[:,None]
        for j, name in enumerate(typing_data.index):
            sample, type_freqs = typing_data.loc[name, ['sample', 'data']]
            type_freqs /= type_freqs.sum(axis=1).to_numpy()[:,None]

            common = type_freqs.index.intersection(basis_type_freqs.index)
            type_freqs = type_freqs.loc[common]
            _basis_type_freqs = basis_type_freqs.loc[common]

            def plotit(t, ax):
                x = type_freqs[t]
                y = _basis_type_freqs[t]
                ax.scatter(x, y, color=colors[t], s=point_size, label=t, edgecolors='none')

            if joint is None:
                joint_ax = axs[i * (n_types + 1), j]
                for it, t in enumerate(colors.index):
                    plotit(t, joint_ax)
                    ax = axs[i * (n_types + 1) + it + 1, j]
                    plotit(t, ax)
                    pc, x2 = _correlations(type_freqs[t], _basis_type_freqs[t], log=False)
                    ax.set_title('r=%.2f $\\chi^2_m$=%.2f' % (pc, x2))
                pc, x2 = _correlations(type_freqs.to_numpy().flatten(), _basis_type_freqs.to_numpy().flatten(), log=False)
                joint_ax.set_title('r=%.2f $\\chi^2_m$=%.2f' % (pc, x2))
            elif joint:
                joint_ax = axs[i, j]
                for t in colors.index:
                    plotit(t, joint_ax)
                pc, x2 = _correlations(type_freqs.to_numpy().flatten(), _basis_type_freqs.to_numpy().flatten(), log=False)
                joint_ax.set_title('r=%.2f $\\chi^2_m$=%.2f' % (pc, x2))
            else:
                for it, t in enumerate(colors.index):
                    ax = axs[i * n_types + it, j]
                    plotit(t, ax)
                    pc, x2 = _correlations(type_freqs[t], _basis_type_freqs[t], log=False)
                    ax.set_title('r=%.2f $\\chi^2_m$=%.2f' % (pc, x2))

    for ax, name in zip(axs[n_y-1,:], typing_data.index):
        ax.set_xlabel(name, rotation=15, va='top', ha='right')
    for ax, basis_name in zip(axs[:,0], list(basis_typing_data.index) * (n_y // n_solutions_basis)):
        ax.set_ylabel(basis_name, rotation='horizontal', ha='right')

    axs[0, n_x-1].legend(handles=[mpatches.Patch(color=color, label=ind) for (ind, color) in colors.items()], bbox_to_anchor=(1, 1), loc='upper left', ncol=1)

    return fig

[docs]
def compositions(
    adata,
    value_key,
    group_key,
    basis_adata=None,
    basis_value_key=None,
    basis_group_key=None,
    fillna=None,
    restrict_groups=None,
    restrict_values=None,
    basis_restrict_groups=None,
    basis_restrict_values=None,
    reads=False,
    colors=None,
    horizontal=False,
    axsize=None,
    ax=None,
    legend=True,
):
    """\
    Plot compositions of groups. In contrast to
    :func:`~tacco.plots.contribution`, compositions have to add up to one.

    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` with annotation in `.obs`.
    value_key
        The `.obs` or `.obsm` key with the values to determine the enrichment
        for.
    group_key
        The `.obs` key with categorical group information.
    basis_adata
        Another :class:`~anndata.AnnData` with annotation in `.obs` to
        compare. If `None`, only the `adata` composition is shown.
    basis_value_key
        The `.obs` or `.obsm` key for `basis_adata` with the values to
        determine the enrichment for. If `None`, `value_key` is used.
    basis_group_key
        The `.obs` key with categorical group information for `basis_adata`.
        If `None`, `group_key` is used.
    fillna
        If `None`, observations containing NA in the values are filtered.
        Else, NA values are replaced with this value.
    restrict_groups
        A list-like containing the groups within which the enrichment analysis
        is to be done. If `None`, all groups are included.
    restrict_values
        A list-like containing the values within which the enrichment analysis
        is to be done. If `None`, all values are included. Works only for
        categorical values.
    basis_restrict_groups
        Like `restrict_groups` but for `basis_adata`.
    basis_restrict_values
        Like `restrict_values` but for `basis_adata`.
    reads
        Whether to weight the values by the total count per observation.
    colors
        The mapping of value names to colors. If `None`, a set of standard
        colors is used.
    horizontal
        Whether to plot the bar plot horizontally.
    axsize
        Tuple of width and height of a single axis. If `None`, use automatic
        values.
    ax
        The :class:`~matplotlib.axes.Axes` to plot on. If `None`, creates a
        fresh figure for plotting.
    legend
        Whether to include the legend.

    Returns
    -------
    A :class:`~matplotlib.figure.Figure` if `ax` is `None`, else `None`.

    """

    compositions = get_compositions(adata=adata, value_key=value_key, group_key=group_key, fillna=fillna, restrict_groups=restrict_groups, restrict_values=restrict_values, reads=reads)
    colors, types = _get_colors(colors, compositions.columns)

    if basis_adata is not None:
        if basis_value_key is None:
            basis_value_key = value_key
        if basis_group_key is None:
            basis_group_key = group_key
        basis_compositions = get_compositions(adata=basis_adata, value_key=basis_value_key, group_key=basis_group_key, fillna=fillna, restrict_groups=basis_restrict_groups, restrict_values=basis_restrict_values, reads=reads)
        basis_compositions.index = basis_compositions.index.astype(str) + ' (reference)'
        compositions = pd.concat([basis_compositions, compositions])

    n_solutions = len(compositions.index)

    if ax is not None:
        fig = None
    else:
        if axsize is None:
            if horizontal:
                axsize = (9.1, 0.7 * n_solutions)
            else:
                axsize = (0.7 * n_solutions, 5)
        fig, axs = subplots(axsize=axsize)
        ax = axs[0,0]

    _composition_bar(compositions, colors, horizontal=horizontal, ax=ax, legend=legend)

    return fig

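# --- Illustrative usage sketch (not part of the original module) -------------
# A hedged example of calling `compositions` on synthetic data; the keys
# 'type' and 'region' are hypothetical placeholders.
def _example_compositions():  # pragma: no cover
    rng = np.random.default_rng(0)
    obs = pd.DataFrame({
        'type':   rng.choice(['A', 'B', 'C'], size=300),
        'region': rng.choice(['inner', 'outer'], size=300),
    })
    adata = ad.AnnData(X=np.ones((300, 1)), obs=obs)
    # one stacked bar per region, giving the per-type fractions within it
    return compositions(adata, value_key='type', group_key='region')
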
def _prep_contributions(
    adata,
    value_key,
    group_key,
    sample_key=None,
    basis_adata=None,
    basis_value_key=None,
    basis_group_key=None,
    basis_sample_key=None,
    fillna=None,
    restrict_groups=None,
    restrict_values=None,
    basis_restrict_groups=None,
    basis_restrict_values=None,
    reduction='sum',
    normalization=None,
    assume_counts=None,
    reads=False,
    colors=None,
):
    """\
    Prepares contribution data.

    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` with annotation in `.obs`.
    value_key
        The `.obs` or `.obsm` key with the values to determine the enrichment
        for.
    group_key
        The `.obs` key with categorical group information.
    sample_key
        The `.obs` key with categorical sample information. If `None`, only
        the aggregated data is plotted. Otherwise the data is aggregated per
        sample and total and per-sample values are plotted.
    basis_adata
        Another :class:`~anndata.AnnData` with annotation in `.obs` to
        compare. If `None`, only the `adata` composition is shown.
    basis_value_key
        The `.obs` or `.obsm` key for `basis_adata` with the values to
        determine the enrichment for. If `None`, `value_key` is used.
    basis_group_key
        The `.obs` key with categorical group information for `basis_adata`.
        If `None`, `group_key` is used.
    basis_sample_key
        The `.obs` key with categorical sample information for `basis_adata`.
        If `None`, `sample_key` is used.
    fillna
        If `None`, observations containing NA in the values are filtered.
        Else, NA values are replaced with this value.
    restrict_groups
        A list-like containing the groups within which the enrichment analysis
        is to be done. If `None`, all groups are included.
    restrict_values
        A list-like containing the values within which the enrichment analysis
        is to be done. If `None`, all values are included. Works only for
        categorical values.
    basis_restrict_groups
        Like `restrict_groups` but for `basis_adata`.
    basis_restrict_values
        Like `restrict_values` but for `basis_adata`.
    reduction
        The reduction to apply on each (group,sample) subset of the data.
        Possible values are:

        - 'sum': sum of the values over observations
        - 'mean': mean of the values over observations
        - 'median': median of the values over observations
        - a callable mapping a :class:`~pandas.DataFrame` to its reduced
          counterpart

    normalization
        The normalization to apply on each reduced (group,sample) subset of
        the data. Possible values are (see the sketch after this function for
        the 'gmean' and 'clr' formulas):

        - 'sum': normalize values by their sum (yields fractions)
        - 'percent': like 'sum' scaled by 100 (yields percentages)
        - 'gmean': normalize values by their geometric mean (yields
          contributions which make more sense for enrichments than fractions,
          due to the zero-sum issue; see :func:`~tacco.tools.enrichments`)
        - 'clr': "Centered logratio transform"; like 'gmean' with an
          additional log transform; makes the distribution more normal and
          better suited for t tests
        - `None`: no normalization
        - a value name from `value_key`: all values are normalized to this
          contribution
        - a callable mapping a :class:`~pandas.DataFrame` to its normalized
          counterpart

    assume_counts
        Only relevant for `normalization=='gmean'` and `normalization=='clr'`;
        whether to regularize zeros by adding a pseudo count of 1 or by
        replacing them by 1e-3 of the minimum value. If `None`, check whether
        the data are consistent with count data and assume counts accordingly,
        except if `reads==True`, then also `assume_counts==True`.
    reads
        Whether to weight the values by the total count per observation.
    colors
        The mapping of value names to colors. If `None`, a set of standard
        colors is used.

    Returns
    -------
    A tuple containing contributions, detailed_contributions, colors, types.

    """

    contributions = get_contributions(adata=adata, value_key=value_key, group_key=group_key, sample_key=None, fillna=fillna, restrict_groups=restrict_groups, restrict_values=restrict_values, reads=reads, reduction=reduction, normalization=normalization, assume_counts=assume_counts)
    detailed_contributions = None
    if sample_key is not None:
        detailed_contributions = get_contributions(adata=adata, value_key=value_key, group_key=group_key, sample_key=sample_key, fillna=fillna, restrict_groups=restrict_groups, restrict_values=restrict_values, reads=reads, reduction=reduction, normalization=normalization, assume_counts=assume_counts)

    colors, types = _get_colors(colors, contributions.columns)

    if basis_adata is not None:
        if basis_value_key is None:
            basis_value_key = value_key
        if basis_group_key is None:
            basis_group_key = group_key
        if basis_sample_key is None:
            basis_sample_key = sample_key
        basis_contributions = get_contributions(adata=basis_adata, value_key=basis_value_key, group_key=basis_group_key, sample_key=None, fillna=fillna, restrict_groups=basis_restrict_groups, restrict_values=basis_restrict_values, reads=reads, reduction=reduction, normalization=normalization, assume_counts=assume_counts)
        basis_contributions.index = basis_contributions.index.astype(str) + ' (reference)'
        contributions = pd.concat([basis_contributions, contributions])
        if sample_key is not None:
            detailed_basis_contributions = get_contributions(adata=basis_adata, value_key=basis_value_key, group_key=basis_group_key, sample_key=basis_sample_key, fillna=fillna, restrict_groups=basis_restrict_groups, restrict_values=basis_restrict_values, reads=reads, reduction=reduction, normalization=normalization, assume_counts=assume_counts)
            detailed_basis_contributions.index = detailed_basis_contributions.index.set_levels([detailed_basis_contributions.index.levels[0].astype(str) + ' (reference)', detailed_basis_contributions.index.levels[1].astype(str)])
            detailed_basis_contributions.index.names = detailed_contributions.index.names
            detailed_contributions = pd.concat([detailed_basis_contributions, detailed_contributions])

    return contributions, detailed_contributions, colors, types

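# --- Illustrative sketch (not part of the original module) -------------------
# The 'gmean' and 'clr' normalizations described in the docstring above can be
# written compactly in numpy. This is an independent sketch of the formulas,
# not necessarily the exact implementation used by `get_contributions`.
def _sketch_gmean_clr(reduced):  # pragma: no cover
    # `reduced` is a 1d array of per-value contributions for one
    # (group, sample) subset; zeros are assumed to be regularized already
    # (see the `assume_counts` parameter).
    gmean = np.exp(np.mean(np.log(reduced)))
    gmean_normalized = reduced / gmean  # 'gmean': ratios to the geometric mean
    clr = np.log(gmean_normalized)      # 'clr': additional log transform
    return gmean_normalized, clr
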
[docs]
def contribution(
    adata,
    value_key,
    group_key,
    sample_key=None,
    basis_adata=None,
    basis_value_key=None,
    basis_group_key=None,
    basis_sample_key=None,
    fillna=None,
    restrict_groups=None,
    restrict_values=None,
    basis_restrict_groups=None,
    basis_restrict_values=None,
    reduction='sum',
    normalization='gmean',
    assume_counts=None,
    reads=False,
    colors=None,
    axsize=None,
    log=True,
    ax=None,
):
    """\
    Plot contribution to groups. In contrast to
    :func:`~tacco.plots.composition`, contributions don't have to add up to
    one.

    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` with annotation in `.obs`.
    value_key
        The `.obs` or `.obsm` key with the values to determine the enrichment
        for.
    group_key
        The `.obs` key with categorical group information.
    sample_key
        The `.obs` key with categorical sample information. If `None`, only
        the aggregated data is plotted. Otherwise the data is aggregated per
        sample and total and per-sample values are plotted.
    basis_adata
        Another :class:`~anndata.AnnData` with annotation in `.obs` to
        compare. If `None`, only the `adata` composition is shown.
    basis_value_key
        The `.obs` or `.obsm` key for `basis_adata` with the values to
        determine the enrichment for. If `None`, `value_key` is used.
    basis_group_key
        The `.obs` key with categorical group information for `basis_adata`.
        If `None`, `group_key` is used.
    basis_sample_key
        The `.obs` key with categorical sample information for `basis_adata`.
        If `None`, `sample_key` is used.
    fillna
        If `None`, observations containing NA in the values are filtered.
        Else, NA values are replaced with this value.
    restrict_groups
        A list-like containing the groups within which the enrichment analysis
        is to be done. If `None`, all groups are included.
    restrict_values
        A list-like containing the values within which the enrichment analysis
        is to be done. If `None`, all values are included. Works only for
        categorical values.
    basis_restrict_groups
        Like `restrict_groups` but for `basis_adata`.
    basis_restrict_values
        Like `restrict_values` but for `basis_adata`.
    reduction
        The reduction to apply on each (group,sample) subset of the data.
        Possible values are:

        - 'sum': sum of the values over observations
        - 'mean': mean of the values over observations
        - 'median': median of the values over observations
        - a callable mapping a :class:`~pandas.DataFrame` to its reduced
          counterpart

    normalization
        The normalization to apply on each reduced (group,sample) subset of
        the data. Possible values are:

        - 'sum': normalize values by their sum (yields fractions)
        - 'percent': like 'sum' scaled by 100 (yields percentages)
        - 'gmean': normalize values by their geometric mean (yields
          contributions which make more sense for enrichments than fractions,
          due to the zero-sum issue; see :func:`~tacco.tools.enrichments`)
        - 'clr': "Centered logratio transform"; like 'gmean' with an
          additional log transform; makes the distribution more normal and
          better suited for t tests
        - `None`: no normalization
        - a value name from `value_key`: all values are normalized to this
          contribution
        - a callable mapping a :class:`~pandas.DataFrame` to its normalized
          counterpart

    assume_counts
        Only relevant for `normalization=='gmean'` and `normalization=='clr'`;
        whether to regularize zeros by adding a pseudo count of 1 or by
        replacing them by 1e-3 of the minimum value. If `None`, check whether
        the data are consistent with count data and assume counts accordingly,
        except if `reads==True`, then also `assume_counts==True`.
    reads
        Whether to weight the values by the total count per observation.
    colors
        The mapping of value names to colors. If `None`, a set of standard
        colors is used.
    axsize
        Tuple of width and height of a single axis. If `None`, use automatic
        values.
    log
        Whether to plot on the log scale.
    ax
        The :class:`~matplotlib.axes.Axes` to plot on. If `None`, creates a
        fresh figure for plotting.

    Returns
    -------
    A :class:`~matplotlib.figure.Figure` if `ax` is `None`, else `None`.

    """

    contributions, detailed_contributions, colors, types = _prep_contributions(
        adata=adata, value_key=value_key, group_key=group_key, sample_key=sample_key,
        basis_adata=basis_adata, basis_value_key=basis_value_key, basis_group_key=basis_group_key, basis_sample_key=basis_sample_key,
        fillna=fillna, restrict_groups=restrict_groups, restrict_values=restrict_values,
        basis_restrict_groups=basis_restrict_groups, basis_restrict_values=basis_restrict_values,
        reduction=reduction, normalization=normalization, assume_counts=assume_counts, reads=reads, colors=colors,
    )

    labels = contributions.columns.astype(str)

    total_bars_width = 0.8
    n_states = len(contributions.index)
    bars_separation = total_bars_width / (n_states * 3)
    bar_separation = total_bars_width / (n_states * 5)

    if ax is not None:
        fig = None
    else:
        if axsize is None:
            axsize = (len(labels) * (0.3 * n_states + .2), 4)
        fig, ax = subplots(axsize=axsize)
        ax = ax[0,0]

    ax.set_axisbelow(True)
    ax.yaxis.grid()

    alpha = 1 if detailed_contributions is None else 0.5
    minor_x_labeling = {'position': [], 'label': []}
    for i_state, state in enumerate(contributions.index):
        for i_column, column in enumerate(contributions.columns):
            bar_width = (total_bars_width - n_states * bar_separation) / n_states
            bar_start = i_column - 0.5 * total_bars_width + i_state * total_bars_width / n_states + 0.5 * bar_separation
            ax.bar(bar_start, contributions.loc[state, column], bar_width, color=colors[column], align='edge', alpha=alpha)
            minor_x_labeling['position'].append(i_column - 0.5 * total_bars_width + (i_state + 0.5) / n_states * total_bars_width)
            minor_x_labeling['label'].append(state)

    if detailed_contributions is not None:
        for i_state, state in enumerate(contributions.index):
            df = detailed_contributions[detailed_contributions.index.get_level_values(0) == state]
            n_samples = df.shape[0]
            for i_column, column in enumerate(contributions.columns):
                heights = df[column].sort_values(ascending=False)
                bar_start = i_column - 0.5 * total_bars_width + 0.5 * bars_separation + i_state / n_states * total_bars_width
                bar_width = (total_bars_width - n_states * bars_separation) / (n_states * n_samples)
                ax.bar(bar_start + np.arange(0, (n_samples - 0.5) * bar_width, bar_width), heights, bar_width, color=colors[column], align='edge')

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.set_ylabel('contribution')
    if log:
        ax.set_yscale('log')
    x = np.arange(len(labels)) # the label locations
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.set_xlim(x[0] - 0.5, x[-1] + 0.5)

    minor_x_labeling = pd.DataFrame(minor_x_labeling).sort_values('position')
    minor_x_labeling['position'] += 1e-2 # tiny shift to make the central minor annotation not be superseded by the major annotation...
    ax.set_xticks(minor_x_labeling['position'], minor=True)
    ax.set_xticklabels(minor_x_labeling['label'], rotation=50, ha='right', minor=True)
    for t in ax.get_xticklabels(minor=True):
        t.set_y(-0.05)
    ax.tick_params(axis='x', which='major', bottom=False, top=False)
    ax.tick_params(axis='x', which='minor', bottom=False, top=False)

    return fig

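# --- Illustrative usage sketch (not part of the original module) -------------
# A hedged example of calling `contribution` with per-sample detail; the keys
# 'type', 'region', and 'sample' are hypothetical placeholders.
def _example_contribution():  # pragma: no cover
    rng = np.random.default_rng(0)
    n = 400
    obs = pd.DataFrame({
        'type':   rng.choice(['A', 'B', 'C'], size=n),
        'region': rng.choice(['inner', 'outer'], size=n),
        'sample': rng.choice(['s1', 's2', 's3'], size=n),
    })
    adata = ad.AnnData(X=np.ones((n, 1)), obs=obs)
    # per-sample bars are overlaid on the semi-transparent aggregated bars
    return contribution(adata, value_key='type', group_key='region', sample_key='sample')
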
[docs]
def heatmap(
    adata,
    value_key,
    group_key,
    basis_adata=None,
    basis_value_key=None,
    basis_group_key=None,
    fillna=None,
    restrict_groups=None,
    restrict_values=None,
    basis_restrict_groups=None,
    basis_restrict_values=None,
    reduction='sum',
    normalization=None,
    assume_counts=None,
    reads=False,
    colors=None,
    alpha=None,
    axsize=None,
    axes_labels=None,
    annotation=None,
    value_cluster=False,
    group_cluster=False,
    value_dendrogram=False,
    group_dendrogram=False,
    value_order=None,
    group_order=None,
    group_labels_rotation=None,
    ax=None,
    cmap=None,
    cmap_center=0,
    cmap_vmin_vmax=None,
    complement_colors=True,
    colorbar=True,
    colorbar_label=None,
):
    """\
    Plot heatmap of contribution to groups.

    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` with annotation in `.obs`. Can also be a
        :class:`~pandas.DataFrame` with the data to show in the heatmap. In
        the latter case all adata processing arguments are ignored.
    value_key
        The `.obs` or `.obsm` key with the values to determine the enrichment
        for.
    group_key
        The `.obs` key with categorical group information.
    basis_adata
        Another :class:`~anndata.AnnData` with annotation in `.obs` to
        compare. If `None`, only the `adata` composition is shown.
    basis_value_key
        The `.obs` or `.obsm` key for `basis_adata` with the values to
        determine the enrichment for. If `None`, `value_key` is used.
    basis_group_key
        The `.obs` key with categorical group information for `basis_adata`.
        If `None`, `group_key` is used.
    fillna
        If `None`, observations containing NA in the values are filtered.
        Else, NA values are replaced with this value.
    restrict_groups
        A list-like containing the groups within which the enrichment analysis
        is to be done. If `None`, all groups are included.
    restrict_values
        A list-like containing the values within which the enrichment analysis
        is to be done. If `None`, all values are included. Works only for
        categorical values.
    basis_restrict_groups
        Like `restrict_groups` but for `basis_adata`.
    basis_restrict_values
        Like `restrict_values` but for `basis_adata`.
    reduction
        The reduction to apply on each (group,sample) subset of the data.
        Possible values are:

        - 'sum': sum of the values over observations
        - 'mean': mean of the values over observations
        - 'median': median of the values over observations
        - a callable mapping a :class:`~pandas.DataFrame` to its reduced
          counterpart

    normalization
        The normalization to apply on each reduced (group,sample) subset of
        the data. Possible values are:

        - 'sum': normalize values by their sum (yields fractions)
        - 'percent': like 'sum' scaled by 100 (yields percentages)
        - 'gmean': normalize values by their geometric mean (yields
          contributions which make more sense for enrichments than fractions,
          due to the zero-sum issue; see :func:`~tacco.tools.enrichments`)
        - 'clr': "Centered logratio transform"; like 'gmean' with an
          additional log transform; makes the distribution more normal and
          better suited for t tests
        - `None`: no normalization
        - a value name from `value_key`: all values are normalized to this
          contribution
        - a callable mapping a :class:`~pandas.DataFrame` to its normalized
          counterpart

    assume_counts
        Only relevant for `normalization=='gmean'` and `normalization=='clr'`;
        whether to regularize zeros by adding a pseudo count of 1 or by
        replacing them by 1e-3 of the minimum value. If `None`, check whether
        the data are consistent with count data and assume counts accordingly,
        except if `reads==True`, then also `assume_counts==True`.
    reads
        Whether to weight the values by the total count per observation.
    colors
        The mapping of value names to colors. If `None`, a set of standard
        colors is used.
    alpha
        A value-group-dataframe specifying a separate alpha value for all
        cells in the heatmap. If `None`, no transparency is used.
    axsize
        Tuple of width and height of a single axis. If `None`, use automatic
        values.
    axes_labels
        Labels to write on the axes as a list-like of the two labels.
    annotation
        A :class:`~pandas.DataFrame` containing annotation for each heatmap
        cell. If "value", annotate by the values. If `None`, don't annotate.
        If a tuple of "value" and a :class:`~pandas.DataFrame`, append the
        annotation from the dataframe to the values.
    value_cluster
        Whether to cluster and reorder the values.
    group_cluster
        Whether to cluster and reorder the groups.
    value_dendrogram
        Whether to draw a dendrogram for the values. If `True`, this implies
        `value_cluster=True`.
    group_dendrogram
        Whether to draw a dendrogram for the groups. If `True`, this implies
        `group_cluster=True`.
    value_order
        Set the order of the values explicitly with a list or to be close to
        diagonal by specifying "diag"; this option is incompatible with
        `value_cluster` and `value_dendrogram`.
    group_order
        Set the order of the groups explicitly with a list or to be close to
        diagonal by specifying "diag"; this option is incompatible with
        `group_cluster` and `group_dendrogram`.
    group_labels_rotation
        Adjusts the rotation of the group labels in degrees.
    ax
        The :class:`~matplotlib.axes.Axes` to plot on. If `None`, creates a
        fresh figure for plotting. Incompatible with dendrogram plotting.
    cmap
        A string/colormap to override the `colors` with.
    cmap_center
        A value to use as center of the colormap. E.g. choosing `0` maps the
        central color to the value `0` for every colormap in the plot (i.e.
        `0` will get white, while positive and negative values get the color
        and complement color given by `colors` if `cmap` is `None`, and
        whatever the central color of the supplied colormap is if `cmap` is
        not `None`). If `None`, the colormap spans the entire value range.
    cmap_vmin_vmax
        A tuple giving the range of values for the colormap. This can be
        modified by `cmap_center`.
    complement_colors
        Whether to use complement colors for values below `cmap_center` if
        `cmap` is `None`.
    colorbar
        Whether to draw a colorbar; only available if `cmap` is not `None`.
    colorbar_label
        The label to use for the colorbar; only available if `colorbar` is
        `True`.

    Returns
    -------
    A :class:`~matplotlib.figure.Figure` if `ax` is `None`, else `None`.

    """

    if isinstance(adata, pd.DataFrame):
        contributions = adata.fillna(0)
        types = contributions.index
    else:
        contributions, detailed_contributions, colors, types = _prep_contributions(
            adata=adata, value_key=value_key, group_key=group_key, sample_key=None,
            basis_adata=basis_adata, basis_value_key=basis_value_key, basis_group_key=basis_group_key, basis_sample_key=None,
            fillna=fillna, restrict_groups=restrict_groups, restrict_values=restrict_values,
            basis_restrict_groups=basis_restrict_groups, basis_restrict_values=basis_restrict_values,
            reduction=reduction, normalization=normalization, assume_counts=assume_counts, reads=reads, colors=colors,
        )

    value_cluster = value_cluster or value_dendrogram
    group_cluster = group_cluster or group_dendrogram
    if value_order is not None and value_cluster:
        raise ValueError('The options `value_cluster` and `value_dendrogram` are incompatible with the option `value_order`.')
    if group_order is not None and group_cluster:
        raise ValueError('The options `group_cluster` and `group_dendrogram` are incompatible with the option `group_order`.')
    group_order_diag = isinstance(group_order, str) and group_order == 'diag'
    value_order_diag = isinstance(value_order, str) and value_order == 'diag'
    if group_order_diag and value_order is not None and not value_order_diag:
        raise ValueError('The option `group_order=="diag"` is incompatible with values for `value_order` other than `None` or "diag".')
    if value_order_diag and group_order is not None and not group_order_diag:
        raise ValueError('The option `value_order=="diag"` is incompatible with values for `group_order` other than `None` or "diag".')
    if group_order_diag and group_cluster:
        raise ValueError('The option `group_order=="diag"` is incompatible with the options `group_cluster` and `group_dendrogram`.')
    if value_order_diag and value_cluster:
        raise ValueError('The option `value_order=="diag"` is incompatible with the options `value_cluster` and `value_dendrogram`.')

    if ax is not None:
        if value_dendrogram or group_dendrogram:
            raise ValueError('The options `value_dendrogram` and `group_dendrogram` are incompatible with the option `ax`.')
        fig = ax.get_figure()
    else:
        n_ax_x, n_ax_y = 1 + value_dendrogram, 1 + group_dendrogram
        if axsize is None:
            y_per_value = 0.25
            x_per_group = y_per_value if annotation is None else 0.7
            plot_size_y = contributions.shape[1] * y_per_value
            plot_size_x = contributions.shape[0] * x_per_group
            axsize = (plot_size_x, plot_size_y)
        else:
            plot_size_x, plot_size_y = axsize
        width_ratios = [plot_size_x, 0.5 * np.log(contributions.shape[1])] if value_dendrogram else None
        height_ratios = [0.5 * np.log(contributions.shape[0]), plot_size_y] if group_dendrogram else None
        fig, axs = subplots(n_ax_x, n_ax_y, axsize=axsize, wspace=0, hspace=0, width_ratios=width_ratios, height_ratios=height_ratios)
        ax = axs[-1,0]

    if group_labels_rotation is None:
        if annotation is None:
            group_labels_rotation = 45
        else:
            group_labels_rotation = 30

    if value_cluster:
        Z = scipy.cluster.hierarchy.linkage(contributions.T, method='average', metric='cosine')
        dn = scipy.cluster.hierarchy.dendrogram(Z, ax=(axs[-1,-1] if value_dendrogram else None), orientation='right', color_threshold=0, above_threshold_color='tab:gray', no_plot=(not value_dendrogram))
        if value_dendrogram:
            axs[-1,-1].set_axis_off()
        reordering = pd.Series(dn['ivl']).astype(int).to_numpy() # plain `int`: `np.int` is removed in recent numpy
        contributions = contributions.iloc[:,reordering]
    if group_cluster:
        Z = scipy.cluster.hierarchy.linkage(contributions, method='average', metric='cosine')
        dn = scipy.cluster.hierarchy.dendrogram(Z, ax=(axs[0,0] if group_dendrogram else None), orientation='top', color_threshold=0, above_threshold_color='tab:gray', no_plot=(not group_dendrogram))
        if group_dendrogram:
            axs[0,0].set_axis_off()
        reordering = pd.Series(dn['ivl']).astype(int).to_numpy()
        contributions = contributions.iloc[reordering]
    if value_dendrogram and group_dendrogram:
        axs[0,-1].set_axis_off()

    if isinstance(value_order, str) and value_order == 'diag': # permute towards diagonal
        for i in range(10):
            contributions = contributions.iloc[:, np.argsort(np.argmax(contributions.to_numpy(), axis=0))]
            contributions = contributions.iloc[np.argsort(np.argmax(contributions.to_numpy().T, axis=0))]
    else:
        if value_order is not None:
            contributions = contributions.loc[:,value_order]
        if group_order is not None:
            contributions = contributions.loc[group_order]

    _x, _y = [np.arange(-0.5, s + 0.5, 1) for s in contributions.shape]
    x, y = (_x[:-1] + _x[1:]) / 2, (_y[:-1] + _y[1:]) / 2

    def _vmin_vmax(vmin, vmax):
        if cmap_vmin_vmax is not None:
            vmin, vmax = cmap_vmin_vmax
        if cmap_center is not None:
            delta_max = vmax - cmap_center
            delta_min = -(vmin - cmap_center)
            delta = max(delta_max, delta_min)
            vmax = cmap_center + delta
            vmin = cmap_center - delta
        return vmin, vmax

    if cmap is None:
        rgba = np.zeros((*contributions.shape, 4))
        for j in range(contributions.shape[1]):
            r, g, b = to_rgb(colors[contributions.columns[j]])
            _r, _g, _b = _complement_color(r, g, b) # complement colors for negative values
            vmin, vmax = _vmin_vmax(contributions.iloc[:,j].min(), contributions.iloc[:,j].max())
            norm = Normalize(vmin=vmin, vmax=vmax)
            if complement_colors:
                cmap_j = LinearSegmentedColormap.from_list(contributions.columns[j], [(0, (_r, _g, _b)), (0.5, (1, 1, 1)), (1, (r, g, b))])
            else:
                cmap_j = LinearSegmentedColormap.from_list(contributions.columns[j], [(0, (1, 1, 1)), (1, (r, g, b))])
            mapper = ScalarMappable(norm=norm, cmap=cmap_j)
            rgba[:,j,:] = mapper.to_rgba(contributions.iloc[:,j].to_numpy())
    else:
        vmin, vmax = _vmin_vmax(contributions.to_numpy().min(), contributions.to_numpy().max())
        norm = Normalize(vmin=vmin, vmax=vmax)
        mapper = ScalarMappable(norm=norm, cmap=cmap)
        rgba = mapper.to_rgba(contributions.to_numpy())

    if colorbar and cmap is not None: # a colorbar needs a single common mapper, which is only available with an explicit `cmap`
        height_pxl = 200
        width_pxl = 15
        offset_top_pxl = 0
        offset_left_pxl = 20
        left, bottom = fig.transFigure.inverted().transform(ax.transAxes.transform((1,1)) + np.array([offset_left_pxl, -offset_top_pxl - height_pxl]))
        width, height = fig.transFigure.inverted().transform(fig.transFigure.transform((0,0)) + np.array([width_pxl, height_pxl]))
        cax = fig.add_axes((left, bottom, width, height))
        cb = fig.colorbar(mapper, cax=cax, label=colorbar_label)

    if alpha is not None:
        alpha = alpha.reindex(index=contributions.index, columns=contributions.columns)
        rgba[...,-1] = alpha

    ax.imshow(np.swapaxes(rgba, 0, 1), origin='lower', aspect='auto')

    if annotation is not None:
        for i, ind in enumerate(contributions.index):
            for j, col in enumerate(contributions.columns):
                if isinstance(annotation, str):
                    if annotation == 'value':
                        ann = f'{contributions.loc[ind,col]:.2}'
                    else:
                        raise ValueError(f'`annotation` got unknown string argument "{annotation}"')
                elif (hasattr(annotation, 'shape') and annotation.shape == (2,)) or (not hasattr(annotation, 'shape') and len(annotation) == 2):
                    if isinstance(annotation[0], str) and annotation[0] == 'value':
                        if ind in annotation[1].index and col in annotation[1].columns:
                            val = annotation[1].loc[ind,col]
                        else:
                            val = 0 # stands in for an unavailable annotation
                        ann = f'{contributions.loc[ind,col]:.2}{val}'
                    else:
                        raise ValueError('`annotation` got a tuple argument where the first entry is not "value"')
                else:
                    ann = annotation.loc[ind,col]
                ax.annotate(ann, xy=(x[i], y[j]), ha='center', va='center')

    ax.set_xticks(x)
    ax.set_xticklabels(contributions.index, rotation=group_labels_rotation, ha=('right' if group_labels_rotation not in [0,90] else 'center'))
    ax.set_yticks(y)
    ax.set_yticklabels(contributions.columns)

    _set_axes_labels(ax, axes_labels)

    return fig

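# --- Illustrative usage sketch (not part of the original module) -------------
# A hedged example of the DataFrame input path of `heatmap`, as used
# internally by `significances` below; with DataFrame input an explicit `cmap`
# should be passed, since no per-value `colors` are derived in that case. The
# row/column names are hypothetical.
def _example_heatmap():  # pragma: no cover
    rng = np.random.default_rng(0)
    df = pd.DataFrame(rng.normal(size=(4, 3)), index=['g1', 'g2', 'g3', 'g4'], columns=['A', 'B', 'C'])
    return heatmap(df, None, None, cmap='bwr', annotation='value', colorbar_label='score')
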
def _asterisks_from_pvals(pvals):
    pvals = pvals.astype(object)
    anstr = pvals.astype(str)
    anstr.loc[:,:] = ''
    anstr[(pvals > 0) & (np.abs(pvals) <= 0.05)] = '$^{\\ast}$'
    anstr[(pvals > 0) & (np.abs(pvals) <= 0.01)] = '${^{\\ast}}{^{\\ast}}$'
    anstr[(pvals > 0) & (np.abs(pvals) <= 0.001)] = '${^{\\ast}}{^{\\ast}}{^{\\ast}}$'
    anstr[(pvals < 0) & (np.abs(pvals) <= 0.05)] = '$_{\\ast}$'
    anstr[(pvals < 0) & (np.abs(pvals) <= 0.01)] = '${_{\\ast}}{_{\\ast}}$'
    anstr[(pvals < 0) & (np.abs(pvals) <= 0.001)] = '${_{\\ast}}{_{\\ast}}{_{\\ast}}$'
    return anstr

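# --- Illustrative sketch (not part of the original module) -------------------
# `_asterisks_from_pvals` expects signed p-values: positive for enrichment and
# negative for depletion. Enrichment is rendered as superscript and depletion
# as subscript asterisks, with more asterisks for smaller |p|.
def _example_asterisks():  # pragma: no cover
    pvals = pd.DataFrame({'A': [0.04, -0.0005], 'B': [0.2, -0.02]})
    return _asterisks_from_pvals(pvals)
    # 'A': one superscript asterisk (0.04), three subscript asterisks (-0.0005)
    # 'B': empty string (0.2 is insignificant), one subscript asterisk (-0.02)
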
[docs]
def sigmap(
    adata,
    value_key,
    group_key,
    sample_key=None,
    position_key=None,
    position_split=2,
    min_obs=0,
    basis_adata=None,
    basis_value_key=None,
    basis_group_key=None,
    basis_sample_key=None,
    basis_position_key=None,
    basis_position_split=None,
    basis_min_obs=None,
    fillna=None,
    restrict_groups=None,
    restrict_values=None,
    basis_restrict_groups=None,
    basis_restrict_values=None,
    p_corr='fdr_bh',
    method='mwu',
    reduction=None,
    normalization=None,
    assume_counts=None,
    reads=False,
    colors=None,
    axsize=None,
    value_dendrogram=False,
    group_dendrogram=False,
    value_order=None,
    group_order=None,
    ax=None,
):
    """\
    Plot heatmap of contribution to groups and mark significant differences
    with asterisks.

    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` with annotation in `.obs`.
    value_key
        The `.obs` or `.obsm` key with the values to determine the enrichment
        for.
    group_key
        The `.obs` key with categorical group information.
    sample_key
        The `.obs` key with categorical sample information for p-value
        determination. If `None`, only the aggregated data is used.
    position_key
        The `.obsm` key or array-like of `.obs` keys with the position space
        coordinates. If `None`, no position splits are performed.
    position_split
        The number of splits per spatial dimension before enrichment. Can be a
        tuple with the spatial dimension as length to assign a different split
        per dimension. If `None`, no position splits are performed. See also
        `min_obs`.
    min_obs
        The minimum number of observations per sample: if fewer observations
        are available, the sample is not used. This also limits the number of
        `position_split` to stop splitting if the split would decrease the
        number of observations below this threshold.
    basis_adata
        Another :class:`~anndata.AnnData` with annotation in `.obs` to
        compare. If `None`, only the `adata` composition is shown.
    basis_value_key
        The `.obs` or `.obsm` key for `basis_adata` with the values to
        determine the enrichment for. If `None`, `value_key` is used.
    basis_group_key
        The `.obs` key with categorical group information for `basis_adata`.
        If `None`, `group_key` is used.
    basis_sample_key
        The `.obs` key with categorical sample information for `basis_adata`.
        If `None`, `sample_key` is used.
    basis_position_key
        Like `position_key` but for `basis_adata`. If `None`, no position
        splits are performed.
    basis_position_split
        Like `position_split` but for `basis_adata`. If `None`,
        `position_split` is used.
    basis_min_obs
        Like `min_obs` but for `basis_adata`. If `None`, `min_obs` is used.
    fillna
        If `None`, observations containing NA in the values are filtered.
        Else, NA values are replaced with this value.
    restrict_groups
        A list-like containing the groups within which the enrichment analysis
        is to be done. If `None`, all groups are included.
    restrict_values
        A list-like containing the values within which the enrichment analysis
        is to be done. If `None`, all values are included. Works only for
        categorical values.
    basis_restrict_groups
        Like `restrict_groups` but for `basis_adata`.
    basis_restrict_values
        Like `restrict_values` but for `basis_adata`.
    p_corr
        The name of the p-value correction method to use. Possible values are
        the ones available in
        :func:`~statsmodels.stats.multitest.multipletests`. If `None`, no
        p-value correction is performed.
    method
        Specification of methods to use for enrichment. Available are:

        - 'fisher': Fisher's exact test; only for categorical values. Ignores
          the `reduction` and `normalization` arguments.
        - 'mwu': Mann-Whitney U test

    reduction
        The reduction to apply on each (group,sample) subset of the data.
        Possible values are:

        - 'sum': sum of the values over observations
        - 'mean': mean of the values over observations
        - 'median': median of the values over observations
        - `None`: use observations directly
        - a callable mapping a :class:`~pandas.DataFrame` to its reduced
          counterpart

    normalization
        The normalization to apply on each reduced (group,sample) subset of
        the data. Possible values are:

        - 'sum': normalize values by their sum (yields fractions)
        - 'percent': like 'sum' scaled by 100 (yields percentages)
        - 'gmean': normalize values by their geometric mean (yields
          contributions which make more sense for enrichments than fractions,
          due to the zero-sum issue; see :func:`~tacco.tools.enrichments`)
        - 'clr': "Centered logratio transform"; like 'gmean' with an
          additional log transform; makes the distribution more normal and
          better suited for t tests
        - `None`: no normalization
        - a value name from `value_key`: all values are normalized to this
          contribution
        - a callable mapping a :class:`~pandas.DataFrame` to its normalized
          counterpart

    assume_counts
        Only relevant for `normalization=='gmean'` and `normalization=='clr'`;
        whether to regularize zeros by adding a pseudo count of 1 or by
        replacing them by 1e-3 of the minimum value. If `None`, check whether
        the data are consistent with count data and assume counts accordingly,
        except if `reads==True`, then also `assume_counts==True`.
    reads
        Whether to weight the values by the total count per observation.
    colors
        The mapping of value names to colors. If `None`, a set of standard
        colors is used.
    axsize
        Tuple of width and height of a single axis. If `None`, use automatic
        values.
    value_dendrogram
        Whether to draw a dendrogram for the values.
    group_dendrogram
        Whether to draw a dendrogram for the groups.
    value_order
        Set the order of the values explicitly; this option is incompatible
        with `value_dendrogram`.
    group_order
        Set the order of the groups explicitly; this option is incompatible
        with `group_dendrogram`.
    ax
        The :class:`~matplotlib.axes.Axes` to plot on. If `None`, creates a
        fresh figure for plotting. Incompatible with dendrogram plotting.

    Returns
    -------
    A :class:`~matplotlib.figure.Figure` if `ax` is `None`, else `None`.

    """

    pvals = enrichments(
        adata=adata, value_key=value_key, group_key=group_key, sample_key=sample_key,
        method=method, fillna=fillna, position_key=position_key,
        position_split=position_split, min_obs=min_obs, p_corr=p_corr,
        restrict_groups=restrict_groups, restrict_values=restrict_values, reads=reads,
        reduction=reduction, normalization=normalization, assume_counts=assume_counts,
    )
    if basis_adata is not None:
        if basis_position_split is None:
            basis_position_split = position_split
        if basis_min_obs is None:
            basis_min_obs = min_obs
        if basis_sample_key is None:
            basis_sample_key = sample_key
        if basis_group_key is None:
            basis_group_key = group_key
        if basis_value_key is None:
            basis_value_key = value_key
        basis_pvals = enrichments(
            adata=basis_adata, value_key=basis_value_key, group_key=basis_group_key, sample_key=basis_sample_key,
            method=method, fillna=fillna, position_key=basis_position_key,
            position_split=basis_position_split, min_obs=basis_min_obs, p_corr=p_corr,
            restrict_groups=basis_restrict_groups, restrict_values=basis_restrict_values, reads=reads,
            reduction=reduction, normalization=normalization, assume_counts=assume_counts,
        )
        basis_pvals.rename(columns={basis_value_key: value_key}, inplace=True)
        basis_pvals[basis_group_key] = basis_pvals[basis_group_key].cat.rename_categories(lambda c: c + ' (reference)')
        basis_pvals.rename(columns={basis_group_key: group_key}, inplace=True)
        joint_categories = [*list(basis_pvals[group_key].cat.categories), *list(pvals[group_key].cat.categories)]
        pvals = pd.concat([basis_pvals, pvals])
        pvals[group_key] = pvals[group_key].astype(pd.CategoricalDtype(joint_categories, ordered=True))

    ann = pd.pivot_table(pvals[pvals['enrichment'] == 'enriched'], values=f'p_{method}_{p_corr}', index=group_key, columns=value_key)
    annp = pd.pivot_table(pvals[pvals['enrichment'] != 'enriched'], values=f'p_{method}_{p_corr}', index=group_key, columns=value_key)
    ann[annp < ann] = -annp
    anstr = _asterisks_from_pvals(ann)

    fig = heatmap(
        adata=adata, value_key=value_key, group_key=group_key,
        basis_adata=basis_adata, basis_value_key=basis_value_key, basis_group_key=basis_group_key,
        fillna=fillna, restrict_groups=restrict_groups, restrict_values=restrict_values,
        basis_restrict_groups=basis_restrict_groups, basis_restrict_values=basis_restrict_values,
        reduction=reduction, normalization=normalization, reads=reads, colors=colors, axsize=axsize, annotation=('value', anstr),
        value_dendrogram=value_dendrogram, group_dendrogram=group_dendrogram, assume_counts=assume_counts,
        ax=ax, colorbar=False, complement_colors=False, cmap_center=None,
        value_order=value_order, group_order=group_order,
    )

    return fig

[docs]
def significances(
    significances,
    p_key,
    value_key,
    group_key,
    enrichment_key='enrichment',
    enriched_label='enriched',
    pmax=0.05,
    pmin=1e-5,
    annotate_pvalues=True,
    value_cluster=False,
    group_cluster=False,
    value_order=None,
    group_order=None,
    axsize=None,
    ax=None,
    scale_legend=1.0,
):
    """\
    Plot enrichment significances.

    Parameters
    ----------
    significances
        A :class:`~pandas.DataFrame` with p-values and their annotation. If it
        contains significances for enrichment and depletion, this direction
        has to be specified with values "enriched" and something else (e.g.
        "depleted" or "purified") in a column "enrichment" of the DataFrame.
        See also the parameters `enrichment_key` and `enriched_label`.
    p_key
        The key with the p-values.
    value_key
        The key with the values for which the enrichment was determined.
    group_key
        The key with the groups in which the enrichment was determined.
    enrichment_key
        The key with the direction of enrichment, i.e. something like
        "enriched" and "purified". See also parameter `enriched_label`.
        Default: "enrichment".
    enriched_label
        The value under the key `enrichment_key` which indicates something
        like enrichment. Default: "enriched".
    pmax
        The maximum p-value to show.
    pmin
        The minimum p-value on the color scale.
    annotate_pvalues
        Whether to annotate p-values.
    value_cluster
        Whether to cluster and reorder the values.
    group_cluster
        Whether to cluster and reorder the groups.
    value_order
        Set the order of the values explicitly with a list or to be close to
        diagonal by specifying "diag"; this option is incompatible with
        `value_cluster`.
    group_order
        Set the order of the groups explicitly with a list or to be close to
        diagonal by specifying "diag"; this option is incompatible with
        `group_cluster`.
    axsize
        Tuple of width and height of a single axis. If `None`, use automatic
        values.
    ax
        The :class:`~matplotlib.axes.Axes` to plot on. If `None`, creates a
        fresh figure for plotting. Incompatible with dendrogram plotting.
    scale_legend
        Set to scale height and width of the legend.

    Returns
    -------
    A :class:`~matplotlib.figure.Figure`.

    """

    small_value = 1e-300
    max_log = -np.log(pmin)
    min_log = -np.log(pmax)

    depleted_label = None
    if enrichment_key is not None:
        if enrichment_key not in significances:
            raise ValueError(f'The column "{enrichment_key}" does not exist in the supplied dataframe! If this is intentional and you want to suppress this error, supply "enrichment_key=None" as argument.')
        unique_significance_labels = significances[enrichment_key].unique()
        if len(unique_significance_labels) == 1:
            enriched_label = unique_significance_labels[0]
        else:
            if len(unique_significance_labels) > 2 or enriched_label not in unique_significance_labels:
                raise ValueError(f'The column "{enrichment_key}" is expected to have exactly 2 different values: "{enriched_label}" and something else, e.g. "depleted" or "purified"! The supplied column has the values {significances[enrichment_key].unique()}!')
            depleted_label = unique_significance_labels[unique_significance_labels != enriched_label][0]

    if depleted_label is not None:
        enr_e = pd.pivot(significances[significances[enrichment_key] == enriched_label], index=value_key, columns=group_key, values=p_key)
        enr_p = pd.pivot(significances[significances[enrichment_key] == depleted_label], index=value_key, columns=group_key, values=p_key)
        enr_e = np.maximum(enr_e, small_value)
        enr_p = np.maximum(enr_p, small_value)
        enr_p = enr_p.reindex_like(enr_e)
        enr = pd.DataFrame(np.where(enr_e < enr_p, -np.log(enr_e), np.log(enr_p)), index=enr_e.index, columns=enr_e.columns)
        ann = pd.DataFrame(np.where(enr_e < enr_p, enr_e, enr_p), index=enr_e.index, columns=enr_e.columns)
    else:
        ann = pd.pivot(significances, index=value_key, columns=group_key, values=p_key)
        enr = -np.log(ann)
    # avoid discrepancies between color and annotation by basing both color and annotation on cuts on the same values
    enr = pd.DataFrame(np.where(ann > pmax, 0, enr), index=enr.index, columns=enr.columns)
    ann = pd.DataFrame(np.where(ann > pmax, '', ann.applymap(lambda x: f'{x:.2}')), index=enr.index, columns=enr.columns)
    enr = enr.T
    ann = ann.T
    if not annotate_pvalues:
        ann = None

    # setup the plotting
    enriched_color = (1.0, 0.07058823529411765, 0.09019607843137255)
    depleted_color = (0.30196078431372547, 0.5215686274509804, 0.7098039215686275)
    null_color = (0.9, 0.9, 0.9)
    slightly_weight = 0.2
    slightly_enriched_color, slightly_depleted_color = mix_base_colors(
        np.array([[slightly_weight, 1 - slightly_weight, 0.0], [0.0, 1 - slightly_weight, slightly_weight]]),
        np.array([list(enriched_color), list(null_color), list(depleted_color)])
    )

    if depleted_label is None:
        ct2 = min_log / max_log
        cdict = {'red':   [[0.0, null_color[0], null_color[0]], [ct2, null_color[0], slightly_enriched_color[0]], [1.0, enriched_color[0], enriched_color[0]]],
                 'green': [[0.0, null_color[1], null_color[1]], [ct2, null_color[1], slightly_enriched_color[1]], [1.0, enriched_color[1], enriched_color[1]]],
                 'blue':  [[0.0, null_color[2], null_color[2]], [ct2, null_color[2], slightly_enriched_color[2]], [1.0, enriched_color[2], enriched_color[2]]]}
        cmap = LinearSegmentedColormap('sigmap', segmentdata=cdict, N=256)
        fig = heatmap(enr, None, None, cmap=cmap, cmap_vmin_vmax=(0.0, max_log), cmap_center=max_log/2, annotation=ann, colorbar=False, value_cluster=value_cluster, group_cluster=group_cluster, value_order=value_order, group_order=group_order, axsize=axsize, ax=ax)
    else:
        ct1 = 0.5 * (1 - min_log / max_log)
        ct2 = 0.5 * (1 + min_log / max_log)
        cdict = {'red':   [[0.0, depleted_color[0], depleted_color[0]], [ct1, slightly_depleted_color[0], null_color[0]], [ct2, null_color[0], slightly_enriched_color[0]], [1.0, enriched_color[0], enriched_color[0]]],
                 'green': [[0.0, depleted_color[1], depleted_color[1]], [ct1, slightly_depleted_color[1], null_color[1]], [ct2, null_color[1], slightly_enriched_color[1]], [1.0, enriched_color[1], enriched_color[1]]],
                 'blue':  [[0.0, depleted_color[2], depleted_color[2]], [ct1, slightly_depleted_color[2], null_color[2]], [ct2, null_color[2], slightly_enriched_color[2]], [1.0, enriched_color[2], enriched_color[2]]]}
        cmap = LinearSegmentedColormap('sigmap', segmentdata=cdict, N=256)
        fig = heatmap(enr, None, None, cmap=cmap, cmap_vmin_vmax=(-max_log, max_log), cmap_center=0.0, annotation=ann, colorbar=False, value_cluster=value_cluster, group_cluster=group_cluster, value_order=value_order, group_order=group_order, axsize=axsize, ax=ax)

    rel_dpi_factor = fig.get_dpi() / 72
    height_pxl = 200 * rel_dpi_factor * scale_legend
    width_pxl = 15 * rel_dpi_factor * scale_legend
    offset_top_pxl = 0 * rel_dpi_factor * scale_legend
    offset_left_pxl = 30 * rel_dpi_factor * scale_legend
    if ax is None:
        ax = fig.axes[0]
    left, bottom = fig.transFigure.inverted().transform(ax.transAxes.transform((1,1)) + np.array([offset_left_pxl, -offset_top_pxl - height_pxl]))
    width, height = fig.transFigure.inverted().transform(fig.transFigure.transform((0,0)) + np.array([width_pxl, height_pxl]))
    cax = fig.add_axes((left, bottom, width, height))
    norm = Normalize(vmin=(0.0 if depleted_label is None else -max_log), vmax=max_log)
    cb = fig.colorbar(ScalarMappable(norm=norm, cmap=cmap), cax=cax)
    if depleted_label is None:
        cb.set_ticks([min_log, max_log])
        cb.set_ticklabels([pmax, pmin])
    else:
        cb.set_ticks([-max_log, -min_log, min_log, max_log])
        cb.set_ticklabels([pmin, pmax, pmax, pmin])
    cb.ax.annotate('enriched', xy=(0, 1), xycoords='axes fraction', xytext=(-3, -5), textcoords='offset pixels', horizontalalignment='right', verticalalignment='top', rotation=90, fontsize=10*scale_legend)
    cb.ax.annotate('insignificant', xy=(0, (0.0 if depleted_label is None else 0.5)), xycoords='axes fraction', xytext=(-3, 5), textcoords='offset pixels', horizontalalignment='right', verticalalignment=('bottom' if depleted_label is None else 'center'), rotation=90, fontsize=10*scale_legend)
    if depleted_label is not None:
        cb.ax.annotate('depleted', xy=(0, 0), xycoords='axes fraction', xytext=(-3, 5), textcoords='offset pixels', horizontalalignment='right', verticalalignment='bottom', rotation=90, fontsize=10*scale_legend)

    return fig

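# --- Illustrative usage sketch (not part of the original module) -------------
# A hedged example of calling `significances` on a hand-built long-format
# DataFrame with one enriched and one depleted p-value per (value, group)
# combination; all column names and p-values are hypothetical.
def _example_significances():  # pragma: no cover
    combos = [('A', 'g1', 0.001, 0.9), ('A', 'g2', 0.3, 0.4), ('B', 'g1', 0.8, 0.02), ('B', 'g2', 0.5, 0.5)]
    rows = []
    for v, g, p_enr, p_dep in combos:
        rows.append((v, g, 'enriched', p_enr))
        rows.append((v, g, 'depleted', p_dep))
    sig = pd.DataFrame(rows, columns=['value', 'group', 'enrichment', 'p'])
    return significances(sig, p_key='p', value_key='value', group_key='group')
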
def _escape_math_special_characters(string):
    string = string.replace('_','\\_')
    return string

def _get_co_occurrence(adata, analysis_key, show_only, show_only_center, colors, score_key, log_base):
    if analysis_key not in adata.uns:
        raise ValueError(f'`analysis_key` "{analysis_key}" is not found in `adata.uns`! Make sure to run tc.tl.co_occurrence first.')
    if score_key in adata.uns[analysis_key]:
        mean_scores = adata.uns[analysis_key][score_key]
        if score_key.startswith('log'):
            if log_base is not None:
                mean_scores = mean_scores / np.log(log_base)
    else:
        raise ValueError(f'The score_key {score_key!r} was not found!')
    if mean_scores is None:
        raise ValueError(f'The score {score_key!r} in `adata.uns[{analysis_key}]` is None!')
    intervals = adata.uns[analysis_key]['interval']
    annotation = adata.uns[analysis_key]['annotation']
    center = adata.uns[analysis_key]['center']
    if show_only is not None:
        if isinstance(show_only, str):
            show_only = [show_only]
        select = annotation.isin(show_only)
        if select.sum() < len(show_only):
            raise ValueError(f'The `show_only` categories {[s for s in show_only if s not in annotation]!r} are not available in the data!')
        annotation = annotation[select]
        mean_scores = mean_scores[select,:,:]
        # check if the order is the same as in the show_only selection
        permutation = np.argsort(annotation)[np.argsort(np.argsort(show_only))]
        if not np.all(permutation == np.arange(len(permutation))):
            annotation = annotation[permutation]
            mean_scores = mean_scores[permutation,:,:]
    if show_only_center is not None:
        if isinstance(show_only_center, str):
            show_only_center = [show_only_center]
        select = center.isin(show_only_center)
        if select.sum() < len(show_only_center):
            raise ValueError(f'The `show_only_center` categories {[s for s in show_only_center if s not in center]!r} are not available in the data!')
        center = center[select]
        mean_scores = mean_scores[:,select,:]
        # check if the order is the same as in the show_only_center selection
        permutation = np.argsort(center)[np.argsort(np.argsort(show_only_center))]
        if not np.all(permutation == np.arange(len(permutation))):
            center = center[permutation]
            mean_scores = mean_scores[:,permutation,:]
    colors, types = _get_colors(colors, pd.Series(annotation))
    return mean_scores, intervals, annotation, center, colors, types

def _get_cooc_expression_label(score_key, log_base):
    if score_key == 'occ':
        expression = '$\\frac{p(anno|center)}{p(anno)}$'
    elif score_key == 'log_occ':
        base_str = '' if log_base is None else f'_{log_base}'
        expression = '$log' + base_str + '\\left(\\frac{p(anno|center)}{p(anno)}\\right)$'
    elif score_key == 'z':
        expression = '$\\frac{log(N(anno,center))-random expectation}{standard deviation}$'
    elif score_key == 'composition':
        expression = '$p(anno|center)$'
    elif score_key == 'log_composition':
        base_str = '' if log_base is None else f'_{log_base}'
        expression = '$log' + base_str + '(p(anno|center))$'
    elif score_key == 'distance_distribution':
        expression = '$p(dist|anno,center)$'
    elif score_key == 'log_distance_distribution':
        base_str = '' if log_base is None else f'_{log_base}'
        expression = '$log' + base_str + '(p(dist|anno,center))$'
    elif score_key == 'relative_distance_distribution':
        expression = '$\\frac{p(dist|anno,center)}{p(dist|*,center)}$'
    elif score_key == 'log_relative_distance_distribution':
        base_str = '' if log_base is None else f'_{log_base}'
        expression = '$log' + base_str + '\\left(\\frac{p(dist|anno,center)}{p(dist|*,center)}\\right)$'
    else:
        base_str = '' if log_base is None else f'_{log_base}'
        expression = '$log' + base_str + '\\left(\\frac{p(anno|center)/p(anno)}{gmean\\left(p(anno|center)/p(anno)\\right)}\\right)$'
    return expression
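# Illustrative sketch (not part of the original module): the label helper only
# assembles a matplotlib-mathtext string, so its behavior can be spot-checked
# directly. `_demo_cooc_expression_label` is a hypothetical name.
def _demo_cooc_expression_label():
    assert _get_cooc_expression_label('occ', None) == '$\\frac{p(anno|center)}{p(anno)}$'
    # a log_base of 2 only changes the subscript of the log
    assert _get_cooc_expression_label('log_occ', 2).startswith('$log_2')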
[docs]
def co_occurrence(
    adata,
    analysis_key,
    score_key='log_occ',
    log_base=None,
    colors=None,
    show_only=None,
    show_only_center=None,
    axsize=(4,3),
    sharex=True,
    sharey='col',
    wspace=0.15,
    hspace=0.3,
    legend=True,
    grid=True,
    merged=False,
    ax=None,
):
    """\
    Plot co-occurrence as determined by :func:`~tacco.tools.co_occurrence`.
    
    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` with the co-occurrence analysis in `.uns`.
        Can also be a mapping of labels to :class:`~anndata.AnnData` to specify
        multiple datasets.
    analysis_key
        The `.uns` key with the co-occurrence analysis result.
    score_key
        The `.uns[analysis_key]` key of the score to use or the
        `.uns[analysis_key]['comparisons']` sub-key specifying the comparison
        to plot. Available keys in `.uns[analysis_key]` include:
        
        - 'occ': co-occurrence
        - 'log_occ': logarithm of the co-occurrence
        - 'log2_occ': base-2 logarithm of the co-occurrence; this is not a real
          key but a convenience function to rescale the 'log_occ' values
        - 'z': z-score of the log of the neighbourship counts with respect to
          random neighbourships
        - 'composition': distance-dependent composition, `p(anno|center, dist)`
        - 'log_composition': log of 'composition'
        - 'distance_distribution': distribution of distances between anno and
          center, `p(dist|anno,center)`
        - 'log_distance_distribution': log of 'distance_distribution'
        - 'relative_distance_distribution': 'distance_distribution' normalized
          to `p(dist|*,center)`, the distance distribution of any annotation to
          the center
        - 'log_relative_distance_distribution': log of
          'relative_distance_distribution'
    log_base
        The base of the logarithm to use for plotting if `score_key` is
        'log_occ' or a comparison key. If `None`, use the natural logarithm.
    colors
        The mapping of value names to colors. If `None`, a set of standard
        colors is used.
    show_only
        A subset of annotation values to restrict the plotting to.
    show_only_center
        A subset of the center annotation values to restrict the plotting to.
    axsize
        Tuple of width and height of a single axis.
    sharex, sharey
        Whether and how to use a common x/y axis. Options include `True`,
        `False`, "col", "row", "none", and "all".
    wspace, hspace
        Control the spacing between the plots.
    legend
        Whether to include the legend.
    grid
        Whether to plot a grid.
    merged
        Whether to merge the plots for all :class:`~anndata.AnnData` instances
        into a single row of plots. This only makes sense if multiple instances
        are provided in `adata`.
    ax
        The :class:`~matplotlib.axes.Axes` to plot on. If `None`, creates a
        fresh figure for plotting.
    
    Returns
    -------
    A :class:`~matplotlib.figure.Figure`.
    
    """
    
    adatas = _get_adatas(adata)
    
    fig = None
    linestyles = ['solid','dashed','dotted','dashdot']
    for adata_i, (adata_name, adata) in enumerate(adatas.items()):
        mean_scores, intervals, annotation, center, colors, types = _get_co_occurrence(adata, analysis_key, show_only, show_only_center, colors, score_key, log_base)
        
        if merged:
            if len(adatas) > len(linestyles):
                raise ValueError(f'`merged==True` is only possible with up to {len(linestyles)} adatas!')
            if fig is None:
                if ax is not None:
                    if isinstance(ax, matplotlib.axes.Axes):
                        axs = np.array([[ax]])
                    else:
                        axs = ax
                    # one row of plots with one column per center category
                    if axs.shape != (1, len(center)):
                        raise ValueError(f'The `ax` argument got the wrong shape of axes: needed is {(1, len(center))!r} supplied was {axs.shape!r}!')
                    axsize = axs[0,0].get_window_extent().transformed(axs[0,0].get_figure().dpi_scale_trans.inverted()).size
                    fig = axs[0,0].get_figure()
                else:
                    fig, axs = subplots(len(center), 1, axsize=axsize, hspace=hspace, wspace=wspace, sharex=sharex, sharey=sharey)
            for ir, nr in enumerate(center):
                x = (intervals[1:] + intervals[:-1]) / 2
                for ja, na in enumerate(annotation):
                    y = mean_scores[ja,ir,:]
                    linestyle = linestyles[adata_i]
                    axs[0,ir].plot(x, y, color=colors[na], linestyle=linestyle)
                adata_title = f'center {center.name}={nr}: '
                anno_title = 'annotation: ' + annotation.name
                axs[0,ir].set_xlabel('distance')
                axs[0,ir].set_ylabel(anno_title)
                expression = _get_cooc_expression_label(score_key,log_base)
                axs[0,ir].set_title(adata_title + expression)
                axs[0,ir].grid(grid)
        else:
            if fig is None:
                if ax is not None:
                    if isinstance(ax, matplotlib.axes.Axes):
                        axs = np.array([[ax]])
                    else:
                        axs = ax
                    # one row per adata with one column per center category
                    if axs.shape != (len(adatas), len(center)):
                        raise ValueError(f'The `ax` argument got the wrong shape of axes: needed is {(len(adatas), len(center))!r} supplied was {axs.shape!r}!')
                    axsize = axs[0,0].get_window_extent().transformed(axs[0,0].get_figure().dpi_scale_trans.inverted()).size
                    fig = axs[0,0].get_figure()
                else:
                    fig, axs = subplots(len(center), len(adatas), axsize=axsize, hspace=hspace, wspace=wspace, sharex=sharex, sharey=sharey)
            for ir, nr in enumerate(center):
                x = (intervals[1:] + intervals[:-1]) / 2
                for ja, na in enumerate(annotation):
                    y = mean_scores[ja,ir,:]
                    linestyle = linestyles[nr==na] # dashed line where the annotation equals the center category
                    axs[adata_i,ir].plot(x, y, color=colors[na], linestyle=linestyle)
                adata_title = f'{adata_name}, center {center.name}={nr}: ' if adata_name != '' else f'center {center.name}={nr}: '
                anno_title = 'annotation: ' + annotation.name
                axs[adata_i,ir].set_xlabel('distance')
                axs[adata_i,ir].set_ylabel(anno_title)
                expression = _get_cooc_expression_label(score_key,log_base)
                axs[adata_i,ir].set_title(adata_title + expression)
                axs[adata_i,ir].grid(grid)
    
    if legend:
        handles = []
        if merged:
            handles.extend([mlines.Line2D([], [], color='gray', label=adata_name, linestyle=linestyle) for ((adata_name, adata), linestyle) in zip(adatas.items(), linestyles)])
        handles.extend([mpatches.Patch(color=color, label=ind) for (ind, color) in zip(annotation, colors[annotation])])
        axs[0,len(center)-1].legend(handles=handles, bbox_to_anchor=(1, 1), loc='upper left', ncol=1)
    
    return fig
[docs]
def co_occurrence_matrix(
    adata,
    analysis_key,
    score_key='log_occ',
    log_base=None,
    colors=None,
    show_only=None,
    show_only_center=None,
    axsize=None,
    hspace=None,
    wspace=None,
    x_padding=2.0,
    y_padding=2.0,
    value_cluster=False,
    group_cluster=False,
    restrict_intervals=None,
    p_corr='fdr_bh',
    cmap='bwr',
    cmap_vmin_vmax=None,
    legend=True,
    ax=None,
    scale_legend=1.0,
):
    """\
    Plot co-occurrence as determined by :func:`~tacco.tools.co_occurrence` or
    :func:`~tacco.tools.co_occurrence_matrix` as a matrix.
    
    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` with the co-occurrence analysis in `.uns`.
        Can also be a mapping of labels to :class:`~anndata.AnnData` to specify
        multiple datasets.
    analysis_key
        The `.uns` key with the co-occurrence analysis result.
    score_key
        The `.uns[analysis_key]` key of the score to use. Available keys in
        `.uns[analysis_key]` include:
        
        - 'occ': co-occurrence
        - 'log_occ': logarithm of the co-occurrence
        - 'log2_occ': base-2 logarithm of the co-occurrence; this is not a real
          key but a convenience function to rescale the 'log_occ' values
        - 'z': z-score of the log of the neighbourship counts with respect to
          random neighbourships
        - 'composition': distance-dependent composition, `p(anno|center, dist)`
        - 'log_composition': log of 'composition'
        - 'distance_distribution': distribution of distances between anno and
          center, `p(dist|anno,center)`
        - 'log_distance_distribution': log of 'distance_distribution'
        - 'relative_distance_distribution': 'distance_distribution' normalized
          to `p(dist|*,center)`, the distance distribution of any annotation to
          the center
        - 'log_relative_distance_distribution': log of
          'relative_distance_distribution'
    log_base
        The base of the logarithm to use for plotting if `score_key` is a log
        quantity. If `None`, use the natural logarithm.
    colors
        The mapping of value names to colors. If `None`, a set of standard
        colors is used.
    show_only
        A subset of annotation values to restrict the plotting to.
    show_only_center
        A subset of the center annotation values to restrict the plotting to.
    axsize
        Tuple of width and height of a single axis. If `None`, some heuristic
        value is used.
    wspace, hspace
        Relative horizontal and vertical spacing between plots.
    x_padding, y_padding
        Absolute horizontal and vertical spacing between plots; this setting
        overrides `wspace` and `hspace`; if `None`, use the values from
        `wspace` and `hspace`.
    value_cluster
        Whether to cluster and reorder the values.
    group_cluster
        Whether to cluster and reorder the groups.
    restrict_intervals
        A list-like containing the indices of the intervals to plot. If `None`,
        all intervals are included.
    cmap
        A string/colormap to override the `colors` with globally.
    cmap_vmin_vmax
        A tuple giving the range of values for the colormap.
    legend
        Whether to include the legend.
    scale_legend
        Set to scale height and width of legend.
    ax
        The :class:`~matplotlib.axes.Axes` to plot on. If `None`, creates a
        fresh figure for plotting. Incompatible with dendrogram plotting.
    
    Returns
    -------
    A :class:`~matplotlib.figure.Figure`.
    
    """
    
    adatas = _get_adatas(adata)
    
    min_max = None
    # first pass through the data to get the global min/max of the values for the colormap
    for adata_i, (adata_name, adata) in enumerate(adatas.items()):
        mean_scores, intervals, annotation, center, colors, types = _get_co_occurrence(adata, analysis_key, show_only, show_only_center, colors, score_key=score_key, log_base=log_base)
        if min_max is None:
            if restrict_intervals is None:
                restrict_intervals = np.arange(len(intervals)-1)
            min_max = np.zeros((len(adatas),len(restrict_intervals),2))
        data = mean_scores[:,:,restrict_intervals]
        min_max[adata_i,:,:] = data.min(),data.max()
    min_max[:,:,0] = min_max[:,:,0].min()
    min_max[:,:,1] = min_max[:,:,1].max()
    if cmap_vmin_vmax is not None:
        min_max[:,:,:] = np.array(cmap_vmin_vmax)
    
    if axsize is None:
        axsize = (0.2*len(center),0.2*len(annotation))
    
    if ax is not None:
        if isinstance(ax, matplotlib.axes.Axes):
            axs = np.array([[ax]])
        else:
            axs = ax
        if axs.shape != (len(adatas), len(restrict_intervals)):
            raise ValueError(f'The `ax` argument got the wrong shape of axes: needed is {(len(adatas), len(restrict_intervals))!r} supplied was {axs.shape!r}!')
        axsize = axs[0,0].get_window_extent().transformed(axs[0,0].get_figure().dpi_scale_trans.inverted()).size
        fig = axs[0,0].get_figure()
    else:
        fig, axs = subplots(len(restrict_intervals), len(adatas), axsize=axsize, hspace=hspace, wspace=wspace, x_padding=x_padding, y_padding=y_padding)
    
    # second pass for actual plotting
    for adata_i, (adata_name, adata) in enumerate(adatas.items()):
        mean_scores, intervals, annotation, center, colors, types = _get_co_occurrence(adata, analysis_key, show_only, show_only_center, colors, score_key=score_key, log_base=log_base)
        for _ii, rii in enumerate(restrict_intervals):
            interval = f'({intervals[rii]},{intervals[rii+1]})'
            ax = axs[adata_i,_ii]
            data = mean_scores[:,:,rii] # select the rii-th interval, not the _ii-th plot column
            data = pd.DataFrame(data, index=annotation, columns=center).T
            heatmap(
                data,
                value_key=None,
                group_key=None,
                colors=colors,
                value_cluster=value_cluster,
                group_cluster=group_cluster,
                ax=ax,
                cmap=cmap,
                cmap_center=None, # (0 if log else None)
                cmap_vmin_vmax=min_max[adata_i,_ii],
                group_labels_rotation=90,
                colorbar=False,
            )
            adata_title = f'{adata_name}, interval {interval}: ' if adata_name != '' else f'interval {interval}: '
            expression = _get_cooc_expression_label(score_key,log_base)
            ax.set_title(adata_title + expression)
            anno_title = 'annotation: ' + _escape_math_special_characters(annotation.name)
            anno_center_title = 'center: ' + _escape_math_special_characters(center.name)
            ax.set_ylabel(anno_title)
            ax.set_xlabel(anno_center_title)
    
    if legend:
        _add_legend_or_colorbars(fig, axs, colors, cmap=cmap, min_max=min_max, scale_legend=scale_legend)
    
    return fig
[docs]
def annotated_heatmap(
    adata,
    obs_key=None,
    var_key=None,
    n_genes=None,
    var_highlight=None,
    obs_colors=None,
    var_colors=None,
    cmap='bwr',
    cmap_center=0,
    cmap_vmin_vmax=(-2,2),
    trafo=None,
    axsize=(4,4),
):
    """\
    Plots a heatmap of cells and genes grouped by categorical annotations.
    
    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` with annotation in `.obs` and/or `.var`.
    obs_key
        The `.obs` key with the categorical `obs` annotation to use. If `None`,
        the observations are not grouped and plotted in the order in which they
        appear in `adata`.
    var_key
        The `.var` key with the categorical `var` annotation to use. Can also
        be a mapping of annotations to list-likes of var names. If `None`, the
        genes are not grouped and plotted in the order in which they appear in
        `adata`. If `n_genes` is set, the meaning of this key is modified.
    n_genes
        The number of differentially expressed genes to find for the groups of
        observations. The differentially expressed genes will be used in place
        of a categorical `var` annotation. Setting `n_genes` changes the
        behaviour of `var_key`: it is interpreted as a categorical `.obs` key
        defining the groups of observations for which to derive the
        differentially expressed genes. If `var_key` is `None`, `obs_key` is
        used for that.
    var_highlight
        A list-like of var names to annotate.
    obs_colors
        A dict-like with the colors to use for the `obs_key` annotation. If
        `None`, default colors are used.
    var_colors
        A dict-like with the colors to use for the `var_key` annotation. If
        `None`, default colors are used, except if `n_genes` is set, which
        triggers the usage of `obs_colors` for `var_colors`.
    cmap
        A string/colormap to use in the heatmap.
    cmap_center
        A value to use as center of the colormap. E.g. choosing `0` sets the
        central color to `0`. If `None`, the colormap spans the entire value
        range.
    cmap_vmin_vmax
        A tuple giving the range of values for the colormap. This can be
        modified by `cmap_center`.
    trafo
        Whether to normalize, logarithmize, and scale the data prior to
        plotting. This makes sense if bare count data is supplied. If the data
        is already preprocessed, this can be set to `False`. If `None`, a
        heuristic tries to figure out whether count data was supplied, and if
        so performs the preprocessing.
    axsize
        Tuple of width and height of the main axis.
    
    Returns
    -------
    A :class:`~matplotlib.figure.Figure`.
    
    """
    
    if trafo is None:
        got_counts = True
        try:
            preprocessing.check_counts_validity(adata.X)
        except:
            got_counts = False
        if got_counts:
            print('`adata` looks like it contains bare counts and will be normalized, logarithmized, and scaled for plotting. If that is not desired (and to get rid of this message), explicitly set `trafo` to something else than `None`.')
            trafo = True
        else:
            trafo = False
    if trafo or n_genes:
        adata = adata.copy()
    if trafo:
        sc.pp.normalize_total(adata, target_sum=1e6)
        sc.pp.log1p(adata)
    
    if var_key is not None or n_genes is not None:
        if n_genes is not None:
            if var_key is not None:
                if hasattr(var_key, 'items'):
                    raise ValueError(f'`var_key` cannot be a dict-like if `n_genes` is set!')
                if var_key not in adata.obs:
                    raise ValueError(f'`var_key` {var_key!r} is not a column of `adata.obs` even though `n_genes` is set!')
                if not hasattr(adata.obs[var_key], 'cat'):
                    print(f'WARNING: `var_key` {var_key!r} is not a categorical column of `adata.obs` even though `n_genes` is set! Treating it as a categorical column...')
                group_key = var_key
            elif obs_key is not None:
                if obs_key not in adata.obs:
                    raise ValueError(f'`obs_key` {obs_key!r} is not a column of `adata.obs`!')
                if not hasattr(adata.obs[obs_key], 'cat'):
                    print(f'WARNING: `obs_key` {obs_key!r} is not a categorical column of `adata.obs`! Treating it as a categorical column...')
                group_key = obs_key
                if var_colors is None:
                    var_colors = obs_colors
            else:
                raise ValueError(f'`n_genes` can only be used if at least one of [`obs_key`, `var_key`] is not `None`!')
            ukey = utils.find_unused_key(adata.uns)
            sc.tl.rank_genes_groups(adata, group_key, key_added=ukey, n_genes=n_genes)
            marker = pd.DataFrame(adata.uns[ukey]['names'])
            del adata.uns[ukey]
        else: # if var_key is not None:
            if hasattr(var_key, 'items'):
                marker = var_key
            else:
                if var_key not in adata.var:
                    raise ValueError(f'`var_key` {var_key!r} is not a column of `adata.var`!')
                if not hasattr(adata.var[var_key], 'cat'):
                    print(f'WARNING: `var_key` {var_key!r} is not a categorical column of `adata.var`! Treating it as a categorical column...')
                marker = {c: df.index for c,df in adata.var.groupby(var_key)}
        all_marker = [c for l,m in marker.items() for c in m]
        # reorder genes to represent the annotation
        adata = adata[:,all_marker]
        if var_colors is None:
            var_colors = to_rgba_array(get_default_colors(len(marker)))
            var_colors = {cat:col for cat,col in zip(marker.keys(),var_colors)}
        else:
            var_colors = {cat:to_rgba(col) for cat,col in var_colors.items()}
        all_marker_colors = [var_colors[l] for l,m in marker.items() for g in m]
        all_marker_labels = [l for l,m in marker.items() for c in m]
        marker_centers = { l: np.median(np.arange(len(all_marker_labels))[np.array(all_marker_labels) == l]) for l in marker.keys() }
    
    if obs_key is not None:
        if obs_key not in adata.obs:
            raise ValueError(f'`obs_key` {obs_key!r} is not a column of `adata.obs`!')
        if not hasattr(adata.obs[obs_key], 'cat'):
            print(f'WARNING: `obs_key` {obs_key!r} is not a categorical column of `adata.obs`! Treating it as a categorical column...')
        cells = {c: df.index for c,df in adata.obs.groupby(obs_key)}
        all_cells = [c for l,m in cells.items() for c in m]
        # reorder cells to represent the annotation
        adata = adata[all_cells]
        if obs_colors is None:
            obs_colors = to_rgba_array(get_default_colors(len(cells)))
            obs_colors = {cat:col for cat,col in zip(cells.keys(),obs_colors)}
        else:
            obs_colors = {cat:to_rgba(col) for cat,col in obs_colors.items()}
        all_cell_colors = [obs_colors[l] for l,m in cells.items() for c in m]
        all_cell_labels = [l for l,m in cells.items() for c in m]
        cell_centers = { l: np.median(np.arange(len(all_cell_labels))[np.array(all_cell_labels) == l]) for l in cells.keys() }
    
    if var_highlight is not None:
        # reorder highlighted genes to simplify labelling
        var_highlight = pd.Series(adata.var.index)[adata.var.index.isin(var_highlight)]
        highlight_centers = pd.Series(var_highlight.index, index=var_highlight)
    
    if trafo:
        if adata.is_view:
            adata = adata.copy()
        sc.pp.scale(adata)
    
    data = adata.X
    if scipy.sparse.issparse(data):
        data = data.A
    
    if cmap_vmin_vmax is None:
        cmap_vmin_vmax = [data.min(),data.max()]
    cmap_vmin_vmax = np.array(cmap_vmin_vmax)
    if cmap_center is not None:
        shifted = cmap_vmin_vmax - cmap_center
        abs_max = np.max(np.abs(shifted))
        shifted[:] = [-abs_max,abs_max]
        cmap_vmin_vmax = shifted + cmap_center
    
    # plotting
    fig,axs = subplots(2,2,axsize=axsize,width_ratios=[0.03,axsize[0]/4],height_ratios=[0.03,axsize[1]/4],x_padding=0.05,y_padding=0.05)
    axs[0,0].axis('off')
    im = axs[1,1].imshow(data.T,aspect='auto',cmap=cmap, vmin=cmap_vmin_vmax[0], vmax=cmap_vmin_vmax[1])
    axs[1,1].set_xticks([])
    axs[1,1].set_yticks([])
    
    rel_dpi_factor = fig.get_dpi() / 72
    cax_width = 100 * rel_dpi_factor # color bar width in pixel
    cax_height = 10 * rel_dpi_factor # color bar height in pixel
    cax_offset = 10 * rel_dpi_factor # color bar y offset in pixel
    cax_l = 1 - fig.transFigure.inverted().transform([cax_width,0])[0] + fig.transFigure.inverted().transform([0,0])[0]
    cax_b = 0 - fig.transFigure.inverted().transform([0,cax_height+cax_offset])[1] + fig.transFigure.inverted().transform([0,0])[1]
    cax_w = fig.transFigure.inverted().transform([cax_width,0])[0] - fig.transFigure.inverted().transform([0,0])[0]
    cax_h = fig.transFigure.inverted().transform([0,cax_height])[1] - fig.transFigure.inverted().transform([0,0])[1]
    cax = fig.add_axes((cax_l,cax_b,cax_w,cax_h))
    fig.colorbar(im, cax=cax, orientation='horizontal')
    
    # labelling
    def _collides(ann1, ann2, offset1, offset2, fig, direction):
        if offset1 != offset2:
            return False
        extent1 = ann1.get_window_extent(fig.canvas.get_renderer())
        extent2 = ann2.get_window_extent(fig.canvas.get_renderer())
        if direction == 'x':
            return extent2.x1 > extent1.x0
        else:
            return extent1.y1 > extent2.y0
    def _collides_any(ann, off, anns, offs, fig, direction):
        for ann2, off2 in zip(anns, offs):
            if _collides(ann, ann2, off, off2, fig, direction):
                return True
        return False
    def _find_offset(ann, anns, offs, fig, direction):
        off = 0
        while _collides_any(ann, off, anns, offs, fig, direction):
            off += 1
        return off
    def _find_shift(ann, anns, fig, direction):
        if len(anns) == 0:
            return 0
        extent1 = ann.get_window_extent(fig.canvas.get_renderer())
        extent2 = anns[-1].get_window_extent(fig.canvas.get_renderer())
        if direction == 'x':
            delta = extent2.x1 - extent1.x0
        else:
            delta = extent1.y1 - extent2.y0
        if delta > 0:
            return delta
        else:
            return 0
    
    if obs_key is not None:
        axs[0,1].imshow(np.array([all_cell_colors]),aspect='auto')
        axs[0,1].set_xticks([])
        axs[0,1].set_yticks([])
        anns = []
        offs = []
        bar = (axs[0,1].transData.inverted().transform([0,-15*rel_dpi_factor])[1] - axs[0,1].transData.inverted().transform([0,0])[1])
        for l,c in cell_centers.items():
            ann = axs[0,1].annotate(l, (c, -0.5), (c, -0.5-bar), ha="center", va="center", rotation=0, size=10, arrowprops={'arrowstyle':'-'})
            offset = _find_offset(ann, anns, offs, fig, direction='x')
            ann.xyann = (ann.xyann[0],ann.xyann[1]-offset*1.8)
            anns.append(ann)
            offs.append(offset)
    else:
        axs[0,1].axis('off')
    
    if var_key is not None or n_genes is not None:
        axs[1,0].imshow(np.array([all_marker_colors]).swapaxes(0,1),aspect='auto')
        axs[1,0].set_xticks([])
        axs[1,0].set_yticks([])
        anns = []
        bar = (axs[1,0].transData.inverted().transform([15*rel_dpi_factor,0])[0] - axs[1,0].transData.inverted().transform([0,0])[0])
        for l,c in marker_centers.items():
            ann = axs[1,0].annotate(l, (-0.5, c), (-0.5-bar, c), ha="right", va="center", rotation=0, size=10, arrowprops={'arrowstyle':'-'})
            shift = _find_shift(ann, anns, fig, direction='y')
            shift = (axs[1,0].transData.inverted().transform([0,shift])[1] - axs[1,0].transData.inverted().transform([0,0])[1])
            ann.xyann = (ann.xyann[0],ann.xyann[1]-shift)
            anns.append(ann)
    else:
        axs[1,0].axis('off')
    
    if var_highlight is not None:
        anns = []
        bar = (axs[1,1].transData.inverted().transform([15*rel_dpi_factor,0])[0] - axs[1,1].transData.inverted().transform([0,0])[0])
        offset = (axs[1,1].transData.inverted().transform([0*rel_dpi_factor,0])[0] - axs[1,1].transData.inverted().transform([0,0])[0]) - 0.5
        for l,c in highlight_centers.items():
            ann = axs[1,1].annotate(l, (len(adata.obs.index)+offset, c), (len(adata.obs.index)+offset+bar, c), ha="left", va="center", rotation=0, size=10, arrowprops={'arrowstyle':'-'}, annotation_clip=False)
            shift = _find_shift(ann, anns, fig, direction='y')
            shift = (axs[1,1].transData.inverted().transform([0,shift])[1] - axs[1,1].transData.inverted().transform([0,0])[1])
            ann.xyann = (ann.xyann[0],ann.xyann[1]-shift)
            anns.append(ann)
    
    return fig
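# Hypothetical usage sketch (not part of the original module): random counts
# with a grouped observation annotation and `n_genes` differential genes per
# group. With `trafo=True`, the copy used for plotting is normalized,
# logarithmized, and scaled. All names and values are made up for illustration.
def _demo_annotated_heatmap():
    rng = np.random.default_rng(0)
    adata = ad.AnnData(rng.poisson(2.0, size=(60, 30)).astype(np.float32))
    adata.obs['cluster'] = pd.Categorical(rng.choice(['a', 'b', 'c'], size=60))
    return annotated_heatmap(adata, obs_key='cluster', n_genes=5, trafo=True)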
@njit(parallel=False,fastmath=True,cache=True)
def _anno_hist(dist, intervals, weights, anno):
    Nobs,Nanno = anno.shape
    assert(Nobs==len(dist))
    assert(Nobs==len(weights))
    Nd = len(intervals)-1
    hist = np.zeros((Nd,))
    sums = np.zeros((Nd,Nanno))
    for i in range(Nobs):
        di = dist[i]
        _di = np.argmax(di <= intervals) - 1 # index of the interval containing di
        if di > 0:
            hist[_di] += weights[i]
            sums[_di] += weights[i] * anno[i]
    for d in range(Nd):
        sums[d] /= hist[d] # weighted mean annotation per distance bin
    return sums
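# Illustrative sketch (not part of the original module) of the binning
# semantics: with edges [0, 1, 2], a distance of 0.5 falls into bin 0, while
# 1.5 and 1.7 fall into bin 1, where their annotation vectors are averaged
# with equal weights.
def _demo_anno_hist():
    dist = np.array([0.5, 1.5, 1.7])
    intervals = np.array([0.0, 1.0, 2.0])
    weights = np.ones(3)
    anno = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]])
    return _anno_hist(dist, intervals, weights, anno) # [[1.0, 0.0], [0.5, 0.5]]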
[docs]
def annotation_coordinate(
    adata,
    annotation_key,
    coordinate_key,
    group_key=None,
    reference_key=None,
    max_coordinate=None,
    delta_coordinate=None,
    axsize=None,
    colors=None,
    stacked=True,
    verbose=1,
):
    """\
    Plots an annotation density with respect to a scalar coordinate.
    
    Parameters
    ----------
    adata
        A :class:`~anndata.AnnData`.
    annotation_key
        The `.obs` or `.obsm` key to plot.
    coordinate_key
        The `.obs` key or (`.obsm` key, column name) pair with the scalar
        coordinate(s).
    group_key
        A categorical group annotation. The plot is done separately per group.
        If `None`, plots for only one group are generated.
    reference_key
        The `.obs` key to use as weights (i.e. the weight to use for
        calculating the annotation density). If `None`, use `1` per
        observation, which makes sense for categorical annotations or
        fractional annotations which should sum to `1`.
    max_coordinate
        The maximum coordinate to use. If `None` or `np.inf`, uses the maximum
        coordinate in the data.
    delta_coordinate
        The width in coordinate for coordinate discretization. If `None`,
        takes `max_coordinate/100`.
    axsize
        Size of a single axis in the plot.
    colors
        A mapping of annotation values to colors. If `None`, default colors
        are used.
    stacked
        Whether to plot the different annotations on different scales on
        separate stacked plots or on the same scale in a single plot.
    verbose
        Level of verbosity, with `0` (no output), `1` (some output), ...
    
    Returns
    -------
    A :class:`~matplotlib.figure.Figure`.
    
    """
    
    if group_key is None:
        group_adatas = {'':adata}
    else:
        group_adatas = {group:adata[df.index] for group,df in adata.obs.groupby(group_key) if len(df)>0 }
    
    if annotation_key in adata.obs:
        annotation = adata.obs[annotation_key]
        if hasattr(annotation, 'cat'):
            annotation = pd.get_dummies(annotation)
        else:
            annotation = pd.DataFrame(annotation)
    elif annotation_key in adata.obsm:
        annotation = adata.obsm[annotation_key].copy()
    else:
        raise ValueError(f'The `annotation_key` {annotation_key!r} is neither in `adata.obs` nor `adata.obsm`!')
    
    if pd.api.types.is_list_like(coordinate_key):
        if len(coordinate_key) == 2 and coordinate_key[0] in adata.obsm and coordinate_key[1] in adata.obsm[coordinate_key[0]]:
            coordinates = adata.obsm[coordinate_key[0]][coordinate_key[1]]
        else:
            raise ValueError(f'The `coordinate_key` {coordinate_key!r} is list-like, but not something of length 2 containing a `adata.obsm` key and a column name therein!')
        #coordinate_key = f'{coordinate_key[0]}:{coordinate_key[1]}'
        coordinate_key = f'{coordinate_key[1]}'
    elif coordinate_key in adata.obs:
        coordinates = adata.obs[coordinate_key]
    else:
        raise ValueError(f'The `coordinate_key` {coordinate_key!r} is not in `adata.obs` and not a valid specification for something in `adata.obsm`!')
    
    if max_coordinate is None or max_coordinate == np.inf:
        max_coordinate = coordinates.to_numpy().max()
    max_coordinate = float(max_coordinate)
    
    if delta_coordinate is None:
        delta_coordinate = max_coordinate / 100
    
    n_intervals = int(max_coordinate / delta_coordinate)
    max_coordinate = n_intervals * delta_coordinate
    intervals = np.arange(0,max_coordinate+delta_coordinate*0.5,delta_coordinate)
    midpoints = (intervals[1:] + intervals[:-1]) * 0.5
    
    if reference_key is None:
        reference_weights = pd.Series(np.ones(len(adata.obs.index),dtype=float),index=adata.obs.index)
    elif reference_key in adata.obs:
        reference_weights = adata.obs[reference_key]
    else:
        raise ValueError(f'The `reference_key` {reference_key!r} is not in `adata.obs`!')
    
    annotation_categories = annotation.columns
    colors,annotation_categories = _get_colors(colors, annotation_categories)
    
    if stacked:
        if axsize is None:
            axsize = (6,0.7)
        fig,axs = subplots(len(group_adatas),len(annotation.columns),axsize=axsize,sharex=True,sharey='row',y_padding=0,x_padding=0)
    else:
        if axsize is None:
            axsize = (6,4)
        fig,axs = subplots(len(group_adatas),1,axsize=axsize,sharex=True,sharey='row',y_padding=0,x_padding=0)
    
    for gi,(group,group_adata) in enumerate(group_adatas.items()):
        group_annotation = annotation.loc[group_adata.obs.index].to_numpy()
        group_coordinates = coordinates.loc[group_adata.obs.index].to_numpy()
        group_reference_weights = reference_weights.loc[group_adata.obs.index].to_numpy()
        no_nan = ~np.isnan(group_annotation).any(axis=1)
        anno = _anno_hist(group_coordinates[no_nan], intervals, group_reference_weights[no_nan], group_annotation[no_nan])
        anno = pd.DataFrame(anno,index=midpoints,columns=annotation.columns)
        if stacked:
            for i,c in enumerate(colors.index):
                axs[i,gi].plot(anno[c],c=colors[c],label=c)
                axs[i,gi].xaxis.grid(True)
        else:
            for i,c in enumerate(colors.index):
                axs[0,gi].plot(anno[c],c=colors[c],label=c)
                axs[0,gi].xaxis.grid(True)
        group_string = f' in {group}' if group != '' else ''
        axs[0,gi].set_title(f'{annotation_key} VS distance from {coordinate_key}{group_string}')
    
    axs[0,-1].legend(handles=[mpatches.Patch(color=color, label=ind) for (ind, color) in colors.items()], bbox_to_anchor=(1, 1), loc='upper left', ncol=1)
    
    return fig
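# Hypothetical usage sketch (not part of the original module): a fractional
# annotation in `.obsm` plotted against a scalar `.obs` coordinate, binned in
# steps of 10. All names and values are made up for illustration.
def _demo_annotation_coordinate():
    rng = np.random.default_rng(0)
    adata = ad.AnnData(np.zeros((500, 1), dtype=np.float32))
    frac_a = rng.uniform(0, 1, size=500)
    adata.obsm['anno'] = pd.DataFrame({'A': frac_a, 'B': 1 - frac_a}, index=adata.obs.index)
    adata.obs['dist'] = rng.uniform(0, 100, size=500)
    # one curve per annotation category, one stacked axis per category
    return annotation_coordinate(adata, annotation_key='anno', coordinate_key='dist', delta_coordinate=10.0)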
[docs]
def dotplot(
    adata,
    genes,
    group_key,
    log1p=True,
    marks=None,
    marks_colors=None,
    swap_axes=True,
):
    """\
    Dot plot of expression values.
    
    This is similar to :func:`scanpy.pl.dotplot` with customizations, e.g. the
    option to mark selected dots.
    
    Parameters
    ----------
    adata
        An :class:`~anndata.AnnData` including annotation in `.obs`.
    genes
        The `.var` index values to plot, as a list-like.
    group_key
        An `.obs` key with categorical group information.
    log1p
        Whether to log1p-transform the data prior to plotting.
    marks
        A :class:`pandas.DataFrame` containing categorical markers for the
        dots, with genes in the rows and groups in the columns.
    marks_colors
        A mapping from the categories in `marks` to colors; if `None`, default
        colors are used.
    swap_axes
        If `False`, the x axis contains the genes and the y axis the groups.
        Otherwise the axes are swapped.
    
    Returns
    -------
    A :class:`~matplotlib.figure.Figure`.
    
    """
    
    if not pd.Index(genes).isin(adata.var.index).all():
        raise ValueError(f'The genes {pd.Index(genes).difference(adata.var.index)!r} are not available in `adata.var`!')
    markers = genes[::-1]
    
    if group_key not in adata.obs.columns:
        raise ValueError(f'The `group_key` {group_key!r} is not available in `adata.obs`!')
    if hasattr(adata.obs[group_key], 'cat'):
        cluster = adata.obs[group_key].cat.categories
    else:
        cluster = adata.obs[group_key].unique()
    
    if swap_axes:
        xticklabels = cluster
        yticklabels = markers
    else:
        xticklabels = markers
        yticklabels = cluster
    
    fig,axs = subplots(axsize=0.25*np.array([len(xticklabels),len(yticklabels)]))
    
    x = np.arange(len(xticklabels)) # the label locations
    y = np.arange(len(yticklabels)) # the label locations
    if not swap_axes:
        x = x[::-1]
        y = y[::-1]
    
    axs[0,0].set_xticks(x)
    axs[0,0].set_xticklabels(xticklabels, rotation=45, ha='right')
    axs[0,0].set_yticks(y)
    axs[0,0].set_yticklabels(yticklabels)
    axs[0,0].set_xlim((x.min()-0.5,x.max()+0.5))
    axs[0,0].set_ylim((y.min()-0.5,y.max()+0.5))
    axs[0,0].set_axisbelow(True)
    axs[0,0].grid(True)
    
    marker_counts = adata[:,markers].to_df()
    if log1p:
        marker_counts = np.log1p(marker_counts)
    mean_exp = pd.DataFrame({c: marker_counts.loc[df.index].mean(axis=0) for c,df in adata.obs.groupby(group_key) })
    mean_pos = pd.DataFrame({c: (marker_counts.loc[df.index] != 0).mean(axis=0) for c,df in adata.obs.groupby(group_key) })
    if marks is not None:
        marks = marks.reindex_like(mean_pos)
    
    mean_exp_index_name = 'index' if mean_exp.index.name is None else mean_exp.index.name
    mean_pos_index_name = 'index' if mean_pos.index.name is None else mean_pos.index.name
    mean_exp = pd.melt(mean_exp, ignore_index=False).reset_index().rename(columns={mean_exp_index_name:'value','variable':'cluster','value':'mean_exp'})
    mean_pos = pd.melt(mean_pos, ignore_index=False).reset_index().rename(columns={mean_pos_index_name:'value','variable':'cluster','value':'mean_pos'})
    if marks is not None:
        marks.index.name = None
        marks.columns.name = None
        marks_index_name = 'index' if marks.index.name is None else marks.index.name
        marks = pd.melt(marks, ignore_index=False).reset_index().rename(columns={marks_index_name:'value','variable':'cluster','value':'marks'})
        if marks_colors is None:
            marks_colors = get_default_colors(marks['marks'].unique())
    
    all_df = pd.merge(mean_exp, mean_pos, on=['value', 'cluster'])
    if marks is not None:
        all_df = pd.merge(all_df, marks, on=['value', 'cluster'], how='outer')
        if all_df['marks'].isna().any():
            raise ValueError(f'There were gene-group combinations without a match in "marks"!')
    
    if swap_axes:
        all_df['x'] = all_df['cluster'].map(pd.Series(x,index=xticklabels))
        all_df['y'] = all_df['value'].map(pd.Series(y,index=yticklabels))
    else:
        all_df['x'] = all_df['value'].map(pd.Series(x,index=xticklabels))
        all_df['y'] = all_df['cluster'].map(pd.Series(y,index=yticklabels))
    
    legend_items = []
    
    mean_exp_min, mean_exp_max = all_df['mean_exp'].min(), all_df['mean_exp'].max()
    norm = Normalize(vmin=mean_exp_min, vmax=mean_exp_max)
    cmap = 'Reds' # LinearSegmentedColormap.from_list('mean_exp', [(0,(1, 1, 1)),(1,(1, g, b))])
    mapper = ScalarMappable(norm=norm, cmap=cmap)
    color = [ tuple(c) for c in mapper.to_rgba(all_df['mean_exp'].to_numpy()) ]
    legend_items.append(mpatches.Patch(color='#0000', label='mean expression'))
    mean_exp_for_legend = np.linspace(mean_exp_min, mean_exp_max, 4)
    legend_items.extend([mpatches.Patch(color=color, label=f'{ind:.2f}') for color,ind in zip(mapper.to_rgba(mean_exp_for_legend),mean_exp_for_legend)])
    
    mean_pos_min, mean_pos_max = all_df['mean_pos'].min(), all_df['mean_pos'].max()
    def size_map(frac):
        return (frac/mean_pos_max * 14)**2
    size = size_map(all_df['mean_pos'])
    legend_items.append(mpatches.Patch(color='#0000', label='fraction of expressing cells'))
    mean_pos_for_legend = np.linspace(mean_pos_min, mean_pos_max, 5)[1:]
    legend_items.extend([mlines.Line2D([], [], color='#aaa', linestyle='none', marker='o', markersize=np.sqrt(size_map(ind)), label=f'{ind:.2f}') for ind in mean_pos_for_legend])
    
    edgecolors = '#aaa' if marks is None else all_df['marks'].map(marks_colors)
    if marks is not None:
        marks_name = marks_colors.name if hasattr(marks_colors, 'name') else ''
        legend_items.append(mpatches.Patch(color='#0000', label=marks_name))
        legend_items.extend([mlines.Line2D([], [], color='#aaa', linestyle='none', fillstyle='none', markeredgecolor=color, marker='o', markersize=np.sqrt(size_map(mean_pos_for_legend[-2])), label=f'{ind}') for ind,color in marks_colors.items()])
    
    axs[0,0].scatter(all_df['x'], all_df['y'], c=color, s=size, edgecolors=edgecolors)
    axs[0,0].legend(handles=legend_items, bbox_to_anchor=(1, 1), loc='upper left', ncol=1)
    
    return fig
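# Hypothetical usage sketch (not part of the original module): dot plot of
# random count data with one gene-group dot marked via `marks`. All names and
# values are made up for illustration.
def _demo_dotplot():
    rng = np.random.default_rng(0)
    genes = ['g0', 'g1', 'g2']
    adata = ad.AnnData(rng.poisson(1.0, size=(100, 3)).astype(np.float32))
    adata.var_names = genes
    adata.obs['cluster'] = pd.Categorical(rng.choice(['c0', 'c1'], size=100))
    # marks has genes in the rows and groups in the columns
    marks = pd.DataFrame('other', index=genes[::-1], columns=['c0', 'c1'])
    marks.loc['g0', 'c0'] = 'marker'
    return dotplot(adata, genes, 'cluster', marks=marks)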