scatterplot: Add the testfunc parameter.

The scatterplot function can automatically perform a statistical test
between two subtracks.
master
Damien Goutte-Gattat 3 years ago
parent 8e9169ba7c
commit cd5d5eba36
  1. 33
      incenp/plotting/scatter.py
  2. 12
      incenp/plotting/util.py

@ -21,7 +21,7 @@ The purpose of this module is to facilitate the creation of scatter
plots from multi-indexed pandas datasets.
"""
from .util import xdistr
from .util import xdistr, get_stars
def scatterplot_subtrack(ax, data, n_track, n_subtrack, max_subtrack,
@ -49,7 +49,7 @@ def scatterplot_subtrack(ax, data, n_track, n_subtrack, max_subtrack,
def scatterplot(ax, data, columns, subtrackcolumns=False,
tracks=[None], trackname=0,
subtracks=[None], subtrackname=1,
colors='rgb', width=.7, min_sep=.1):
colors='rgb', width=.7, min_sep=.1, testfunc=None):
"""Create a scatterplot from multi-indexed data.
:param ax: The matplotlib axis to draw on
@ -70,6 +70,10 @@ def scatterplot(ax, data, columns, subtrackcolumns=False,
:param min_sep: If two values in a subtrack differ by less than this
parameter, they will be considered to belong on the same rank
and will be distributed along the X axis
:param testfunc: A statistical test function (must accept two data
series as arguments and return a pvalue); if set and there are
exactly two subtracks, the function will be called for each
track and the significance will be displayed on top of the track
"""
# Get number of subtracks and track labels
@ -95,6 +99,7 @@ def scatterplot(ax, data, columns, subtrackcolumns=False,
# Columns as tracks
for column in columns:
j = 0
testset = []
for subtrack in subtracks:
if subtrack is None:
@ -102,31 +107,51 @@ def scatterplot(ax, data, columns, subtrackcolumns=False,
else:
level = subtrackname if data.index.nlevels > 1 else None
subset = data.xs(subtrack, level=level).loc[:, column].dropna()
testset.append(subset)
scatterplot_subtrack(ax, subset, i, j, n, colors[j % n], width, min_sep)
j += 1
if testfunc and len(subtracks) == 2:
_do_test(ax, testset, testfunc, i, n)
i += 1
else:
for track in tracks:
j = 0
testset = []
if isinstance(columns, list) and subtrackcolumns:
# Columns as subtracks
for column in columns:
subset = data.xs(track, level=trackname).loc[:, column].dropna()
testset.append(subset)
scatterplot_subtrack(ax, subset, i, j, n, colors[j % n], width, min_sep)
j += 1
i += 1
else:
for subtrack in subtracks:
indexer = [track, subtrack] if subtrack else [track]
level = [trackname, subtrackname] if subtrack else [trackname]
subset = data.xs(indexer, level=level).loc[:, columns].dropna()
testset.append(subset)
scatterplot_subtrack(ax, subset, i, j, n, colors[j % n], width, min_sep)
j += 1
i += 1
if testfunc and len(testset) == 2:
_do_test(ax, testset, testfunc, i, n)
i += 1
ax.set_xticks([(.5 * (n - 2)) + n * i for i in range(len(labels))])
ax.set_xticklabels(labels)
def _do_test(ax, testset, testfunc, n_track, max_subtrack):
    """Run a statistical test on two data series and annotate the track.

    A horizontal bar is drawn above the two series and the significance
    marker returned by get_stars is written on top of it.

    :param ax: The matplotlib axis to draw on
    :param testset: A sequence whose first two items are the data
        series to compare
    :param testfunc: A function accepting the two data series and
        returning a pvalue; nothing is drawn if it returns None
    :param n_track: Index of the track; multiplied by max_subtrack to
        obtain the X offset of the annotation
    :param max_subtrack: Multiplier applied to n_track to obtain the
        X offset of the annotation
    """

    first, second = testset[0], testset[1]
    pvalue = testfunc(first, second)
    # Compare against None instead of relying on truthiness: a pvalue
    # of exactly 0.0 is the most significant result possible and must
    # still be displayed.
    if pvalue is None:
        return

    top = max(first.max(), second.max())
    # Place the bar 10% beyond the highest value. When that value is
    # negative, multiplying by 1.1 would push the bar *below* the data,
    # so shrink the magnitude instead.
    y = top * 1.1 if top >= 0 else top * .9
    offset = n_track * max_subtrack
    ax.hlines(y, offset - .5, offset + 1.5)
    ax.text(offset + .5, y + .01, get_stars(pvalue), ha='center')

@ -74,4 +74,14 @@ def xdistr(values, width, offset=0, even_max=10, center=False, min_sep=1):
xcoords.append(x)
return xcoords
def get_stars(pvalue):
    """Return the significance marker corresponding to a pvalue.

    :param pvalue: The pvalue to convert
    :return: '***' if the pvalue is below 0.001, '**' if it is below
        0.01, '*' if it is below 0.05, and 'ns' otherwise
    """

    # Thresholds are checked from the most to the least stringent, so
    # the first match is the strongest marker the pvalue qualifies for.
    for threshold, stars in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if pvalue < threshold:
            return stars
    return 'ns'

Loading…
Cancel
Save