Source code for crema.writers.txt

"""Writer to save results in a tab-delmited format"""
from pathlib import Path
from collections import defaultdict

import pandas as pd


def to_txt(
    conf, output_dir=None, file_root=None, sep="\t", decoys=False, precision=6
):
    """Save confidence estimates to delimited text files.

    Write the confidence estimates for each of the available levels
    (i.e. PSMs, peptides, proteins) to separate flat text files using the
    specified delimiter. If more than one collection of confidence estimates
    is provided, they will be combined, yielding a single file for each level
    specified by either dataset.

    Parameters
    ----------
    conf : Confidence object or tuple of Confidence objects
        One or more :py:class:`~crema.confidence.Confidence` objects.
    output_dir : str or None, optional
        The directory in which to save the files. :code:`None` will use the
        current working directory.
    file_root : str or None, optional
        An optional prefix for the confidence estimate files. The suffix will
        always be "crema.{level}.txt" where "{level}" indicates the level at
        which confidence estimation was performed (i.e. PSMs, peptides,
        proteins).
    sep : str, optional
        The delimiter to use.
    decoys : bool, optional
        Save decoys confidence estimates as well?
    precision : int, optional
        Precision for float values.

    Returns
    -------
    list of str
        The paths to the saved files.

    Raises
    ------
    ValueError
        If `conf` is a string.
    """
    # Validate explicitly instead of with `assert`: assertions are stripped
    # when Python runs with -O, which would let a string slip through and be
    # iterated character by character.
    if isinstance(conf, str):
        raise ValueError("'conf' should be a Confidence object, not a string.")

    # A single (non-iterable) object is wrapped so the loop below is uniform.
    try:
        iter(conf)
    except TypeError:
        conf = [conf]

    # Build the common file prefix: [output_dir/][file_root.]crema
    file_base = "crema"
    if file_root is not None:
        file_base = file_root + "." + file_base
    if output_dir is not None:
        file_base = Path(output_dir, file_base)

    # Pool the dataframes of every collection by level.
    results = defaultdict(list)
    for res in conf:
        for level, qval_list in _get_level_data(res, decoys).items():
            results[level] += qval_list

    # One output file per level, concatenating all pooled dataframes.
    out_files = []
    for level, qval_list in results.items():
        out_file = str(file_base) + f".{level}.txt"
        pd.concat(qval_list).to_csv(
            out_file, sep=sep, index=False, float_format=f"%.{precision}f"
        )
        out_files.append(out_file)

    return out_files
def _get_level_data(conf, decoys): """Return the dataframes for each level. Parameters ---------- conf : a Confidence object A LinearConfidence object. decoys : bool Should decoys be included? Returns ------- Dict Each entry contains a level, dataframe pair. """ results = defaultdict(list) for level, qvals in conf.confidence_estimates.items(): if qvals is None: continue results[level].append(qvals) if decoys: for level, qvals in conf.decoy_confidence_estimates.items(): if qvals is None: continue results[f"decoy.{level}"].append(qvals) return results