Source code for polyzymd.analyses.shared.paths
"""Path utilities for the analysis plugin system."""
from __future__ import annotations
import re
from collections.abc import Sequence
[docs]
def sanitize_label(label: str) -> str:
    """Turn a condition label into a name usable as a directory.

    The label is trimmed of surrounding whitespace, every ``%`` becomes
    ``pct`` and every space becomes an underscore. Any character that is
    still not a regex word character, a hyphen or a dot is replaced by an
    underscore, runs of underscores are collapsed to one, and underscores
    at either end are removed.

    Parameters
    ----------
    label : str
        Original condition label.

    Returns
    -------
    str
        Sanitized label. May be empty when nothing in the label survives
        (for example a label made only of punctuation such as ``"!!!"``).
    """
    cleaned = label.strip().replace("%", "pct").replace(" ", "_")
    # Two passes, both substituting a single underscore: first for every
    # disallowed character, then for each run of underscores that leaves.
    for pattern in (r"[^\w\-.]", r"_+"):
        cleaned = re.sub(pattern, "_", cleaned)
    return cleaned.strip("_")
[docs]
def format_replicate_cache_token(replicates: Sequence[int]) -> str:
    """Format replicate IDs for cache filenames without range collisions.

    The IDs are coerced with ``int()``, de-duplicated and sorted. A
    contiguous run is compacted to ``reps<first>-<last>``; any other set is
    listed explicitly as ``reps<a>_<b>_...`` so ``(1, 3)`` cannot collide
    with ``(1, 2, 3)``.

    Parameters
    ----------
    replicates : Sequence[int]
        Replicate IDs, in any order. Duplicates are ignored.

    Returns
    -------
    str
        Cache-safe token such as ``"reps1-3"`` or ``"reps1_3"``. A single
        ID ``n`` yields ``"reps<n>-<n>"``. ``"no_replicates"`` is returned
        when the input is empty, cannot be iterated, or holds a value that
        ``int()`` rejects with ``TypeError`` or ``ValueError``.
    """
    try:
        unique_ids = sorted({int(rep) for rep in replicates})
    except (TypeError, ValueError):
        return "no_replicates"
    if not unique_ids:
        return "no_replicates"

    first, last = unique_ids[0], unique_ids[-1]
    # Sorted, distinct integers form an unbroken run exactly when their
    # count equals the span they cover. Checking it arithmetically avoids
    # materialising ``range(first, last + 1)``, whose size depends on the
    # ID values rather than on how many IDs were passed.
    if last - first + 1 == len(unique_ids):
        return f"reps{first}-{last}"
    return "reps" + "_".join(map(str, unique_ids))