"""Automatic discovery of top-level Analysis plugin modules via ``pkgutil``.

Scans ``src/polyzymd/analyses/`` for plugin packages or modules, imports them, and
collects all concrete :class:`Analysis` subclasses. No bootstrap files,
no package-level registry edits, no decorators needed.

How Discovery Works
-------------------
1. ``pkgutil.iter_modules()`` yields direct children of ``polyzymd.analyses``.
2. Each non-infrastructure, non-private top-level module or package is
   imported via ``importlib.import_module()``.
3. All module-level names are inspected; concrete subclasses of
   :class:`~polyzymd.analyses.base.Analysis` are collected.
4. Name collisions (two plugins with the same ``name``) raise
   ``RuntimeError`` immediately.

Contributor Impact
------------------
To add a new analysis, create a package in ``src/polyzymd/analyses/<name>/``
or a simple module at ``src/polyzymd/analyses/<name>.py``, define a class
inheriting from ``Analysis``, and set ``name`` as a ``ClassVar[str]``.
"""
from __future__ import annotations
import importlib
import inspect
import logging
import pkgutil
from functools import lru_cache
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from polyzymd.analyses.base import Analysis
# Package-scoped logger: the name is the fixed string "polyzymd.analyses"
# rather than ``__name__``.
logger = logging.getLogger("polyzymd.analyses")
# Names of ``polyzymd.analyses`` children that are infrastructure rather than
# plugins. A module whose relative path contains any of these components is
# never imported by discovery.
_SKIP_MODULES = frozenset(
    (
        "base",
        "config",
        "discovery",
        "exceptions",
        "mda",
        "orchestrator",
        "runner",
        "shared",
        "stats",
    )
)
# Top-level names of heavy, optional third-party dependencies that may not be
# installed in every environment. An ImportError whose failing module is one
# of these (or a submodule of one) is expected: the plugin is skipped with an
# INFO log message instead of aborting discovery.
_OPTIONAL_HEAVY_DEPS = frozenset(
    (
        "MDAnalysis",
        "ambertools",
        "dgl",
        "espaloma_charge",
        "mdanalysis",
        "openff",
        "openmm",
        "parmed",
        "pdbfixer",
        "torch",
    )
)
def _is_concrete_analysis(obj: type) -> bool:
    """Tell whether *obj* is a concrete (non-abstract) Analysis subclass.

    Parameters
    ----------
    obj : type
        Any object found while scanning a module; non-classes are rejected.

    Returns
    -------
    bool
        ``True`` only when *obj* is a class, is a strict subclass of
        ``Analysis`` (not ``Analysis`` itself), and has no remaining
        abstract methods.
    """
    # Imported inside the function rather than at module level
    # (presumably to avoid an import cycle with the base module — confirm).
    from polyzymd.analyses.base import Analysis

    if not inspect.isclass(obj):
        return False
    if obj is Analysis or not issubclass(obj, Analysis):
        return False
    # An empty or missing ``__abstractmethods__`` means the class is concrete.
    return not getattr(obj, "__abstractmethods__", None)
def _should_skip_module(modname: str, package_prefix: str) -> bool:
    """Decide whether discovery must ignore the module named *modname*.

    Parameters
    ----------
    modname : str
        Fully qualified module name discovered by ``pkgutil``.
    package_prefix : str
        Base package prefix including trailing dot, for example
        ``"polyzymd.analyses."``. It is stripped from *modname* when present.

    Returns
    -------
    bool
        ``True`` if any dotted component of the (prefix-stripped) name is
        private (starts with ``"_"``) or is listed in ``_SKIP_MODULES``.
    """
    if modname.startswith(package_prefix):
        tail = modname[len(package_prefix) :]
    else:
        tail = modname

    for part in tail.split("."):
        if part.startswith("_") or part in _SKIP_MODULES:
            return True
    return False
def _is_top_level_module(modname: str, package_prefix: str) -> bool:
    """Report whether *modname* sits directly under the analyses package.

    Parameters
    ----------
    modname : str
        Fully qualified module name discovered by ``pkgutil``.
    package_prefix : str
        Base package prefix including trailing dot, for example
        ``"polyzymd.analyses."``. It is stripped from *modname* when present.

    Returns
    -------
    bool
        ``True`` when the prefix-stripped module name contains no ``"."``
        package separator.
    """
    has_prefix = modname.startswith(package_prefix)
    remainder = modname[len(package_prefix) :] if has_prefix else modname
    return remainder.count(".") == 0
def _discover_plugins() -> dict[str, type["Analysis"]]:
    """Import all analysis modules and collect concrete Analysis subclasses.

    Every direct child of ``polyzymd.analyses`` that is not filtered out by
    :func:`_should_skip_module` is imported, and each concrete Analysis
    subclass reachable as a module attribute is registered under its
    whitespace-stripped ``name``. Classes whose ``name`` is missing, not a
    string, or blank are skipped with a warning.

    Returns
    -------
    dict[str, type[Analysis]]
        Mapping from canonical analysis name to Analysis subclass.

    Raises
    ------
    RuntimeError
        If two different plugin classes register the same ``name``.
    ImportError
        If a plugin module fails to import for a reason other than a missing
        dependency listed in ``_OPTIONAL_HEAVY_DEPS``.
    """
    import polyzymd.analyses as analyses_pkg

    registry: dict[str, type[Analysis]] = {}

    # Import only direct plugin packages and simple single-file plugin modules:
    # iter_modules() lists the immediate children of the package path.
    package_prefix = analyses_pkg.__name__ + "."
    for _finder, modname, _is_pkg in pkgutil.iter_modules(
        analyses_pkg.__path__, prefix=package_prefix
    ):
        # Skip infrastructure and private modules.
        if _should_skip_module(modname, package_prefix):
            continue

        try:
            module = importlib.import_module(modname)
        except ImportError as exc:
            # Distinguish optional-dep failures (skip) from plugin bugs (re-raise).
            failing_module = getattr(exc, "name", None) or ""
            is_optional_dep = any(
                failing_module == dep or failing_module.startswith(dep + ".")
                for dep in _OPTIONAL_HEAVY_DEPS
            )
            if not is_optional_dep:
                logger.error(
                    "Failed to import analysis module %s: %s",
                    modname,
                    exc,
                    exc_info=True,
                )
                raise
            logger.info(
                "Skipping analysis module %s: optional dependency %r not available",
                modname,
                failing_module,
            )
            continue

        for attr_name in dir(module):
            try:
                obj = getattr(module, attr_name)
            except AttributeError:
                # Module __getattr__ raised; skip this attribute.
                logger.debug(
                    "Could not access attribute %s.%s — skipping.",
                    modname,
                    attr_name,
                )
                continue

            if not _is_concrete_analysis(obj):
                continue

            # Read ``name`` defensively: a concrete subclass that forgot to
            # set it (or set a non-string) must not abort discovery of every
            # other plugin with an AttributeError.
            raw_name = getattr(obj, "name", None)
            if not isinstance(raw_name, str):
                logger.warning(
                    "Analysis class %s.%s has no string name (got %r) — skipping.",
                    obj.__module__,
                    obj.__qualname__,
                    raw_name,
                )
                continue

            name = raw_name.strip()
            if not name:
                logger.warning(
                    "Analysis class %s.%s has empty name — skipping.",
                    obj.__module__,
                    obj.__qualname__,
                )
                continue

            existing = registry.get(name)
            if existing is obj:
                # Same class found in multiple imports (sub-package re-export).
                continue
            if existing is not None:
                raise RuntimeError(
                    f"Analysis name collision: both {existing.__module__}.{existing.__qualname__} "
                    f"and {obj.__module__}.{obj.__qualname__} use name={name!r}."
                )

            registry[name] = obj
            # Lazy %-style arguments, consistent with the other log calls here.
            logger.debug("Discovered analysis plugin: %s (%s)", name, obj.__qualname__)

    return registry
@lru_cache(maxsize=1)
def _cached_registry() -> dict[str, type["Analysis"]]:
    """Build the canonical analysis registry and memoise the result.

    Discovery is delegated to :func:`_discover_plugins`; ``lru_cache`` then
    serves the stored registry on later calls. The cache is invalidated only
    by :func:`clear_cache`.
    """
    discovered = _discover_plugins()
    return discovered
[docs]
def clear_cache() -> None:
    """Reset the discovery cache so the next lookup re-runs discovery.

    Useful in tests.
    """
    _cached_registry.cache_clear()
    return None
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
[docs]
def get_analysis(name: str) -> type["Analysis"]:
    """Resolve a canonical analysis name to its plugin class.

    Parameters
    ----------
    name : str
        Canonical analysis name, for example ``"rmsf"``.

    Returns
    -------
    type[Analysis]
        The concrete Analysis subclass registered under *name*.

    Raises
    ------
    KeyError
        If no analysis matches *name*; the message lists the available
        names in sorted order.
    """
    plugins = _cached_registry()
    found = plugins.get(name)
    if found is None:
        available = sorted(plugins)
        raise KeyError(f"Unknown analysis {name!r}. Available: {', '.join(available)}")
    return found
[docs]
def list_analyses() -> dict[str, type["Analysis"]]:
    """Return every discovered analysis as a new, name-ordered mapping.

    Returns
    -------
    dict[str, type[Analysis]]
        Mapping ``canonical_name -> Analysis subclass``, sorted by name.
        The result is a fresh dict, not the cached registry object.
    """
    plugins = _cached_registry()
    return {key: plugins[key] for key in sorted(plugins)}
[docs]
def list_all_names() -> list[str]:
    """Return the canonical names of all discovered analyses in sorted order.

    Returns
    -------
    list[str]
        All canonical names.
    """
    return sorted(_cached_registry())