# Released under the MIT License. See LICENSE for details.
#
"""Gather re-export docs info for sphinx.
For every documented module with ``__all__``, finds entries that are
instance re-exports (not classes/functions/modules), traces them to
their canonical home, and extracts the ``#:`` comment block plus
type info. The sphinx generator uses this to inject forward-declare
blocks into filtered consumer ``__init__.py`` files so re-exported
instances get documented at every public package, not only at the
canonical home.
Design rationale: ``docs/design/python-api-packages.md``. Anything
in a package's ``__all__`` is part of that package's public API and
should appear on its docs page.
Hard error if any public instance lacks a ``#:`` comment block at
its canonical home. Forces every public instance to be documented
rather than silently producing a stub.
"""
from __future__ import annotations
import os
import sys
import ast
import json
import inspect
import subprocess
import importlib
from dataclasses import dataclass
from typing import TYPE_CHECKING
from efro.terminal import Clr
from efro.error import CleanError
if TYPE_CHECKING:
from typing import Any
#: Packages we walk for re-export info. Anything not in this list
#: simply doesn't get the autogen treatment — its re-exported
#: instances won't be documented at the re-export site. The
#: ballistica top-level packages plus their lib siblings cover
#: the user-facing API surface; expand here when adding new
#: featuresets.
_PACKAGES: list[str] = [
'babase',
'baclassic',
'baplus',
'bascenev1',
'bascenev1lib',
'batemplatefs',
'bauiv1',
'bauiv1lib',
'baenv',
]
[docs]
@dataclass
class Injection:
"""One forward-declare to inject into a consumer ``__init__.py``."""
name: str # the public name (e.g. 'app')
type_str: str # the annotation text (e.g. "'babase.App'")
comment: str # the full #: block, including leading #: markers
[docs]
def gather_reexport_injections(
projroot: str, filtered_ba_data_dir: str
) -> dict[str, list[Injection]]:
"""Orchestrate: spawn worker, parse JSON, return injections.
Returns a mapping from absolute filtered-init-file path to a
list of injections to prepend at that file's top.
Raises ``CleanError`` if any public instance lacks a ``#:``
comment at its canonical home.
"""
print(f'{Clr.BLU}{Clr.BLD}' f'Gathering re-export docs info...{Clr.RST}')
# Use dummy-modules just like vanilla_completions and the
# sphinx-build subprocess itself; otherwise C-extension stubs
# don't resolve.
subprocess.run(['make', 'dummymodules'], check=True, cwd=projroot)
outpath = os.path.join(projroot, 'build', 'reexport_docs.json')
os.makedirs(os.path.dirname(outpath), exist_ok=True)
assert (
'PYTHONPATH' not in os.environ
), 'Refusing to clobber an existing PYTHONPATH'
# Source PYTHONPATH — we want imports to land in the real
# source files so ``inspect.getsourcefile`` and the AST walk
# see the ``#:`` comments where humans wrote them, not in the
# filtered copy (which may already have been edited).
ba_data = os.path.join(projroot, 'src/assets/ba_data/python')
tools = os.path.join(projroot, 'tools')
dummies = os.path.join(projroot, 'build/dummymodules')
environ = dict(
os.environ,
PYTHONDONTWRITEBYTECODE='1',
BA_RUNNING_WITH_DUMMY_MODULES='1',
PYTHONPATH=f'{ba_data}:{tools}:{dummies}',
)
subprocess.run(
[sys.executable, '-m', 'batools.reexportdocs', outpath],
env=environ,
check=True,
cwd=projroot,
)
with open(outpath, encoding='utf-8') as infile:
data = json.load(infile)
if data.get('missing'):
lines = [
f" {m['module']}.{m['name']}"
f" (canonical at {m['canonical_module']})"
for m in data['missing']
]
missing_count = len(data['missing'])
raise CleanError(
f'{missing_count} public instance(s) lack a #:'
f' comment block at their canonical home:\n'
+ '\n'.join(lines)
+ '\n\nAdd a #: block above each canonical assignment.'
' See docs/design/python-api-packages.md.'
)
# Worker reports by package name; map to filtered __init__.py paths.
result: dict[str, list[Injection]] = {}
for pkg, raw_injections in data['injections'].items():
init_path = _filtered_init_for_package(filtered_ba_data_dir, pkg)
if init_path is None:
print(
f'{Clr.YLW}reexportdocs: skipping {pkg!r} —'
f' no filtered __init__.py found{Clr.RST}',
file=sys.stderr,
)
continue
result[init_path] = [Injection(**inj) for inj in raw_injections]
return result
def _filtered_init_for_package(
filtered_ba_data_dir: str, pkg: str
) -> str | None:
"""Locate the consumer ``__init__.py`` (or single .py) under the
filtered tree for a given package name."""
candidate_dir = os.path.join(filtered_ba_data_dir, pkg, '__init__.py')
if os.path.isfile(candidate_dir):
return candidate_dir
candidate_single = os.path.join(filtered_ba_data_dir, f'{pkg}.py')
if os.path.isfile(candidate_single):
return candidate_single
return None
def _worker_main(outpath: str) -> None:
"""Worker side: import packages, gather + emit injection plan.
Invoked as ``python -m batools.reexportdocs <outpath>``.
"""
injections: dict[str, list[dict[str, str]]] = {}
missing: list[dict[str, str]] = []
for modname in _PACKAGES:
try:
mod = importlib.import_module(modname)
except Exception as exc:
print(
f'{Clr.YLW}reexportdocs: skipping {modname!r}'
f' (import failed: {exc}){Clr.RST}',
file=sys.stderr,
)
continue
all_names = getattr(mod, '__all__', None)
if not all_names:
continue
for name in all_names:
try:
value = getattr(mod, name)
except AttributeError:
continue
# Classes/functions/modules are handled directly by
# sphinx's autodoc machinery — they carry their own
# docstrings. Only instances need our help.
if (
inspect.isclass(value)
or inspect.isroutine(value)
or inspect.ismodule(value)
):
continue
decl = _find_canonical_decl(modname, name)
if decl is None:
# Cannot find an assignment — likely an unusual
# pattern (descriptor, dynamic attribute, etc.).
# Skip silently; not a clear documentation gap.
continue
canonical_module, comment, type_str = decl
# Resolve type via runtime if AST didn't get one (e.g.
# plain ``name = Foo()`` assignment with no annotation).
if type_str is None:
type_str = _runtime_type_str(value)
if comment is None:
missing.append(
{
'module': modname,
'name': name,
'canonical_module': canonical_module,
}
)
continue
injections.setdefault(modname, []).append(
{
'name': name,
'type_str': type_str,
'comment': comment,
}
)
with open(outpath, 'w', encoding='utf-8') as outfile:
json.dump(
{'injections': injections, 'missing': missing},
outfile,
indent=1,
sort_keys=True,
)
outfile.write('\n')
def _find_canonical_decl(
start_module: str, name: str
) -> tuple[str, str | None, str | None] | None:
"""Walk import chains to the assignment site for ``name``.
Returns ``(canonical_module, comment, type_str)`` where:
- ``canonical_module`` is the module name where the actual
assignment lives (after following any ``from X import name``
chains).
- ``comment`` is the ``#:`` block immediately above the
assignment (full text, including leading ``#:``), or
``None`` if there is no such block.
- ``type_str`` is the annotation text if the assignment is
annotated (``name: T = ...``), else ``None``.
Returns ``None`` if the assignment can't be located at all —
typically means the name is set dynamically (descriptor,
setattr, etc.).
"""
visited: set[str] = set()
current_module = start_module
current_name = name
while True:
if current_module in visited:
return None # cycle, give up
visited.add(current_module)
scan = _scan_module_for_name(current_module, current_name)
if scan is None:
return None
if isinstance(scan, _ScanFound):
return (current_module, scan.comment, scan.type_str)
current_module = scan.new_module
current_name = scan.new_name
@dataclass
class _ScanFound:
"""Returned by ``_scan_module_for_name`` when the assignment is here."""
comment: str | None
type_str: str | None
@dataclass
class _ScanRedirect:
"""Returned when the name is re-imported from another module."""
new_module: str
new_name: str
def _scan_module_for_name(
modname: str, name: str
) -> _ScanFound | _ScanRedirect | None:
"""Scan one module's source for ``name``.
Returns one of:
- ``_ScanFound`` if ``name`` is assigned here.
- ``_ScanRedirect`` if ``name`` is re-imported (follow the chain).
- ``None`` if the name doesn't appear at module-level here.
"""
try:
mod = importlib.import_module(modname)
except Exception:
return None
src = inspect.getsourcefile(mod)
if not src or not os.path.isfile(src):
return None
with open(src, encoding='utf-8') as infile:
source = infile.read()
try:
tree = ast.parse(source)
except SyntaxError:
return None
for node in tree.body:
if isinstance(
node, (ast.Assign, ast.AnnAssign)
) and _assignment_targets_name(node, name):
comment = _extract_hash_colon_block(source, node.lineno)
type_str: str | None = None
if isinstance(node, ast.AnnAssign):
try:
type_str = ast.unparse(node.annotation)
except Exception:
type_str = None
return _ScanFound(comment=comment, type_str=type_str)
if isinstance(node, ast.ImportFrom):
redirect = _import_from_redirect(node, name, modname)
if redirect is not None:
return redirect
return None
def _import_from_redirect(
node: ast.ImportFrom, name: str, current_module: str
) -> _ScanRedirect | None:
"""If ``node`` brings in ``name``, return where to redirect to."""
if not node.module:
return None
for alias in node.names:
matched_local = alias.asname if alias.asname else alias.name
if matched_local != name:
continue
target = _resolve_import_module(current_module, node.module, node.level)
new_name = alias.name if alias.asname else name
return _ScanRedirect(new_module=target, new_name=new_name)
return None
def _assignment_targets_name(
node: ast.Assign | ast.AnnAssign, name: str
) -> bool:
if isinstance(node, ast.AnnAssign):
return isinstance(node.target, ast.Name) and node.target.id == name
# ast.Assign — can have multiple targets, each can be tuple/list.
for target in node.targets:
if isinstance(target, ast.Name) and target.id == name:
return True
if isinstance(target, (ast.Tuple, ast.List)):
for elt in target.elts:
if isinstance(elt, ast.Name) and elt.id == name:
return True
return False
def _extract_hash_colon_block(source: str, assign_lineno: int) -> str | None:
"""Read the ``#:`` block immediately preceding an assignment.
Returns the full block text (each line including the leading
``#:`` marker, newline-joined) or ``None`` if there's no such
block.
"""
lines = source.splitlines()
# ast lineno is 1-based; convert to 0-based index.
idx = assign_lineno - 1
block: list[str] = []
i = idx - 1
while i >= 0:
stripped = lines[i].lstrip()
if stripped.startswith('#:'):
block.append(lines[i].strip())
i -= 1
continue
# Allow blank lines to interrupt? No — must be contiguous
# right above the assignment, per sphinx's convention.
break
if not block:
return None
block.reverse()
return '\n'.join(block)
def _resolve_import_module(current_module: str, module: str, level: int) -> str:
"""Resolve a ``from X import ...`` target name into absolute form."""
if level == 0:
return module
parts = current_module.split('.')
if level > len(parts):
return module
base = '.'.join(parts[: len(parts) - level + 1])
return f'{base}.{module}' if module else base
def _runtime_type_str(value: object) -> str:
"""Best-effort runtime type-name string for ``value``.
Used when the canonical assignment is bare (``name = Foo()``)
so we infer the type from the value. Returns a string suitable
for use as a string annotation (e.g. ``'babase.App'``).
"""
t = type(value)
mod = getattr(t, '__module__', None) or ''
qual = getattr(t, '__qualname__', None) or t.__name__
if mod and mod != 'builtins':
return f"'{mod}.{qual}'"
return f"'{qual}'"
[docs]
def apply_injections(
injections: dict[str, list[Injection]],
) -> None:
"""Prepend forward-declare blocks to filtered consumer files.
Inserts the synthesized block after any ``from __future__``
statements (so it doesn't disrupt PEP 236 ordering) and before
everything else. Idempotent in the sense that each ``make
docs`` rebuilds from a fresh copy of sources, so we never need
to detect previously-injected content.
"""
for path, items in injections.items():
if not items:
continue
with open(path, encoding='utf-8') as infile:
source = infile.read()
block = _build_injection_block(items)
new_source = _insert_after_future_imports(source, block)
with open(path, 'w', encoding='utf-8') as outfile:
outfile.write(new_source)
def _build_injection_block(items: list[Injection]) -> str:
"""Compose the synthesized declaration block.
Annotations are emitted as **string literals** so the consumer
module doesn't need to import every referenced type. sphinx
handles string annotations correctly via PEP 563 semantics, and
mypy/pylint/runtime never have to evaluate them.
"""
pieces = [
'# === Auto-generated re-export annotations'
' (see batools.reexportdocs) ===',
]
for item in items:
pieces.append('')
pieces.append(item.comment)
ann_quoted = _as_string_annotation(item.type_str)
pieces.append(f'{item.name}: {ann_quoted}')
pieces.append('# === End auto-generated ===')
return '\n'.join(pieces) + '\n'
def _as_string_annotation(type_str: str) -> str:
"""Wrap a type expression in a string literal.
``type_str`` may already be quoted (the runtime fallback path
returns ``'babase.App'`` with quotes); in that case return it
as-is. Otherwise wrap with single quotes, escaping any internal
single quotes.
"""
s = type_str.strip()
if (s.startswith("'") and s.endswith("'")) or (
s.startswith('"') and s.endswith('"')
):
return s
# Annotation expressions can legitimately contain single quotes
# (e.g. ``Literal['foo']``). Double-quote in that case.
if "'" in s:
return f'"{s}"'
return f"'{s}'"
def _insert_after_future_imports(source: str, block: str) -> str:
"""Splice ``block`` after the last ``from __future__`` import.
Falls back to inserting after the module docstring (if present)
or at the very top.
"""
lines = source.splitlines(keepends=True)
insert_at = 0
# Skip past a top-of-file docstring expressed as a string
# literal statement.
if lines and lines[0].lstrip().startswith(('"""', "'''", '#')):
# Simplest: just walk forward through comment/string lines.
pass
# Walk for the last from __future__ import.
last_future_idx = -1
for i, line in enumerate(lines):
stripped = line.lstrip()
if stripped.startswith('from __future__'):
last_future_idx = i
if last_future_idx >= 0:
insert_at = last_future_idx + 1
else:
# No __future__ import — insert at top.
insert_at = 0
return (
''.join(lines[:insert_at])
+ '\n'
+ block
+ '\n'
+ ''.join(lines[insert_at:])
)
if __name__ == '__main__':
if len(sys.argv) != 2:
raise CleanError('Expected single arg: <outpath>')
_worker_main(sys.argv[1])
# Docs-generation hack; import some stuff that we likely only forward-declared
# in our actual source code so that docs tools can find it.
from typing import (Coroutine, Any, Literal, Callable,
Generator, Awaitable, Sequence, Self)
import asyncio
from concurrent.futures import Future
from pathlib import Path
from enum import Enum