# Source code for batools.docs (extracted from a generated docs page).

# Released under the MIT License. See LICENSE for details.
#
"""Documentation generation functionality."""

from __future__ import annotations

import os
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING

from efro.util import utc_now, strict_partial
from efro.terminal import Clr

if TYPE_CHECKING:
    from concurrent.futures import Future

    from libcst import BaseExpression
    from libcst.metadata import CodeRange


@dataclass
class AttributeInfo:
    """Info about an attribute of a class."""

    # Attribute name as parsed from a docstring 'Attributes:' section.
    name: str

    # Type string parsed from a '(sometype):' suffix, if one was present.
    attr_type: str | None = None

    # Accumulated free-form description text for this attribute.
    docs: str | None = None
# Module-level flag; presumably tracks whether pdoc generation has already
# run against dummy-modules this session. Not referenced in this visible
# chunk — confirm usage elsewhere before removing.
_g_genned_pdoc_with_dummy_modules = False  # pylint: disable=invalid-name
[docs] def parse_docs_attrs(attrs: list[AttributeInfo], docs: str) -> str: """Given a docs str, parses attribute descriptions contained within.""" docs_lines = docs.splitlines() attr_line = None for i, line in enumerate(docs_lines): if line.strip() in ['Attributes:', 'Attrs:']: attr_line = i break if attr_line is not None: # Docs is now everything *up to* this. docs = '\n'.join(docs_lines[:attr_line]) # Go through remaining lines creating attrs and docs for each. cur_attr: AttributeInfo | None = None for i in range(attr_line + 1, len(docs_lines)): line = docs_lines[i].strip() # A line with a single alphanumeric word preceding a colon # is a new attr. splits = line.split(' ', maxsplit=1) if splits[0].replace('_', '').isalnum() and splits[-1].endswith( ':' ): if cur_attr is not None: attrs.append(cur_attr) cur_attr = AttributeInfo(name=splits[0]) if len(splits) == 2: # Remove brackets and convert from # (type): to type. cur_attr.attr_type = splits[1][1:-2] # Any other line gets tacked onto the current attr. else: if cur_attr is not None: if cur_attr.docs is None: cur_attr.docs = '' cur_attr.docs += line + '\n' # Finish out last. if cur_attr is not None: attrs.append(cur_attr) for attr in attrs: if attr.docs is not None: attr.docs = attr.docs.strip() return docs
@dataclass
class SphinxSettings:
    """Our settings for sphinx stuff."""

    # Project display name (e.g. 'Ballistica').
    project_name: str

    # Author credited in generated docs.
    project_author: str

    # Copyright string (year plus holder).
    copyright: str

    # Version string returned by batools.version.get_current_version().
    version: str

    # Build number paired with the version.
    buildnum: int

    # URL for the small project logo image.
    logo_small: str

    # URL for the large project logo image.
    logo_large: str
def get_sphinx_settings(projroot: str) -> SphinxSettings:
    """Settings for our Sphinx runs."""
    from batools.version import get_current_version

    version, buildnum = get_current_version(projroot=projroot)

    # Hosted logo images used by the generated docs.
    logo_small_url = (
        'https://files.ballistica.net/'
        'ballistica_media/ballistica_logo_half.png'
    )
    logo_large_url = (
        'https://files.ballistica.net/'
        'ballistica_media/ballistica_logo.png'
    )

    return SphinxSettings(
        project_name='Ballistica',
        project_author='Eric Froemling',
        copyright=f'{utc_now().year} Eric Froemling',
        version=version,
        buildnum=buildnum,
        logo_small=logo_small_url,
        logo_large=logo_large_url,
    )
def generate_sphinx_docs() -> None:
    """Run docs generation with sphinx.

    Copies our Python sources into a cache dir, filters them for nicer
    docs output, runs sphinx-apidoc over the various source groupings,
    and finally runs sphinx-build to emit html into build/docs.
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements
    import time
    import shutil
    from multiprocessing import cpu_count
    from concurrent.futures import ProcessPoolExecutor

    from jinja2 import Environment, FileSystemLoader

    # Make sure dummy-modules are up to date.
    subprocess.run(['make', 'dummymodules'], check=True)

    settings = get_sphinx_settings('.')

    # Various in/out dirs (all relative to project root cwd).
    cache_dir = Path('.cache/sphinx')
    sphinx_src_dir = Path('src/assets/sphinx')
    build_dir = Path('build/docs')
    template_dir = Path(sphinx_src_dir, 'template')
    static_dir = Path(sphinx_src_dir, 'static')
    filtered_data_dir = Path('.cache/sphinxfiltered')
    ba_data_filtered_dir = Path(filtered_data_dir, 'ba_data')
    dummy_modules_filtered_dir = Path(filtered_data_dir, 'dummymodules')
    tools_filtered_dir = Path(filtered_data_dir, 'tools')
    assert template_dir.is_dir()
    assert static_dir.is_dir()
    build_dir.mkdir(parents=True, exist_ok=True)
    cache_dir.mkdir(parents=True, exist_ok=True)
    os.environ['BALLISTICA_ROOT'] = os.getcwd()  # used in sphinx conf.py

    def _printstatus(msg: str) -> None:
        # Simple colorized progress output.
        print(f'{Clr.BLU}{Clr.BLD}{msg}{Clr.RST}', flush=True)

    # Create copies of all Python sources we're documenting. This way we
    # can filter them beforehand to make some docs prettier (in
    # particular, the Annotated[] stuff we use a lot makes things very
    # ugly so we strip those out).
    _printstatus('Gathering sources...')
    subprocess.run(['rm', '-rf', filtered_data_dir], check=True)
    dirpairs: list[tuple[str, str]] = [
        ('src/assets/ba_data/python/', f'{ba_data_filtered_dir}/'),
        ('build/dummymodules/', f'{dummy_modules_filtered_dir}/'),
        ('tools/', f'{tools_filtered_dir}/'),
    ]
    for srcdir, dstdir in dirpairs:
        os.makedirs(dstdir, exist_ok=True)
        shutil.copytree(srcdir, dstdir, dirs_exist_ok=True)
        # subprocess.run(['cp', '-rv', srcdir, dstdir], check=True)

    # Filter all files. Doing this with multiprocessing gives us a very
    # nice speedup vs multithreading which seems gil-constrained.
    _printstatus('Filtering sources...')
    futures: list[Future] = []
    with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
        for root, _dirs, files in os.walk(filtered_data_dir):
            for fname in files:
                if not fname.endswith('.py'):
                    continue
                fpath = os.path.join(root, fname)
                futures.append(
                    executor.submit(
                        strict_partial(_sphinx_pre_filter_file, fpath)
                    )
                )
    # Surface any exceptions.
    for future in futures:
        _ = future.result()

    # Lastly, copy mod-times from original files onto our filtered ones.
    # Otherwise the 'highlighting module code' step has to run on
    # everything each time which is crazy slow.
    def _copy_modtime(src_file: str, dest_file: str) -> None:
        if not os.path.isfile(dest_file):
            raise RuntimeError(f'Expected file not found: "{dest_file}".')

        # Get the modification time of the source file
        mod_time = os.path.getmtime(src_file)

        # Set the modification time of the destination file to match the
        # source
        os.utime(dest_file, (mod_time, mod_time))

    _printstatus('Updating source modtimes...')
    futures = []
    for srcdir, dstdir in dirpairs:
        for root, _dirs, files in os.walk(srcdir):
            for fname in files:
                if not fname.endswith('.py'):
                    continue
                fpath = os.path.join(root, fname)
                assert fpath.startswith(srcdir)
                dstpath = os.path.join(dstdir, fpath.removeprefix(srcdir))
                _copy_modtime(fpath, dstpath)

    _printstatus('Generating index.rst...')
    env = Environment(loader=FileSystemLoader(template_dir))
    index_template = env.get_template('index.rst_t')
    # maybe make it automatically render all files in templates dir in future
    with open(
        Path(cache_dir, 'index.rst'), 'w', encoding='utf-8'
    ) as index_rst:
        data = {
            # 'ballistica_image_url': 'https://camo.githubusercontent.com/25021344ceaa7def6fa6523f79115f7ffada8d26b4768bb9a0cf65fc33304f45/68747470733a2f2f66696c65732e62616c6c6973746963612e6e65742f62616c6c6973746963615f6d656469612f62616c6c6973746963615f6c6f676f5f68616c662e706e67',  # pylint: disable=line-too-long
            'version_no': settings.version,
            'build_no': str(settings.buildnum),
        }
        index_rst.write(index_template.render(data=data))

    starttime = time.monotonic()

    # Common args shared by all sphinx-apidoc invocations below.
    sphinx_apidoc_cmd = [
        'sphinx-apidoc',
        '--doc-author',
        settings.project_author,
        '--doc-version',
        str(settings.version),
        '--doc-release',
        str(settings.buildnum),
        '--output-dir',
        str(cache_dir),
    ]

    # Make sure we won't break some existing use of PYTHONPATH.
    assert 'PYTHONPATH' not in os.environ
    environ = dict(
        os.environ,
        # Prevent Python from writing __pycache__ dirs in our source tree
        # which leads to slight annoyances.
        PYTHONDONTWRITEBYTECODE='1',
        # Allow Ballistica stuff to partially bootstrap itself using
        # dummy modules.
        BA_RUNNING_WITH_DUMMY_MODULES='1',
        # Also prevent our set_canonical_module_names() stuff from running
        # which seems to prevent sphinx from parsing docs from comments. It
        # seems that sphinx spits out pretty class names based on where we
        # expose the classes anyway so its all good.
        EFRO_SUPPRESS_SET_CANONICAL_MODULE_NAMES='1',
        # Also set PYTHONPATH so sphinx can find all our stuff.
        PYTHONPATH=(
            f'{ba_data_filtered_dir}:'
            f'{tools_filtered_dir}:'
            f'{dummy_modules_filtered_dir}'
        ),
    )

    # To me, the default max-depth of 4 seems weird for these categories
    # we create. We start on our top level page with a high level view
    # of our categories and the modules & packages directly under them,
    # but then if we click a category we suddenly see an extremely long
    # exhaustive list of children of children of children. Going with
    # maxdepth 1 so we instead just see the top level stuff for that
    # category. Clicking anything there then takes us to the
    # ultra-detailed page, which feels more natural.
    module_list_max_depth = '1'

    # This makes package module docs the first thing you see when you
    # click a package which feels clean to me.
    module_first_arg = '--module-first'

    _printstatus('Generating runtimemodules...')
    subprocess.run(
        sphinx_apidoc_cmd
        + [
            '--doc-project',
            'Runtime',
            '--tocfile',
            'runtimemodules',
            module_first_arg,
            '--maxdepth',
            module_list_max_depth,
            '-f',
            ba_data_filtered_dir,
        ],
        check=True,
        env=environ,
    )

    # Both our common and our tools packages live in 'tools' dir. So we
    # need to build a list of things to ignore in that dir when creating
    # those two listings.
    excludes_tools: list[str] = []
    excludes_common: list[str] = []
    for name in os.listdir(tools_filtered_dir):
        # Skip anything not looking like a Python package.
        if (
            not Path(tools_filtered_dir, name).is_dir()
            or not Path(tools_filtered_dir, name, '__init__.py').exists()
        ):
            continue
        # Assume anything with 'tools' in the name goes with tools.
        exclude_list = excludes_common if 'tools' in name else excludes_tools
        exclude_list.append(str(Path(tools_filtered_dir, name)))

    _printstatus('Generating toolsmodules...')
    subprocess.run(
        sphinx_apidoc_cmd
        + [
            '--doc-project',
            'Tools',
            '--tocfile',
            'toolsmodules',
            module_first_arg,
            '--maxdepth',
            module_list_max_depth,
            '-f',
            str(tools_filtered_dir),
        ]
        + excludes_tools,
        env=environ,
        check=True,
    )

    _printstatus('Generating commonmodules...')
    subprocess.run(
        sphinx_apidoc_cmd
        + [
            '--doc-project',
            'Common',
            '--tocfile',
            'commonmodules',
            module_first_arg,
            '--maxdepth',
            module_list_max_depth,
            '-f',
            str(tools_filtered_dir),
        ]
        + excludes_common,
        env=environ,
        check=True,
    )

    # raise RuntimeError('SO FAR SO GOOD')

    _printstatus('Running sphinx-build...')
    subprocess.run(
        [
            'sphinx-build',
            '--fail-on-warning',
            '--conf-dir',
            static_dir,
            '--doctree-dir',
            cache_dir,
            cache_dir,  # input dir
            build_dir,  # output dir
        ],
        env=environ,
        check=True,
    )
    duration = time.monotonic() - starttime
    print(f'Generated sphinx documentation in {duration:.1f}s.')
def _sphinx_pre_filter_file(path: str) -> None:
    """Rewrite a single copied Python file in place for nicer docs output.

    Strips Annotated[...] wrappers via libcst and appends a few common
    imports so sphinx can resolve names we normally only forward-declare.
    Runs in a worker process, so imports are kept function-local.
    """
    from typing import override

    import libcst as cst
    from libcst import CSTTransformer, Name, Index, Subscript

    # Filtering happens in place: read and write the same path.
    filename = path
    filenameout = path

    class RemoveAnnotatedTransformer(CSTTransformer):
        """Replaces `Annotated[FOO, ...]` with just `FOO`"""

        @override
        def leave_Subscript(
            self, original_node: BaseExpression, updated_node: BaseExpression
        ) -> BaseExpression:
            # For a subscript named 'Annotated', replace the whole node
            # with the first subscript element (the actual type).
            if (
                isinstance(updated_node, Subscript)
                and isinstance(updated_node.value, Name)
                and updated_node.value.value == 'Annotated'
                and isinstance(updated_node.slice[0].slice, Index)
            ):
                return updated_node.slice[0].slice.value
            return updated_node

    with open(filename, 'r', encoding='utf-8') as f:
        source_code: str = f.read()

    tree: cst.Module = cst.parse_module(source_code)
    modified_tree: cst.Module = tree.visit(RemoveAnnotatedTransformer())
    final_code = modified_tree.code

    # It seems there's a good amount of stuff that sphinx can't create
    # links for because we don't actually import it at runtime; it is
    # just forward-declared under a 'if TYPE_CHECKING' block. We want to
    # actually import that stuff so that sphinx can find it. However we
    # can't simply run the code in the 'if TYPE_CHECKING' block because
    # we get cyclical reference errors (modules importing other ones
    # before they are finished being built). For now let's just
    # hard-code some common harmless imports at the end of filtered
    # files. Perhaps the ideal solution would be to run 'if
    # TYPE_CHECKING' blocks in the context of each module but only after
    # everything had been initially imported. Sounds tricky but could
    # work I think.

    # NOTE(review): deliberately-disabled experiment (bool(False));
    # kept as-is per the comment above.
    if bool(False):
        final_code = final_code.replace(
            '\nif TYPE_CHECKING:\n',
            (
                '\nTYPE_CHECKING = True # Docs-generation hack\n'
                'if TYPE_CHECKING:\n'
            ),
        )

    # Currently-enabled approach: append common harmless imports.
    if bool(True):
        final_code = final_code + (
            '\n\n# Docs-generation hack; import some stuff that we'
            ' likely only forward-declared\n'
            '# in our actual source code so that docs tools can find it.\n'
            'from typing import (Coroutine, Any, Literal, Callable,\n'
            '                    Generator, Awaitable, Sequence, Self)\n'
            'import asyncio\n'
            'from concurrent.futures import Future\n'
            'from pathlib import Path\n'
            'from enum import Enum\n'
        )

    with open(filenameout, 'w', encoding='utf-8') as f:
        f.write(final_code)