# Released under the MIT License. See LICENSE for details.
#
"""Generate C++ id-enum + load-block code for the construct asset-package.
Reads the cached bundle manifest at
``.cache/asset_bundle/gui/manifest.json``, walks each per-bucket CAS
manifest, and splices generated content into two pre-marked autogen
sections in checked-in source files:
* ``src/ballistica/base/base.h`` — between the
``// __AUTOGENERATED_BUILTIN_ASSET_IDS_BEGIN__`` and ``…_END__``
markers: the four ``BuiltinTextureID`` / ``BuiltinCubeMapTextureID``
/ ``BuiltinSoundID`` / ``BuiltinMeshID`` enums + the
``kBuiltinAssetsApverid`` string constant.
* ``src/ballistica/base/assets/assets.cc`` — between the
``// __AUTOGENERATED_BUILTIN_ASSET_LOAD_BEGIN__`` and ``…_END__``
markers inside ``Assets::StartLoading()``: one
``LoadBuiltinTexture(BuiltinTextureID::kFooBar, "<apverid>:foo/bar")``
call per entry.
This runs as part of ``make update`` (not codegen): the autogen
sections live in checked-in files, and per the global build-system
doc, anything that touches checked-in files belongs in ``update``.
Idempotent — only writes a target file if the spliced result
differs from what's on disk, so steady-state ``make update`` calls
leave both files (and their mtimes) untouched.
"""
from __future__ import annotations

import json
import re
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path, PurePosixPath
from typing import TYPE_CHECKING

from efro.error import CleanError

if TYPE_CHECKING:
    pass
class AssetKind(Enum):
    """Which of the four C++ enums an entry belongs to."""

    TEXTURE = 'texture'
    CUBE_MAP_TEXTURE = 'cube_map_texture'
    SOUND = 'sound'
    MESH = 'mesh'

    @property
    def cpp_enum_name(self) -> str:
        """C++ enum class name for this kind."""
        return self._cpp_names()[0]

    @property
    def cpp_loader_name(self) -> str:
        """C++ ``LoadBuiltin*`` function name for this kind."""
        return self._cpp_names()[1]

    def _cpp_names(self) -> 'tuple[str, str]':
        """Return the (enum-class name, loader-function name) pair."""
        if self is AssetKind.TEXTURE:
            return 'BuiltinTextureID', 'LoadBuiltinTexture'
        if self is AssetKind.CUBE_MAP_TEXTURE:
            return 'BuiltinCubeMapTextureID', 'LoadBuiltinCubeMapTexture'
        if self is AssetKind.SOUND:
            return 'BuiltinSoundID', 'LoadBuiltinSound'
        assert self is AssetKind.MESH
        return 'BuiltinMeshID', 'LoadBuiltinMesh'
# Kind dispatch works off the bucket-id's first path-segment (before the
# slash, if any) — e.g. ``textures`` in ``textures/fallback_v1_regular``,
# or just ``constant`` — see _kind_for() below.

# File extensions used to validate texture buckets and to partition the
# mixed ``constant`` bucket into sounds vs (collision-)meshes.
_TEXTURE_EXTS = {'.dds', '.ktx', '.ktx2', '.pvr'}
_MESH_EXTS = {'.bob', '.bmsh'}
_SOUND_EXTS = {'.ogg', '.wav'}
@dataclass
class AssetEntry:
    """One asset, post-grouping & validation."""

    # Which of the four C++ enums this entry belongs to.
    kind: 'AssetKind'

    # Logical name within the package (no leading ba_data/<bucket>/
    # prefix, no extension). E.g. ``mydir/helloworld``.
    logical_name: str

    # Original logical path including bucket prefix + extension.
    # Kept for error messages / debugging.
    full_logical_path: str

    @property
    def cpp_enum_entry(self) -> str:
        """``kMydirHelloworld`` form."""
        segs = self.logical_name.split('/')
        return 'k' + ''.join(map(_pascal_case, segs))
@dataclass
class BuildResult:
    """Output collected before writing to disk."""

    # Asset-package version id the generated code is pinned to.
    apverid: str

    # All collected entries across every bucket.
    entries: 'list[AssetEntry]' = field(default_factory=list)

    def entries_for(self, kind: 'AssetKind') -> 'list[AssetEntry]':
        """Entries of a given asset kind, sorted by enum-entry name."""
        matching = [entry for entry in self.entries if entry.kind == kind]
        matching.sort(key=lambda entry: entry.cpp_enum_entry)
        return matching
def _pascal_case(segment: str) -> str:
"""``some_thing`` → ``SomeThing``; ``foo-bar`` → invalid."""
if not re.fullmatch(r'[a-z0-9_]+', segment):
raise CleanError(
f'Asset-path segment {segment!r} is not lowercase '
'ascii letters/digits/underscores; rename in the workspace.'
)
return ''.join(part.capitalize() for part in segment.split('_'))
# Bucket-id head → fixed AssetKind for buckets that hold a single kind.
# (The ``textures`` and ``constant`` heads need per-entry logic and are
# handled separately in _kind_for().)
_FIXED_BUCKET_KIND: dict[str, AssetKind] = {
    'cube_map_textures': AssetKind.CUBE_MAP_TEXTURE,
    'meshes': AssetKind.MESH,
    'sounds': AssetKind.SOUND,
}

# Bucket-id heads that contribute no entries to the four asset enums.
_SKIP_BUCKET_HEADS: frozenset[str] = frozenset({'language'})
def _kind_for(bucket_id: str, logical_path: str) -> 'AssetKind | None':
    """Map a (bucket, logical-path) pair to an AssetKind, or None to skip.

    The bucket id's first segment is the primary driver; for ``constant``
    we look at file extension since it mixes sounds + collision-meshes.

    Raises CleanError for unknown bucket types or bad texture extensions.
    """
    head = bucket_id.partition('/')[0]
    ext = Path(logical_path).suffix.lower()

    if head == 'textures':
        if ext in _TEXTURE_EXTS:
            return AssetKind.TEXTURE
        raise CleanError(
            f'Texture-bucket entry {logical_path!r} has '
            f'unexpected extension {ext!r}.'
        )

    if head == 'constant':
        # Constant bucket can hold sounds + collision-meshes; partition
        # by extension. Anything we don't recognize gets skipped silently
        # — generator stays forward-compatible with future kinds.
        if ext in _SOUND_EXTS:
            return AssetKind.SOUND
        return AssetKind.MESH if ext in _MESH_EXTS else None

    if head in _SKIP_BUCKET_HEADS:
        return None

    fixed_kind = _FIXED_BUCKET_KIND.get(head)
    if fixed_kind is None:
        raise CleanError(f'Unknown asset-bundle bucket type {bucket_id!r}.')
    return fixed_kind
_BUCKET_STAGED_PREFIX = re.compile(r'^ba_data/[^/]+/')
def _strip_logical_prefix(logical_path: str) -> str:
"""``ba_data/textures/foo/bar.dds`` → ``foo/bar``."""
stripped = _BUCKET_STAGED_PREFIX.sub('', logical_path, count=1)
return str(Path(stripped).with_suffix(''))
def collect(projroot: Path) -> 'BuildResult':
    """Read cached manifests and produce a validated build result.

    The manifest is produced by ``asset_bundle_build`` (invoked
    via the make rule whose direct dep is
    ``pconfig/projectconfig.json``), so by the time we're
    reading it the file exists and its apverid matches
    projectconfig's ``"assets"``. Anything else is a build-system
    bug we want to surface, not paper over.

    Raises CleanError for a missing/stale manifest, missing CAS
    blobs, or any per-asset validation failure (all per-asset
    problems are accumulated and reported together).
    """
    # pylint: disable=import-outside-toplevel, too-many-locals
    from efrotools.project import getprojectconfig

    manifest_path = projroot / '.cache/asset_bundle/gui/manifest.json'
    if not manifest_path.is_file():
        raise CleanError(
            f'Asset-bundle manifest not found at {manifest_path}; '
            'run `make cmake-build` (or `make assetpins-latest`) '
            'to produce it.'
        )
    manifest = json.loads(manifest_path.read_text())

    packages = manifest.get('asset_packages') or []
    if len(packages) != 1:
        raise CleanError(
            f'Expected exactly one entry in asset_packages at '
            f'{manifest_path}; got {len(packages)}.'
        )
    (pkg,) = packages
    apverid: str = pkg['apverid']

    # Guard against a stale manifest: its apverid must match the pin
    # in projectconfig.
    pinned_apverid = getprojectconfig(projroot).get('assets')
    if pinned_apverid != apverid:
        raise CleanError(
            f"Bundle manifest apverid {apverid!r} does not match "
            f"projectconfig 'assets' {pinned_apverid!r}; "
            'the manifest is stale. Try '
            '`make assets-resolve-clean && make cmake-build`.'
        )

    cas_root = projroot / '.cache/assetdata'
    result = BuildResult(apverid=apverid)
    problems: list[str] = []

    for bucket_id, manifest_sha in pkg['bundled_buckets'].items():
        # CAS layout: first two hex chars are the subdir name.
        blob_path = cas_root / manifest_sha[:2] / manifest_sha[2:]
        if not blob_path.is_file():
            raise CleanError(
                f"Bucket manifest blob missing: {blob_path} "
                f'(bucket {bucket_id!r}).'
            )
        bucket_manifest = json.loads(blob_path.read_text())
        for logical_path in sorted(bucket_manifest.get('h', {})):
            kind = _kind_for(bucket_id, logical_path)
            if kind is None:
                continue
            logical_name = _strip_logical_prefix(logical_path)
            segments = logical_name.split('/')
            if len(segments) < 2:
                problems.append(
                    f'Asset {logical_path!r} is at workspace root; '
                    'move into a category subdir (e.g. ui/, test/).'
                )
                continue
            try:
                # Validation only; results are discarded here.
                for segment in segments:
                    _pascal_case(segment)
            except CleanError as exc:
                problems.append(str(exc))
                continue
            result.entries.append(
                AssetEntry(
                    kind=kind,
                    logical_name=logical_name,
                    full_logical_path=logical_path,
                )
            )

    # Cross-kind collision check: same logical_name appearing under two
    # AssetKinds is ambiguous since the wrapper namespace is flat.
    kinds_by_name: dict[str, set[AssetKind]] = {}
    for entry in result.entries:
        kinds_by_name.setdefault(entry.logical_name, set()).add(entry.kind)
    for name, kinds in kinds_by_name.items():
        if len(kinds) > 1:
            problems.append(
                f'Logical name {name!r} appears across multiple asset '
                f'types ({sorted(k.value for k in kinds)}); rename to '
                'disambiguate.'
            )

    if problems:
        raise CleanError(
            'Asset-package validation failed:\n - ' + '\n - '.join(problems)
        )
    return result
def render_enum_block(result: 'BuildResult') -> str:
    """Build the autogen-section content for ``base.h``.

    Returns the lines that go between
    ``// __AUTOGENERATED_BUILTIN_ASSET_IDS_BEGIN__`` and
    ``// __AUTOGENERATED_BUILTIN_ASSET_IDS_END__`` in base.h
    (the markers themselves are NOT included).
    """
    apverid_line = (
        'inline constexpr const char* kBuiltinAssetsApverid = '
        f'"{result.apverid}";'
    )
    out: list[str] = [
        '//',
        '// Generated by ``tools/pcommand gen_builtin_asset_ids`` (run as part',
        '// of ``make update``) from the construct asset-package pinned in',
        '// ``pconfig/projectconfig.json``. Do not edit by hand; rerun',
        '// ``make update`` to regenerate. New per-asset entries land here as',
        '// the workspace gains them; old hand-coded ``Builtin*OldID`` entries',
        '// above retire one at a time as their callsites migrate.',
        '',
        apverid_line,
        '',
    ]
    for kind in AssetKind:
        kind_entries = result.entries_for(kind)
        if kind_entries:
            out.append(f'enum class {kind.cpp_enum_name} : uint16_t {{')
            out.extend(
                f'  {entry.cpp_enum_entry}, // {entry.full_logical_path}'
                for entry in kind_entries
            )
            out.append('};')
        else:
            # No entries of this kind: emit an empty enum.
            out.append(f'enum class {kind.cpp_enum_name} : uint16_t {{}};')
        out.append('')
    # Drop the trailing blank line so the closing marker sits flush.
    if out and out[-1] == '':
        out.pop()
    return '\n'.join(out)
def render_load_block(result: 'BuildResult') -> str:
    """Build the autogen-section content for ``assets.cc``.

    Returns the lines that go between
    ``// __AUTOGENERATED_BUILTIN_ASSET_LOAD_BEGIN__`` and
    ``// __AUTOGENERATED_BUILTIN_ASSET_LOAD_END__`` inside
    ``Assets::StartLoading()`` (the markers themselves are NOT
    included). Lines that would exceed the 80-char cpplint limit
    wrap after the comma.
    """
    out: list[str] = []
    for kind in AssetKind:
        kind_entries = result.entries_for(kind)
        if not kind_entries:
            continue
        out.append(f'  // {kind.value}s')
        for entry in kind_entries:
            asset_ref = f'{result.apverid}:{entry.logical_name}'
            call_head = (
                f'  {kind.cpp_loader_name}('
                f'{kind.cpp_enum_name}::{entry.cpp_enum_entry}'
            )
            one_liner = f'{call_head}, "{asset_ref}");'
            if len(one_liner) <= 80:
                out.append(one_liner)
            else:
                # Wrap at the comma, aligning the continuation under
                # the first argument.
                out.append(call_head + ',')
                cont_indent = ' ' * (len(kind.cpp_loader_name) + 3)
                out.append(f'{cont_indent}"{asset_ref}");')
    return '\n'.join(out)
# NOTE(review): these two prefix constants are unused within this module
# (full marker strings are passed straight to _splice_autogen); if no
# external code imports them they are dead and could be removed.
_BEGIN_MARKER_PREFIX = '// __AUTOGENERATED_'
_END_MARKER_PREFIX = '// __AUTOGENERATED_'
def _splice_autogen(
existing: str, begin_marker: str, end_marker: str, new_content: str
) -> str:
"""Replace content between begin/end markers in ``existing``.
Marker lines (and any indentation in front of them) are
preserved; only the content strictly between them is replaced.
Raises ``CleanError`` if either marker isn't found or they're
in the wrong order.
"""
lines = existing.split('\n')
begin_idx: int | None = None
end_idx: int | None = None
for i, line in enumerate(lines):
stripped = line.lstrip()
if stripped == begin_marker:
if begin_idx is not None:
raise CleanError(f'Duplicate begin marker {begin_marker!r}.')
begin_idx = i
elif stripped == end_marker:
if end_idx is not None:
raise CleanError(f'Duplicate end marker {end_marker!r}.')
end_idx = i
if begin_idx is None:
raise CleanError(f'Begin marker {begin_marker!r} not found.')
if end_idx is None:
raise CleanError(f'End marker {end_marker!r} not found.')
if end_idx <= begin_idx:
raise CleanError(
f'End marker {end_marker!r} must come after begin '
f'marker {begin_marker!r}.'
)
new_lines = (
lines[: begin_idx + 1]
+ ([new_content] if new_content else [])
+ lines[end_idx:]
)
return '\n'.join(new_lines)
# Project-relative paths of the files that hold autogen sections.
TARGET_BASE_H = 'src/ballistica/base/base.h'
TARGET_ASSETS_CC = 'src/ballistica/base/assets/assets.cc'

# (begin, end) marker-line pairs delimiting each file's autogen section.
_MARKERS_BASE_H = (
    '// __AUTOGENERATED_BUILTIN_ASSET_IDS_BEGIN__',
    '// __AUTOGENERATED_BUILTIN_ASSET_IDS_END__',
)
_MARKERS_ASSETS_CC = (
    '// __AUTOGENERATED_BUILTIN_ASSET_LOAD_BEGIN__',
    '// __AUTOGENERATED_BUILTIN_ASSET_LOAD_END__',
)
def compute_splices(
    projroot: Path,
    base_h_existing: 'str | None' = None,
    assets_cc_existing: 'str | None' = None,
) -> 'dict[str, str]':
    """Compute spliced contents for both target files.

    Returns a dict keyed by project-relative path with the full new
    file content (existing content + new autogen section). Caller
    decides whether to write — typical use is "write only if
    contents differ from on-disk".

    ``base_h_existing`` / ``assets_cc_existing`` let the caller pass
    in already-read content (e.g. when integrated into a project
    updater that's already loaded the file); pass ``None`` to read
    from disk here.
    """
    result = collect(projroot)
    base_h_text = (
        (projroot / TARGET_BASE_H).read_text()
        if base_h_existing is None
        else base_h_existing
    )
    assets_cc_text = (
        (projroot / TARGET_ASSETS_CC).read_text()
        if assets_cc_existing is None
        else assets_cc_existing
    )
    begin_ids, end_ids = _MARKERS_BASE_H
    begin_load, end_load = _MARKERS_ASSETS_CC
    return {
        TARGET_BASE_H: _splice_autogen(
            base_h_text, begin_ids, end_ids, render_enum_block(result)
        ),
        TARGET_ASSETS_CC: _splice_autogen(
            assets_cc_text, begin_load, end_load, render_load_block(result)
        ),
    }
def generate(projroot: Path, check: bool = False) -> bool:
    """Splice generated content into ``base.h`` and ``assets.cc``.

    Reads each target file, replaces the content between its
    ``// __AUTOGENERATED_*__`` marker pair, and writes the file
    only if the resulting content differs from what's on disk.
    Idempotent: a run with no changes leaves both files (and their
    mtimes) untouched.

    With ``check=True``, nothing is written; the return value still
    reports whether a write would have happened.

    Returns True if anything was (or would be) changed.
    """
    changed = False
    for rel_path, new_text in compute_splices(projroot).items():
        target = projroot / rel_path
        if target.read_text() == new_text:
            continue
        changed = True
        if not check:
            target.write_text(new_text)
    return changed
# Docs-generation hack; import some stuff that we likely only forward-declared
# in our actual source code so that docs tools can find it.
from typing import (Coroutine, Any, Literal, Callable,
Generator, Awaitable, Sequence, Self)
import asyncio
from concurrent.futures import Future
from pathlib import Path
from enum import Enum