Source code for efrotools.lazybuild

# Released under the MIT License. See LICENSE for details.
#
"""Functionality used for building."""

from __future__ import annotations

import os
import time
import subprocess
from pathlib import Path
from typing import TYPE_CHECKING


# Pylint's preferred import order here seems non-deterministic (as of 2.17.2).
# pylint: disable=useless-suppression
# pylint: disable=wrong-import-order
from efro.terminal import Clr
from efrotools.buildlock import BuildLock
from efrotools.util import get_string_hash

# pylint: enable=wrong-import-order
# pylint: enable=useless-suppression

if TYPE_CHECKING:
    from typing import Callable



[docs]
class LazyBuildContext:
    """Run a build if anything in some category is newer than a target.

    This can be used as an optimization for build targets that *always* run.
    As an example, a target that spins up a VM and runs a build can be
    expensive even if the VM build process determines that nothing has changed
    and does no work. We can use this to examine a broad swath of source files
    and skip firing up the VM if nothing has changed. We can be overly broad
    in the sources we look at since the worst result of a false positive change
    is the VM spinning up and determining that no actual inputs have changed.
    We could recreate this mechanism purely in the Makefile, but large numbers
    of target sources can add significant overhead each time the Makefile is
    invoked; in our case the cost is only incurred when a build is triggered.

    Note that target's mod-time will *always* be updated to match the newest
    source regardless of whether the build itself was triggered.
    """

    def __init__(
        self,
        target: str,
        srcpaths: list[str],
        command: str,
        *,
        buildlockname: str | None = None,
        dirfilter: Callable[[str, str], bool] | None = None,
        filefilter: Callable[[str, str], bool] | None = None,
        srcpaths_fullclean: list[str] | None = None,
        srcpaths_exist: list[str] | None = None,
        manifest_file: str | None = None,
        command_fullclean: str | None = None,
    ) -> None:
        self.target = target
        self.srcpaths = srcpaths
        self.srcpaths_exist = srcpaths_exist
        self.command = command
        self.dirfilter = dirfilter
        self.filefilter = filefilter
        self.buildlockname = buildlockname
        self.mtime = (
            None
            if not os.path.exists(self.target)
            else os.path.getmtime(self.target)
        )

        # Show prettier names for lazybuild cache dir targets.
        if target.startswith('.cache/lazybuild/'):
            self.target_name_pretty = target[len('.cache/lazybuild/') :]
        else:
            self.target_name_pretty = target

        self.have_changes = False
        self.have_fullclean_changes = False
        self.total_unchanged_count = 0
        self.printed_trigger = False

        # We support a mechanism where some paths can be passed as 'fullclean'
        # paths - these will trigger a separate 'fullclean' command as well as
        # the regular command when any of them change. This is handy for 'meta'
        # type builds where a lot of tools scripts can conceivably influence
        # target creation, but where it would be unwieldy to list all of them
        # as dependency relationships in a Makefile.
        self.srcpaths_fullclean = srcpaths_fullclean
        self.command_fullclean = command_fullclean

        # We also support a 'manifest' file which is a hash of all filenames
        # processed as part of srcpaths OR srcpaths_fullclean. If defined,
        # any changes in this hash will result in the full-clean-command
        # being run. This is useful for blowing away old output files when
        # source files are removed.
        self.manifest_file = manifest_file


[docs]
    def run(self) -> None:
        """Do the thing."""
        starttime = time.monotonic()

        self._check_for_changes()

        if self.have_changes:
            # If we were given a build-lock-name, surround our payload
            # with a build-lock. This can be used as a sanity-check to make
            # sure that only one build for some given purpose is being run at
            # once.
            if self.buildlockname is not None:
                with BuildLock(self.buildlockname, projroot='.'):
                    self._run_commands_and_update_target()
            else:
                self._run_commands_and_update_target()

        else:
            duration = time.monotonic() - starttime
            print(
                f'{Clr.BLU}Lazybuild: skipping "{self.target_name_pretty}"'
                f' (checked {self.total_unchanged_count} inputs'
                f' in {duration:.2}s).{Clr.RST}'
            )


    def _run_commands_and_update_target(self) -> None:
        assert self.have_changes

        if self.have_fullclean_changes:
            assert self.command_fullclean is not None
            print(
                f'{Clr.MAG}Lazybuild:'
                f' {Clr.SCYN}{Clr.BLD}full-clean{Clr.RST}'
                f'{Clr.MAG} input changed;'
                f' running {Clr.BLD}{self.command_fullclean}.{Clr.RST}',
                flush=True,
            )
            subprocess.run(self.command_fullclean, shell=True, check=True)

        subprocess.run(self.command, shell=True, check=True)

        # Complain if the target path does not exist at this point.
        # (otherwise we'd create an empty file there below which can
        # cause problems).
        # We make a special exception for files under .cache/lazybuild
        # since those are not actually meaningful files; only used for
        # dep tracking.
        if not self.target.startswith(
            '.cache/lazybuild'
        ) and not os.path.isfile(self.target):
            raise RuntimeError(
                f'Expected output file \'{self.target}\' not found'
                f' after running lazybuild command:'
                f' \'{self.command}\'.'
            )

        # We also explicitly update the mod-time of the target;
        # the command we (such as a VM build) may not have actually
        # done anything but we still want to update our target to
        # be newer than all the lazy sources.
        os.makedirs(os.path.dirname(self.target), exist_ok=True)
        Path(self.target).touch()

    def _check_for_changes(self) -> None:
        # pylint: disable=too-many-branches
        manfile = self.manifest_file
        # If we're watching for file adds/removes/renames in addition
        # to just modtimes, build a set of all files we come across.
        man_input_paths = set[str]() if manfile is not None else None

        # First check our fullclean paths if we have them.
        # any changes here will kick off a full-clean and then a build.
        if self.srcpaths_fullclean is not None:
            for srcpath in self.srcpaths_fullclean:
                src_did_change, src_unchanged_count = self._check_path(
                    srcpath, man_input_paths
                )
                if src_did_change:
                    self.have_changes = True
                    self.have_fullclean_changes = True

                    # If we're *not* building a manifest
                    # we can bail on the first difference.
                    if manfile is None:
                        return
                self.total_unchanged_count += src_unchanged_count

        # Now check our regular paths.
        # Any changes here just trigger a regular build.
        for srcpath in self.srcpaths:
            src_did_change, src_unchanged_count = self._check_path(
                srcpath, man_input_paths
            )

            if src_did_change:
                self.have_changes = True
                # If we're *not* building a manifest
                # we can bail on the first difference.
                if manfile is None:
                    return
            self.total_unchanged_count += src_unchanged_count

        # Check our exist-only paths; we simply look to see if these exist
        # and build if not.
        if self.srcpaths_exist is not None:
            for srcpath in self.srcpaths_exist:
                if not os.path.exists(srcpath):
                    self.have_changes = True

        # If we built a manifest, check/write it and kick off
        # a full-clean if anything differed.
        if manfile is not None:
            assert man_input_paths is not None
            # Need to sort to keep this deterministic.
            hashstr = get_string_hash('\n'.join(sorted(man_input_paths)))
            try:
                with open(manfile, encoding='utf-8') as infile:
                    existing_hash = infile.read()
            except FileNotFoundError:
                existing_hash = None
            if hashstr != existing_hash:
                # Manifest changed; write new one and mark us changed.
                os.makedirs(os.path.dirname(manfile), exist_ok=True)
                with open(manfile, 'w', encoding='utf-8') as outfile:
                    outfile.write(hashstr)
                self.have_changes = True
                self.have_fullclean_changes = True

    def _check_path(
        self, srcpath: str, manifest_paths: set[str] | None
    ) -> tuple[bool, int]:
        """Return whether path has changed and unchanged file count if not."""
        unchanged_count = 0

        # Add files verbatim; recurse through dirs.
        if os.path.isfile(srcpath):
            if self._test_path(srcpath):
                return True, 0
            unchanged_count += 1
            return False, unchanged_count

        results: tuple[bool, int] | None = None

        for root, dirnames, fnames in os.walk(srcpath, topdown=True):
            # In top-down mode we can modify dirnames in-place to
            # prevent recursing into them at all.
            for dirname in list(dirnames):  # (make a copy)
                if not self._default_dir_filter(root, dirname) or (
                    self.dirfilter is not None
                    and not self.dirfilter(root, dirname)
                ):
                    dirnames.remove(dirname)

            for fname in fnames:
                if not self._default_file_filter(root, fname) or (
                    self.filefilter is not None
                    and not self.filefilter(root, fname)
                ):
                    continue
                fpath = os.path.join(root, fname)

                # For now don't wanna worry about supporting spaces.
                if ' ' in fpath:
                    raise RuntimeError(f'Invalid path with space: {fpath}')

                if self._test_path(fpath):
                    results = (True, 0)

                    # If we're not building a manifest we can bail
                    # immediately on a negative result.
                    if manifest_paths is None:
                        return results

                # Add files to any manifest set we're building.
                if manifest_paths is not None:
                    manifest_paths.add(fpath)

                unchanged_count += 1

        # If we got here with no negatives, succeed.
        if results is None:
            results = (False, unchanged_count)

        return results

    def _default_dir_filter(self, root: str, dirname: str) -> bool:
        del root  # Unused.

        # Ignore hidden dirs.
        if dirname.startswith('.'):
            return False

        # Ignore Python caches.
        if dirname == '__pycache__':
            return False

        return True

    def _default_file_filter(self, root: str, fname: str) -> bool:
        del root  # Unused.

        # Ignore hidden files.
        if fname.startswith('.'):
            return False

        return True

    def _test_path(self, path: str) -> bool:
        # Now see this path is newer than our target.
        if self.mtime is None or os.path.getmtime(path) > self.mtime:
            # Only announce trigger condition once.
            if not self.printed_trigger:
                self.printed_trigger = True
                print(
                    f'{Clr.MAG}Lazybuild: '
                    f'{Clr.BLD}{self.target_name_pretty}{Clr.RST}{Clr.MAG}'
                    f' build triggered by change in '
                    f'{Clr.BLD}{path}{Clr.RST}{Clr.MAG}.{Clr.RST}',
                    flush=True,
                )
            return True
        return False



# Docs-generation hack; import some stuff that we likely only forward-declared
# in our actual source code so that docs tools can find it.
from typing import (Coroutine, Any, Literal, Callable,
  Generator, Awaitable, Sequence, Self)
import asyncio
from concurrent.futures import Future
from pathlib import Path
from enum import Enum