# Released under the MIT License. See LICENSE for details.#"""Functionality related to transferring files/data."""from__future__importannotationsimportosfrompathlibimportPathfromdataclassesimportdataclassfromtypingimportTYPE_CHECKING,Annotatedfromefro.dataclassioimportioprepped,IOAttrsifTYPE_CHECKING:pass
@ioprepped
@dataclass
class DirectoryManifestFile:
    """Manifest entry describing a single file.

    Holds the two facts recorded per file: its SHA-256 digest (as
    produced by ``hexdigest()`` in ``DirectoryManifest``) and its size.
    """

    # SHA-256 digest of the file contents; IOAttrs name is 'h'.
    hash_sha256: Annotated[str, IOAttrs('h')]

    # Size of the file contents in bytes; IOAttrs name is 's'.
    size: Annotated[int, IOAttrs('s')]
@ioprepped
@dataclass
class DirectoryManifest:
    """Contains a summary of files in a directory."""

    # Maps each file's manifest path (forward slashes) to its entry.
    files: Annotated[dict[str, DirectoryManifestFile], IOAttrs('f')]

    # Soft-default added April 2024; can remove eventually once this
    # attr is widespread in client.
    exists: Annotated[bool, IOAttrs('e', soft_default=True)]

    @classmethod
    def create_from_disk(cls, path: Path) -> DirectoryManifest:
        """Create a manifest from a directory on disk.

        If ``path`` is a directory, every file found beneath it is
        listed under its forward-slash path relative to ``path``. If
        ``path`` is a single file, the manifest holds one entry keyed
        by ``path.as_posix()``. If ``path`` does not exist, the manifest
        has no files and ``exists`` is False.

        Raises RuntimeError if a listed entry is not a file (per
        ``os.path.isfile``) at the time it is hashed.
        """
        import hashlib
        from concurrent.futures import ThreadPoolExecutor

        # Hash in bounded chunks so each worker never holds a whole
        # file in memory at once.
        chunk_size = 1024 * 1024

        # (manifest key, path to open on disk) pairs.
        entries: list[tuple[str, str]] = []

        exists = path.exists()
        if path.is_dir():
            # Build the full list of relative paths.
            for dirpath, _dirnames, filenames in os.walk(path):
                for filename in filenames:
                    fullname = os.path.join(dirpath, filename)
                    # Make sure we end up with forward slashes no
                    # matter what the os.* stuff above here was using.
                    key = Path(fullname).relative_to(path).as_posix()
                    entries.append((key, fullname))
        elif exists:
            # Just return a single file entry if path is not a dir.
            # The key stays the path itself, but we open the path
            # directly; re-joining it onto itself breaks for relative
            # paths.
            entries.append((path.as_posix(), str(path)))

        def _get_file_info(
            entry: tuple[str, str],
        ) -> tuple[str, DirectoryManifestFile]:
            key, fullfilepath = entry
            if not os.path.isfile(fullfilepath):
                raise RuntimeError(f'File not found: "{fullfilepath}".')
            sha = hashlib.sha256()
            filesize = 0
            with open(fullfilepath, 'rb') as infile:
                while chunk := infile.read(chunk_size):
                    filesize += len(chunk)
                    sha.update(chunk)
            return (
                key,
                DirectoryManifestFile(
                    hash_sha256=sha.hexdigest(), size=filesize
                ),
            )

        # Now use all procs to hash the files efficiently.
        cpus = os.cpu_count()
        if cpus is None:
            cpus = 4
        with ThreadPoolExecutor(max_workers=cpus) as executor:
            return cls(
                files=dict(executor.map(_get_file_info, entries)),
                exists=exists,
            )

    def validate(self) -> None:
        """Log any odd data in the manifest; for debugging."""
        import logging

        for fpath in self.files:
            # We want to be dealing in only forward slashes; make sure
            # that's the case (wondering if we'll ever see backslashes
            # for escape purposes).
            if '\\' in fpath:
                # Not inside an except block, so use error() rather
                # than exception() to avoid a bogus traceback.
                logging.error(
                    "Found unusual path in manifest: '%s'.", fpath
                )
                break  # 1 error is enough for now.
# @classmethod
# def get_empty_hash(cls) -> str:
#     """Return the hash for an empty file."""
#     if cls._empty_hash is None:
#         import hashlib
#         sha = hashlib.sha256()
#         cls._empty_hash = sha.hexdigest()
#     return cls._empty_hash