Source code for latch.types.glob

from pathlib import Path
from typing import List, Optional

from latch.types.file import LatchFile
from latch.types.utils import is_valid_url


def file_glob(
    pattern: str, remote_directory: str, target_dir: Optional[Path] = None
) -> List[LatchFile]:
    """Constructs a list of LatchFiles from a glob pattern.

    Convenient utility for passing collections of files between tasks. See
    [nextflow's channels](https://www.nextflow.io/docs/latest/channel.html) or
    [snakemake's wildcards](https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#wildcards)
    for similar functionality in other orchestration tools.

    The remote location of each constructed LatchFile will be constructed by
    appending the file name returned by the pattern to the directory
    represented by the `remote_directory`. A single `/` separator is inserted
    when `remote_directory` does not already end with one.

    Args:
        pattern: A glob pattern to match a set of files, eg. `*.py`. Will
            resolve paths with respect to the working directory of the
            caller (or `target_dir` when it is given).
        remote_directory: A valid latch URL pointing to a directory, eg.
            latch:///foo. This _must_ be a directory and not a file. The
            trailing slash is optional.
        target_dir: An optional Path object to define an alternate working
            directory for path resolution.

    Returns:
        A list of instantiated LatchFile objects, ordered by sorted local
        path. The list is empty when `remote_directory` is rejected by
        `is_valid_url` or when nothing matches `pattern`.

    Intended Use: ::

        @small_task
        def task():

            ...

            return file_glob("*.fastq.gz", "latch:///fastqc_outputs")
    """
    if not is_valid_url(remote_directory):
        return []

    # Plain concatenation would fuse the directory and file names together
    # (eg. "latch:///foo" + "a.py" -> "latch:///fooa.py"). Only add the
    # separator when missing so "latch:///" and "latch:///foo/" are unchanged.
    if not remote_directory.endswith("/"):
        remote_directory += "/"

    wd = Path.cwd() if target_dir is None else target_dir

    # Sort so the returned order is deterministic regardless of filesystem
    # iteration order.
    matched = sorted(wd.glob(pattern))

    return [LatchFile(str(file), remote_directory + file.name) for file in matched]