Source code for weaver.processes.builtin.utils

import os
import tempfile
from typing import TYPE_CHECKING
from urllib.parse import urlparse

from weaver import WEAVER_ROOT_DIR
from weaver.formats import ContentType, get_extension

if TYPE_CHECKING:
    from typing import Any, Tuple


def is_netcdf_url(url):
    # type: (Any) -> bool
    """
    Checks whether the reference is an accepted file location carrying the NetCDF extension.

    The reference is first passed through :func:`validate_reference` as a file (not a directory).
    Any :class:`TypeError` or :class:`ValueError` raised by that validation is interpreted as
    "not a NetCDF reference" rather than propagated to the caller.

    :param url: Candidate reference. Values that are not strings are tolerated and yield ``False``.
    :returns: ``True`` only if the reference is valid and its extension matches the NetCDF one.
    """
    try:
        validate_reference(url, is_file=True)
    except (TypeError, ValueError):
        return False
    else:
        # Only the trailing extension of the reference is compared (exact, case-sensitive match).
        _, extension = os.path.splitext(url)
        return extension == get_extension(ContentType.APP_NETCDF)
def is_geojson_url(url):
    # type: (Any) -> bool
    """
    Checks whether the reference is an accepted file location carrying a GeoJSON or JSON extension.

    The reference is first passed through :func:`validate_reference` as a file (not a directory).
    Any :class:`TypeError` or :class:`ValueError` raised by that validation is interpreted as
    "not a GeoJSON reference" rather than propagated to the caller.

    :param url: Candidate reference. Values that are not strings are tolerated and yield ``False``.
    :returns: ``True`` only if the reference is valid and its extension is one of the accepted ones.
    """
    try:
        validate_reference(url, is_file=True)
    except (TypeError, ValueError):
        return False
    else:
        _, extension = os.path.splitext(url)
        # Plain JSON is accepted alongside the dedicated GeoJSON extension.
        accepted_types = (ContentType.APP_GEOJSON, ContentType.APP_JSON)
        accepted_extensions = [get_extension(ctype) for ctype in accepted_types]
        return extension in accepted_extensions
def validate_reference(url, is_file):
    # type: (str, bool) -> None
    """
    Ensures that the provided reference points to a valid remote file or a temporary intermediate file.

    In order to avoid bypassing security validation of server file access between jobs, remote locations
    must be enforced. However, :term:`CWL` temporary files must be allowed through for intermediate locations
    passed around between :term:`Workflow` steps or employed as temporary writing locations for file extraction
    purposes.

    Temporary locations are recognized by their prefix. Because a prefix match alone would also accept
    references that climb back out of the temporary directory (e.g.: ``/tmp/cwltool_tmp_x/../../etc/passwd``),
    any reference matching a temporary prefix is rejected if it contains a ``..`` path segment.

    :param url: Reference to validate.
    :param is_file:
        Indicates if the reference is expected to be a file (must not end with ``/``)
        or a directory (must end with ``/``).
    :raises TypeError: If the reference is not a string.
    :raises ValueError:
        If the file/directory form does not match ``is_file``, if a temporary location attempts
        relative path traversal, or if the scheme is not one of ``http``, ``https`` or ``s3``.
    """
    # Function-scope import to keep this block self-contained (only 'urlparse' is imported at module level).
    from urllib.parse import unquote

    if not isinstance(url, str):
        raise TypeError(f"Not a valid URL: [{url!s}]")
    if (is_file and url.endswith("/")) or (not is_file and not url.endswith("/")):
        dir_msg = "not supported" if is_file else "required"
        raise ValueError(f"Not a valid file URL reference [{url}]. Directory path {dir_msg}.")

    # When in a CWL step, tempdir will return the `/tmp/cwltool_tmp_...' path (since enforced by the tool).
    # When executed in other situations, it will map to the environment variable or platform-specific tmp path.
    # Although CWL will set TMPDIR for the current step, the source file could be coming from a previous step.
    # Therefore, the random part of the path after 'cwltool_tmp_'/'cwltool_out_' could differ from the current ones.
    tmp_dir = tempfile.gettempdir()
    tmp_paths = (
        f"file://{tmp_dir}/",
        f"{tmp_dir}/",
        "file:///tmp/cwltool_out_",
        "file:///tmp/cwltool_tmp_",
        "/tmp/cwltool_out_",  # nosec: B108
        "/tmp/cwltool_tmp_",  # nosec: B108
    )
    if url.startswith(tmp_paths):
        # The prefix check only inspects the start of the string. Refuse parent-directory segments so the
        # reference cannot resolve outside the temporary location. The percent-decoded form is inspected
        # to also cover '%2e%2e' in 'file://' URLs, and backslashes are treated as separators defensively.
        segments = unquote(url).replace("\\", "/").split("/")
        if ".." in segments:
            raise ValueError(f"Not a valid file URL reference [{url}]. Relative path traversal not supported.")
        return
    if urlparse(url).scheme not in ["http", "https", "s3"]:
        raise ValueError(f"Not a valid file URL reference [{url}]. Scheme not supported.")
def get_package_details(file):
    # type: (os.PathLike[str]) -> Tuple[str, str, str]
    """
    Obtains the ``builtin`` process details from its file reference.

    :param file: Path of the file that defines the process.
    :returns:
        Tuple of ``(name, path, mod)`` where ``name`` is the file base name without extension,
        ``path`` is the directory portion of the file path that follows ``WEAVER_ROOT_DIR``
        (with its trailing separator), and ``mod`` is ``path`` joined with ``name`` with every ``/``
        replaced by ``.`` (dotted module-like reference).
    """
    name = os.path.split(os.path.splitext(file)[0])[-1]
    root = WEAVER_ROOT_DIR.rstrip("/")  # avoid double //
    # Keep only what follows the last occurrence of the root directory (unchanged if the root is not found).
    relative = str(file).rsplit(f"{root}/", 1)[-1]
    # Drop the file's own base name to obtain its directory prefix. Splitting on 'name' without a limit would
    # cut at the first occurrence instead, truncating the path whenever a parent directory contains the same
    # text as the file name (e.g.: 'processes/builtin/builtin.py').
    path = relative[:len(relative) - len(os.path.basename(relative))]
    mod = f"{path}{name}".replace("/", ".")
    return name, path, mod