Source code for spine.config.load

"""Main configuration loading functions for SPINE.

This module provides the primary entry points for loading SPINE configurations:
- load_config(): Load from a YAML string
- load_config_file(): Load from a file path
- _load_config_recursive(): Internal recursive loader with include support
"""

import os
import warnings
from typing import Any, Dict, List, Optional, Tuple

import yaml

from .api import META_DESCRIPTION, META_KEY, META_LIST_APPEND, META_STRICT, META_VERSION
from .errors import ConfigCycleError, ConfigIncludeError
from .loader import ConfigLoader, resolve_config_path
from .meta import check_compatibility, extract_metadata
from .operations import (
    _apply_overrides_and_removals,
    apply_collection_operation,
    deep_merge,
    expand_env_vars,
    extract_includes_and_overrides,
    parse_value,
    set_nested_value,
)

__all__ = ["load_config", "load_config_file"]


def _load_config_recursive(
    cfg_path: Optional[str] = None,
    config_string: Optional[str] = None,
    root_dir: Optional[str] = None,
    include_stack: Optional[List[str]] = None,
    compatibility_checks: Optional[List[Tuple[Dict, Dict, str]]] = None,
    download: bool = True,
) -> Tuple[Dict[str, Any], Dict[str, Any], List[str], Dict[str, Any]]:
    """Recursively load config with cycle detection.

    Parameters
    ----------
    cfg_path : Optional[str]
        Path to configuration file (mutually exclusive with config_string)
    config_string : Optional[str]
        YAML configuration string (mutually exclusive with cfg_path)
    root_dir : Optional[str]
        Root directory for resolving relative include paths.
        Required when using config_string with includes.
        Defaults to directory of cfg_path when loading from file.
    include_stack : Optional[List[str]]
        Stack of currently-loading files (for cycle detection)
    compatibility_checks : Optional[List[Tuple[Dict, Dict, str]]]
        List to accumulate (parent_meta, included_meta, path) for deferred checking
    download : bool, default True
        If `True`, resolve `!download` tags by downloading files. If `False`,
        preserve `!download` tags as unresolved values.

    Returns
    -------
    Tuple[Dict[str, Any], Dict[str, Any], List[str], Dict[str, Any]]
        (config content, override directives, removal directives, metadata)

    Raises
    ------
    ConfigCycleError
        If circular include detected
    ConfigIncludeError
        If included file not found
    ValueError
        If both or neither cfg_path and config_string are provided
    """
    # Validate inputs
    if (cfg_path is None) == (config_string is None):
        raise ValueError("Must provide exactly one of cfg_path or config_string")

    # Determine the identifier for cycle detection and root directory
    if cfg_path is not None:
        cfg_path = os.path.abspath(cfg_path)
        identifier = cfg_path
        if root_dir is None:
            root_dir = os.path.dirname(cfg_path)
    else:
        # For string configs, use a pseudo-identifier
        identifier = "<string>"
        if root_dir is None:
            root_dir = os.getcwd()

    # Cycle detection
    if include_stack is None:
        include_stack = []
    if compatibility_checks is None:
        compatibility_checks = []

    if identifier in include_stack and cfg_path is not None:
        cycle = include_stack + [identifier]
        raise ConfigCycleError(cycle)

    include_stack = include_stack + [identifier]

    # Create a custom loader class with the specified root_dir
    class CustomConfigLoader(ConfigLoader):
        def __init__(self, stream) -> None:
            super().__init__(stream, root_dir, download=download)

    # Load YAML
    try:
        if cfg_path is not None:
            with open(cfg_path, "r", encoding="utf-8") as f:
                main_config = yaml.load(f, Loader=CustomConfigLoader)
        else:
            assert config_string is not None  # For the linter's sake
            main_config = yaml.load(config_string, Loader=CustomConfigLoader)
    except FileNotFoundError as exc:
        raise ConfigIncludeError(f"Configuration file not found: {cfg_path}") from exc
    except Exception as exc:
        source = cfg_path if cfg_path else "<string>"
        raise ConfigIncludeError(f"Error loading {source}: {exc}") from exc

    if main_config is None:
        return {}, {}, [], {}

    # Extract metadata
    metadata = extract_metadata(main_config, cfg_path if cfg_path else "<string>")
    strict = metadata[META_STRICT]
    list_append_mode = metadata[META_LIST_APPEND]

    # Extract directives
    includes, overrides, removals, cleaned_config = extract_includes_and_overrides(
        main_config
    )

    # Remove __meta__ from cleaned config
    if META_KEY in cleaned_config:
        del cleaned_config[META_KEY]

    config = {}

    # Process includes
    for include_file in includes:
        # Resolve include path with SPINE_CONFIG_PATH support
        include_path = resolve_config_path(include_file, root_dir)

        # Recursively load
        (
            included_config,
            included_overrides,
            included_removals,
            included_meta,
        ) = _load_config_recursive(
            cfg_path=include_path,
            root_dir=None,
            include_stack=include_stack,
            compatibility_checks=compatibility_checks,
            download=download,
        )

        # Warn if included file has no metadata (but keep the metadata that was extracted)
        if not included_meta.get(META_VERSION) and not included_meta.get(
            META_DESCRIPTION
        ):
            # File likely has no __meta__ block (only defaults)
            file_name = os.path.basename(include_path)
            warnings.warn(
                f"Included file '{file_name}' has no __meta__ block. "
                f"Consider adding metadata for better configuration management.",
                stacklevel=2,
            )

        # Defer compatibility check until all includes loaded
        compatibility_checks.append((metadata, included_meta, include_path))

        # Merge included config
        config = deep_merge(config, included_config)

        # Merge component versions from included metadata into parent
        # This allows subsequent includes to check against accumulated components
        if "components" in included_meta:
            if "components" not in metadata:
                metadata["components"] = {}
            metadata["components"].update(included_meta["components"])
        elif META_VERSION in included_meta:
            # If included file has version but no components, infer component name from file path
            # e.g., base/base_240719.yaml -> component "base" with version "240719"
            # This allows configs without explicit components to still participate in version checking
            include_dir = os.path.basename(os.path.dirname(include_path))
            if include_dir and include_dir not in ("", "."):
                if "components" not in metadata:
                    metadata["components"] = {}
                metadata["components"][include_dir] = included_meta[META_VERSION]

        # Apply included overrides (use included file's strict/list_append settings)
        included_strict = included_meta.get(META_STRICT, strict)
        included_list_append = included_meta.get(META_LIST_APPEND, list_append_mode)

        config, unapplied = _apply_overrides_and_removals(
            config,
            included_overrides,
            included_removals,
            included_strict,
            included_list_append,
        )

        # Propagate unapplied overrides
        if unapplied:
            overrides = {**unapplied, **overrides}

    # Merge main config content
    if cleaned_config:
        config = deep_merge(config, cleaned_config)

    return config, overrides, removals, metadata


[docs] def load_config( config_str: str, root_dir: Optional[str] = None, download: bool = True ) -> Dict[str, Any]: """Load a SPINE configuration from a YAML string. Similar to yaml.safe_load(), but with SPINE's advanced features: - Hierarchical includes with cycle detection - Metadata via __meta__ blocks - Override semantics with dot-notation - Collection operations (list append/remove, dict key removal) - Configurable strict modes (warn/error) See module docstring for full configuration language spec. Parameters ---------- config_str : str YAML configuration string root_dir : Optional[str] Root directory for resolving relative include paths. Also used as the base for SPINE_CONFIG_PATH searches. If not provided, defaults to current working directory. Required if config contains __include__ directives with relative paths. download : bool, default True If `True`, resolve `!download` tags by downloading files. If `False`, preserve `!download` tags as unresolved values. Returns ------- Dict[str, Any] Loaded and merged configuration Raises ------ ConfigCycleError If circular include detected ConfigIncludeError If included file not found or can't be loaded ConfigPathError If removal/operation targets non-existent path (when strict="error") ConfigTypeError If operation applied to wrong type ConfigOperationError If invalid operation specified Examples -------- Simple string config: >>> config_str = \"\"\" ... io: ... reader: ... batch_size: 32 ... \"\"\" >>> config = load_config(config_str) >>> print(config['io']['reader']['batch_size']) 32 String config with includes (requires root_dir): >>> config_str = \"\"\" ... include: base.yaml ... model: ... name: resnet ... \"\"\" >>> config = load_config(config_str, root_dir="/path/to/configs") For loading from files, use load_config_file(): >>> config = load_config_file("config.yaml") Or equivalently: >>> with open("config.yaml") as f: ... config = load_config(f.read()) See Also -------- load_config_file : Load configuration from a file path """ # Load recursively, accumulating compatibility checks compatibility_checks = [] config, overrides, removals, metadata = _load_config_recursive( config_string=config_str, root_dir=root_dir, compatibility_checks=compatibility_checks, download=download, ) # Now that all includes are loaded, check all compatibility requirements for parent_meta, included_meta, include_path in compatibility_checks: check_compatibility(parent_meta, included_meta, include_path) # Get strict and list_append settings from top-level metadata # (these are always present, set by extract_metadata with defaults) strict = metadata[META_STRICT] list_append_mode = metadata[META_LIST_APPEND] # Apply top-level overrides # Note: these include both explicit top-level overrides and propagated ones from nested files # Use strict mode from top-level metadata for key_path, value in overrides.items(): parsed_value = parse_value(value) if key_path.endswith("+") or key_path.endswith("-"): # Collection operations - use strict mode from metadata base_key = key_path[:-1] operation = key_path[-1] config = apply_collection_operation( config, base_key, parsed_value, operation, strict, list_append_mode ) else: # Regular override - silently skip if parent doesn't exist config, _ = set_nested_value( config, key_path, parsed_value, only_if_exists=True ) # Apply top-level removals for key_path in removals: config, _ = set_nested_value(config, key_path, None, delete=True, strict=strict) # Remove __meta__ from final config if META_KEY in config: del config[META_KEY] return expand_env_vars(config)
[docs] def load_config_file(cfg_path: str, download: bool = True) -> Dict[str, Any]: """Load a SPINE configuration from a file. Convenience function that reads a configuration file and passes it to load_config. The file's directory is automatically used as root_dir for include resolution. Parameters ---------- cfg_path : str Path to configuration file download : bool, default True If `True`, resolve `!download` tags by downloading files. If `False`, preserve `!download` tags as unresolved values. Returns ------- Dict[str, Any] Loaded and merged configuration Raises ------ ConfigCycleError If circular include detected ConfigIncludeError If included file not found or can't be loaded ConfigPathError If removal/operation targets non-existent path (when strict="error") ConfigTypeError If operation applied to wrong type ConfigOperationError If invalid operation specified Examples -------- >>> config = load_config_file("config.yaml") >>> print(config['io']['reader']['batch_size']) 32 See Also -------- load_config : Load config from a YAML string """ # Load recursively, accumulating compatibility checks compatibility_checks = [] cfg_path = os.path.abspath(cfg_path) root_dir = os.path.dirname(cfg_path) config, overrides, removals, metadata = _load_config_recursive( cfg_path=cfg_path, root_dir=root_dir, compatibility_checks=compatibility_checks, download=download, ) # Now that all includes are loaded, check all compatibility requirements for parent_meta, included_meta, include_path in compatibility_checks: check_compatibility(parent_meta, included_meta, include_path) # Get strict and list_append settings from top-level metadata # (these are always present, set by extract_metadata with defaults) strict = metadata[META_STRICT] list_append_mode = metadata[META_LIST_APPEND] # Apply top-level overrides # Note: these include both explicit top-level overrides and propagated ones from nested files # Use strict mode from top-level metadata for key_path, value in overrides.items(): parsed_value = parse_value(value) if key_path.endswith("+") or key_path.endswith("-"): # Collection operations - use strict mode from metadata base_key = key_path[:-1] operation = key_path[-1] config = apply_collection_operation( config, base_key, parsed_value, operation, strict, list_append_mode ) else: # Regular override - silently skip if parent doesn't exist config, _ = set_nested_value( config, key_path, parsed_value, only_if_exists=True ) # Apply top-level removals for key_path in removals: config, _ = set_nested_value(config, key_path, None, delete=True, strict=strict) # Remove __meta__ from final config if META_KEY in config: del config[META_KEY] return expand_env_vars(config)