Source code for cfinterface.files.registerfile

import warnings
from typing import IO, TYPE_CHECKING, Any

from cfinterface.components.defaultregister import DefaultRegister
from cfinterface.components.register import Register
from cfinterface.data.registerdata import RegisterData
from cfinterface.reading.registerreading import RegisterReading
from cfinterface.storage import StorageType, _ensure_storage_type
from cfinterface.versioning import resolve_version
from cfinterface.writing.registerwriting import RegisterWriting

if TYPE_CHECKING:
    from cfinterface.versioning import VersionMatchResult


class RegisterFile:
    """
    Class that models a file divided by registers, where the reading
    and writing are given by a series of registers.

    Class attributes
    ----------------
    VERSIONS : dict[str, list[type[Register]]]
        Register sets keyed by version string.
    REGISTERS : list[type[Register]]
        Register set used when no version is requested (or none matches).
    ENCODING : str or list[str]
        A single encoding, or candidate encodings that :meth:`read` tries
        in order. :meth:`write` always uses the first one.
    STORAGE : str or StorageType
        Storage kind handed to the reading and writing helpers.
    """

    __slots__ = ["__data", "__storage", "__encoding"]

    VERSIONS: dict[str, list[type[Register]]] = {}
    REGISTERS: list[type[Register]] = []
    ENCODING: str | list[str] = ["utf-8", "latin-1", "ascii"]
    STORAGE: str | StorageType = StorageType.TEXT
    __VERSION = "latest"

    def __init__(self, data: RegisterData | None = None) -> None:
        """
        Build a file around ``data``.

        Parameters
        ----------
        data : RegisterData or None, optional
            Register container backing the file. When omitted, a new
            container holding a single empty ``DefaultRegister`` is
            created for this instance.
        """
        # A default built in the signature is evaluated once and would be
        # shared (and mutated) by every instance created without arguments,
        # so the default container is created per call instead.
        if data is None:
            data = RegisterData(DefaultRegister(data=""))
        self.__data: RegisterData = data
        self.__storage: str | StorageType = _ensure_storage_type(
            self.__class__.STORAGE
        )
        encoding = self.__class__.ENCODING
        # Writing needs exactly one encoding: the first candidate is used.
        self.__encoding: str = (
            encoding if isinstance(encoding, str) else encoding[0]
        )

    def __eq__(self, o: object) -> bool:
        """Two register files are equal when their ``data`` compare equal."""
        if not isinstance(o, RegisterFile):
            return False
        return self.data == o.data

    def _as_df(self, register_type: type[Register]) -> "pd.DataFrame":  # type: ignore[name-defined]  # noqa: F821
        """
        Return registers of the given type as a DataFrame.

        One column is produced per name in the first register's
        ``custom_properties`` and one row per register. An empty
        DataFrame is returned when no register of that type exists.

        Raises
        ------
        ImportError
            If pandas is not installed.
        """
        try:
            import pandas as pd
        except ImportError:
            raise ImportError(
                "pandas is required for _as_df(). "
                "Install it with: pip install cfinterface[pandas]"
            ) from None
        registers = list(self.data.of_type(register_type))
        if not registers:
            return pd.DataFrame()
        cols = registers[0].custom_properties
        return pd.DataFrame(
            data={c: [getattr(r, c) for r in registers] for c in cols}
        )

    @classmethod
    def read(
        cls,
        content: str | bytes,
        *args: Any,
        version: str | None = None,
        **kwargs: Any,
    ) -> "RegisterFile":
        """
        Read from a file path or buffer.

        ``version`` selects a component set from VERSIONS without
        mutating the class.

        Parameters
        ----------
        content : str or bytes
            What is handed to the underlying ``RegisterReading.read``.
        version : str or None, optional
            Version key resolved against ``VERSIONS``. When it cannot be
            resolved a warning is issued and ``REGISTERS`` is used.
        *args, **kwargs
            Forwarded both to the ``RegisterReading`` constructor and to
            its ``read`` method.

        Raises
        ------
        EncodingWarning
            If ``ENCODING`` is a list and every candidate fails with
            ``UnicodeDecodeError``. The last decode error is attached as
            the cause.
        """
        components = cls.REGISTERS
        if version is not None and cls.VERSIONS:
            resolved = resolve_version(version, cls.VERSIONS)
            if resolved is not None:
                components = resolved
            else:
                warnings.warn(
                    f"No matching version for '{version}' in "
                    f"{cls.__name__}.VERSIONS. Using default components.",
                    stacklevel=2,
                )
        reader = RegisterReading(components, cls.STORAGE, *args, **kwargs)
        if isinstance(cls.ENCODING, str):
            return cls(reader.read(content, cls.ENCODING, *args, **kwargs))

        # Try each candidate in order; remember the most recent failure so
        # the final error explains why decoding did not succeed.
        last_error: UnicodeDecodeError | None = None
        for encoding in cls.ENCODING:
            try:
                return cls(reader.read(content, encoding, *args, **kwargs))
            except UnicodeDecodeError as err:
                last_error = err
        # NOTE: EncodingWarning is a Warning subclass raised as an exception;
        # the type is kept because callers may already catch it.
        raise EncodingWarning(
            "Failed to decode content with all specified encodings."
        ) from last_error

    def write(self, to: str | IO[Any], *args: Any, **kwargs: Any) -> None:
        """
        Write the registers to a path or an open stream.

        Parameters
        ----------
        to : str or IO
            Destination handed to ``RegisterWriting.write``.
        *args, **kwargs
            Forwarded to ``RegisterWriting.write`` after the encoding.
        """
        writer = RegisterWriting(self.__data, self.__storage)
        writer.write(to, self.__encoding, *args, **kwargs)

    @classmethod
    def read_many(
        cls,
        paths: list[str],
        *,
        version: str | None = None,
    ) -> dict[str, "RegisterFile"]:
        """
        Read multiple files and return a dict keyed by file path.

        Parameters
        ----------
        paths : list[str]
            File paths to read.
        version : str or None, optional
            Version key passed to :meth:`read`. Defaults to None.

        Returns
        -------
        dict[str, RegisterFile]
            Mapping from each file path to its parsed RegisterFile
            instance.
        """
        return {path: cls.read(path, version=version) for path in paths}

    def validate(
        self,
        version: str | None = None,
        threshold: float = 0.5,
    ) -> "VersionMatchResult":
        """
        Validate parsed data against expected component types.

        Parameters
        ----------
        version : str or None, optional
            Version key resolved against ``VERSIONS``. When omitted (or
            when ``VERSIONS`` is empty) ``REGISTERS`` is the expected set.
        threshold : float, optional
            Passed through to ``validate_version``.

        Returns
        -------
        VersionMatchResult
            Result of ``validate_version``. If ``version`` is given but
            cannot be resolved, validation runs against ``REGISTERS`` and
            the result is returned with ``matched`` forced to ``False``.
        """
        from cfinterface.versioning import validate_version

        expected = self.__class__.REGISTERS
        if version is not None and self.__class__.VERSIONS:
            resolved = resolve_version(version, self.__class__.VERSIONS)
            if resolved is None:
                result = validate_version(
                    self.data, expected, DefaultRegister, threshold
                )
                return result._replace(matched=False)
            expected = resolved
        return validate_version(
            self.data, expected, DefaultRegister, threshold
        )

    @property
    def data(self) -> RegisterData:
        """Register container backing this file."""
        return self.__data

    @classmethod
    def set_version(cls, v: str) -> None:
        """
        Set the active register set for the given version key.

        Resolves to the latest available version <= v, so an
        out-of-range key falls back to the nearest known version.
        When nothing can be resolved the class is left unchanged.

        .. deprecated::
            Use ``read(content, version="...")`` instead.
        """
        warnings.warn(
            "set_version() is deprecated. "
            'Use read(content, version="...") instead.',
            DeprecationWarning,
            stacklevel=2,
        )
        resolved = resolve_version(v, cls.VERSIONS)
        if resolved is not None:
            cls.__VERSION = v
            cls.REGISTERS = resolved