Source code for dagster._core.definitions.version_strategy

import hashlib
import inspect
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, NamedTuple, Optional

from dagster._annotations import public

if TYPE_CHECKING:
    from .op_definition import OpDefinition
    from .resource_definition import ResourceDefinition
    from .solid_definition import SolidDefinition


class OpVersionContext(NamedTuple):
    """Provides execution-time information for computing the version for an op.

    Attributes:
        op_def (OpDefinition): The definition of the op to compute a version for.
        op_config (Any): The parsed config to be passed to the op during execution.
    """

    op_def: "OpDefinition"
    op_config: Any

    @property
    def solid_def(self) -> "SolidDefinition":
        """Alias for ``op_def``; returns the same object."""
        return self.op_def

    @property
    def solid_config(self) -> Any:
        """Alias for ``op_config``; returns the same object."""
        return self.op_config
# Second name bound to the very same class object as OpVersionContext (not a subclass or copy).
# NOTE(review): presumably kept so callers using the older "solid" naming keep working — confirm.
SolidVersionContext = OpVersionContext
class ResourceVersionContext(NamedTuple):
    """Provides execution-time information for computing the version for a resource.

    Attributes:
        resource_def (ResourceDefinition): The definition of the resource whose version will be
            computed.
        resource_config (Any): The parsed config to be passed to the resource during execution.
    """

    resource_def: "ResourceDefinition"
    resource_config: Any
class VersionStrategy(ABC):
    """Abstract class for defining a strategy to version ops and resources.

    When subclassing, `get_op_version` must be implemented, and `get_resource_version` can be
    optionally implemented.

    `get_op_version` should ingest an OpVersionContext, and `get_resource_version` should ingest a
    ResourceVersionContext. From that, each should synthesize a unique string called a `version`,
    which will be tagged to outputs of that op in the job. Providing a `VersionStrategy` instance
    to a job will enable memoization on that job, such that only steps whose outputs do not have an
    up-to-date version will run.
    """

    @public
    @abstractmethod
    def get_op_version(self, context: OpVersionContext) -> str:
        """Compute the version string for the op described by ``context``.

        Subclasses must override this method; the base implementation always raises.

        Args:
            context (OpVersionContext): The op definition and parsed op config to version.

        Returns:
            str: The version for the op.

        Raises:
            NotImplementedError: Always, when the base implementation is invoked.
        """
        raise NotImplementedError()

    @public
    def get_resource_version(
        self, context: ResourceVersionContext  # pylint: disable=unused-argument
    ) -> Optional[str]:
        """Compute the version string for the resource described by ``context``.

        The base implementation ignores ``context`` and returns ``None``; subclasses may override
        it to return a version string.

        Args:
            context (ResourceVersionContext): The resource definition and parsed resource config
                to version.

        Returns:
            Optional[str]: ``None`` in the base implementation.
        """
        return None
class SourceHashVersionStrategy(VersionStrategy):
    """VersionStrategy that checks for changes to the source code of ops and resources.

    Only checks for changes within the immediate body of the op/resource's decorated function (or
    compute function, if the op/resource was constructed directly from a definition).
    """

    def _get_source_hash(self, fn) -> str:
        """Return the hex SHA-1 digest of the UTF-8 encoded source text of ``fn``.

        The source text is obtained with ``inspect.getsource``; any exception it raises when the
        source cannot be retrieved propagates to the caller unchanged.
        """
        code_as_str = inspect.getsource(fn)
        return hashlib.sha1(code_as_str.encode("utf-8")).hexdigest()

    @public
    def get_op_version(self, context: OpVersionContext) -> str:
        """Return the source hash of the op's compute function.

        Args:
            context (OpVersionContext): Supplies ``op_def``, whose ``compute_fn`` is hashed.

        Returns:
            str: Hex digest of the function's source text.
        """
        compute_fn = context.op_def.compute_fn
        # A callable compute_fn is hashed as-is; a non-callable one is expected to carry the
        # function to hash on its `decorated_fn` attribute.
        target_fn = compute_fn if callable(compute_fn) else compute_fn.decorated_fn
        return self._get_source_hash(target_fn)

    @public
    def get_resource_version(self, context: ResourceVersionContext) -> Optional[str]:
        """Return the source hash of the resource's ``resource_fn``.

        Args:
            context (ResourceVersionContext): Supplies ``resource_def``, whose ``resource_fn`` is
                hashed.

        Returns:
            Optional[str]: Hex digest of the function's source text.
        """
        return self._get_source_hash(context.resource_def.resource_fn)