Skip to content

Assets API 📦

First-class citizens for data and models with automatic metadata extraction.

Quick Start

Dataset - Auto-Extracted Statistics

# Quick-start example: wrap tabular data in a Dataset asset and read back the
# statistics that are extracted when the asset is created.
from flowyml import Dataset
import pandas as pd

# Placeholder: substitute your own DataFrame construction here.
df = pd.DataFrame(...)

# Auto-extracts: samples, features, columns, column_stats
ds = Dataset.create(data=df, name="my_data")

# Access auto-extracted properties
print(ds.num_samples)      # Number of rows
print(ds.num_features)     # Number of columns
print(ds.feature_columns)  # Column names
print(ds.column_stats)     # Per-column statistics

# Convenience methods
# `name` is optional for both loaders; when omitted it is derived from the file name.
ds = Dataset.from_csv("data.csv", name="my_data")
ds = Dataset.from_parquet("data.parquet", name="my_data")

Model - Auto-Extracted Metadata

# Quick-start example: wrap a trained model in a Model asset.
# `keras_model`, `pytorch_model`, `sklearn_model` and `flowyml_callback` are
# placeholders for objects created elsewhere in your code.
from flowyml import Model

# Auto-extracts: framework, parameters, layers, optimizer, etc.
model = Model.create(data=keras_model, name="my_model")

# Access auto-extracted properties
print(model.framework)      # 'keras', 'pytorch', 'sklearn'
print(model.parameters)     # Total parameter count
print(model.num_layers)     # Number of layers
print(model.optimizer)      # Optimizer name (Keras)
print(model.hyperparameters)  # Hyperparameters (sklearn)

# Convenience methods
# NOTE(review): Model.create names its callback argument `flowyml_callback`;
# confirm that from_keras really accepts `callback=` as shown here.
model = Model.from_keras(keras_model, name="my_model", callback=flowyml_callback)
model = Model.from_pytorch(pytorch_model, name="my_model")
model = Model.from_sklearn(sklearn_model, name="my_model")

Supported Frameworks (Model)

Framework Detection Auto-Extraction Level
Keras/TensorFlow Full (layers, optimizer, loss, metrics)
PyTorch Full (layers, device, dtype, params)
Scikit-learn Full (hyperparams, feature importance)
XGBoost Full (trees, hyperparams)
LightGBM Full (trees, hyperparams)
CatBoost Good
Hugging Face Good (config, hidden_size)
Custom Basic (class name, has_fit/predict)

Supported Data Types (Dataset)

Type Auto-Extraction
Pandas DataFrame Full (columns, stats, dtypes)
NumPy array Full (shape, dtype, stats)
Python dict Full (keys as columns, stats)
TensorFlow Dataset Good (element_spec, cardinality)
List of dicts Full (columns from keys, stats)

Class Asset

Base class for all ML assets (datasets, models, features, etc).

Assets are first-class objects in flowyml pipelines with full lineage tracking.

Source code in flowyml/assets/base.py
def __init__(
    self,
    name: str,
    version: str | None = None,
    data: Any = None,
    parent: Optional["Asset"] = None,
    tags: dict[str, str] | None = None,
    properties: dict[str, Any] | None = None,
):
    """Create an asset and register it in its parent's lineage.

    Args:
        name: Asset name.
        version: Version string; falls back to "v1.0.0" when empty or None.
        data: The wrapped payload, stored as-is.
        parent: Optional parent asset; when given, this asset is appended
            to the parent's ``children`` list.
        tags: Tags stored in the metadata (empty dict when not given).
        properties: Properties stored in the metadata (empty dict when not given).
    """
    self.name = name
    self.version = version if version else "v1.0.0"
    self.data = data
    self.asset_id = str(uuid4())

    # Zero or one entry: the single optional parent drives both the metadata
    # parent ids and the in-memory lineage links below.
    lineage_parents = [parent] if parent else []

    # Metadata record describing this asset
    self.metadata = AssetMetadata(
        asset_id=self.asset_id,
        name=name,
        version=self.version,
        asset_type=self.__class__.__name__,
        created_at=datetime.now(),
        created_by="flowyml",
        parent_ids=[ancestor.asset_id for ancestor in lineage_parents],
        tags=tags if tags else {},
        properties=properties if properties else {},
    )

    # In-memory lineage graph (links in both directions)
    self.parents: list[Asset] = lineage_parents
    self.children: list[Asset] = []

    for ancestor in lineage_parents:
        ancestor.children.append(self)

Attributes

properties: dict[str, Any] property

Expose mutable properties stored in metadata.

tags: dict[str, str] property

Expose mutable tags stored in metadata.

Functions

add_property(key: str, value: Any) -> None

Add a property to the asset.

Source code in flowyml/assets/base.py
def add_property(self, key: str, value: Any) -> None:
    """Store ``value`` under ``key`` in the asset's metadata properties."""
    props = self.metadata.properties
    props[key] = value

add_tag(key: str, value: str) -> None

Add a tag to the asset.

Source code in flowyml/assets/base.py
def add_tag(self, key: str, value: str) -> None:
    """Store ``value`` under ``key`` in the asset's metadata tags."""
    tag_map = self.metadata.tags
    tag_map[key] = value

create(data: Any, name: str | None = None, version: str | None = None, parent: Optional[Asset] = None, **kwargs: Any) -> Asset classmethod

Factory method to create an asset.

Parameters:

Name Type Description Default
data Any

The actual data/object

required
name str | None

Asset name

None
version str | None

Asset version

None
parent Optional[Asset]

Parent asset for lineage

None
**kwargs Any

Additional metadata

{}

Returns:

Type Description
Asset

New asset instance

Source code in flowyml/assets/base.py
@classmethod
def create(
    cls,
    data: Any,
    name: str | None = None,
    version: str | None = None,
    parent: Optional["Asset"] = None,
    **kwargs: Any,
) -> "Asset":
    """Factory method to create an asset.

    Args:
        data: The actual data/object
        name: Asset name
        version: Asset version
        parent: Parent asset for lineage
        **kwargs: Additional metadata

    Returns:
        New asset instance
    """
    asset_name = name or f"{cls.__name__}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    # Extract tags and properties if passed explicitly
    tags = kwargs.pop("tags", {})
    props = kwargs.pop("properties", {})
    # Merge remaining kwargs into properties
    props.update(kwargs)

    return cls(
        name=asset_name,
        version=version,
        data=data,
        parent=parent,
        tags=tags,
        properties=props,
    )

get_all_ancestors() -> set[Asset]

Get all ancestor assets.

Source code in flowyml/assets/base.py
def get_all_ancestors(self) -> set["Asset"]:
    """Collect every asset reachable by following ``parents`` links upward."""
    found = set()
    pending = [self]

    # Iterative graph walk; each asset is expanded at most once because it is
    # only queued at the moment it is first added to ``found``.
    while pending:
        current = pending.pop()
        for ancestor in current.parents:
            if ancestor in found:
                continue
            found.add(ancestor)
            pending.append(ancestor)

    return found

get_all_descendants() -> set[Asset]

Get all descendant assets.

Source code in flowyml/assets/base.py
def get_all_descendants(self) -> set["Asset"]:
    """Collect every asset reachable by following ``children`` links downward."""
    found = set()
    pending = [self]

    # Iterative graph walk; each asset is expanded at most once because it is
    # only queued at the moment it is first added to ``found``.
    while pending:
        current = pending.pop()
        for descendant in current.children:
            if descendant in found:
                continue
            found.add(descendant)
            pending.append(descendant)

    return found

get_hash() -> str

Generate hash of asset for caching/versioning.

Source code in flowyml/assets/base.py
def get_hash(self) -> str:
    """Return a 16-hex-character fingerprint of the asset's identity fields.

    The fingerprint is the truncated SHA-256 of a key-sorted JSON document
    holding the name, version, asset type and creation timestamp.
    """
    fingerprint_fields = {
        "name": self.name,
        "version": self.version,
        "type": self.metadata.asset_type,
        "created_at": self.metadata.created_at.isoformat(),
    }
    serialized = json.dumps(fingerprint_fields, sort_keys=True)
    digest = hashlib.sha256(serialized.encode())
    return digest.hexdigest()[:16]

get_lineage(depth: int = -1) -> dict[str, Any]

Get asset lineage.

Parameters:

Name Type Description Default
depth int

How many levels to traverse (-1 for all)

-1

Returns:

Type Description
dict[str, Any]

Lineage tree as nested dict

Source code in flowyml/assets/base.py
def get_lineage(self, depth: int = -1) -> dict[str, Any]:
    """Get asset lineage.

    Ancestors are expanded only through ``parents`` links and descendants
    only through ``children`` links. Re-expanding a parent's children (or a
    child's parents) would lead straight back to the starting asset and
    recurse without end when ``depth`` is unlimited.

    Args:
        depth: How many levels to traverse (-1 for all)

    Returns:
        Lineage tree as nested dict with the keys ``asset``, ``parents``
        and ``children``.
    """

    def build(asset, remaining, follow_parents, follow_children, trail):
        # One node of the lineage tree; the link lists stay empty unless expanded.
        node = {
            "asset": {
                "asset_id": asset.asset_id,
                "name": asset.name,
                "type": asset.metadata.asset_type,
                "version": asset.version,
            },
            "parents": [],
            "children": [],
        }

        if remaining == 0:
            return node

        # A positive budget counts down; any negative value means "unlimited".
        next_remaining = remaining - 1 if remaining > 0 else -1
        # Assets already on the current path are skipped so that an
        # accidentally cyclic graph cannot recurse forever.
        trail = trail | {id(asset)}

        if follow_parents:
            node["parents"] = [
                build(p, next_remaining, True, False, trail)
                for p in asset.parents
                if id(p) not in trail
            ]
        if follow_children:
            node["children"] = [
                build(c, next_remaining, False, True, trail)
                for c in asset.children
                if id(c) not in trail
            ]
        return node

    return build(self, depth, True, True, frozenset())

to_dict() -> dict[str, Any]

Convert asset to dictionary.

Source code in flowyml/assets/base.py
def to_dict(self) -> dict[str, Any]:
    """Serialize the asset as its metadata dict plus parent/child asset ids."""
    metadata_payload = self.metadata.to_dict()
    parent_ids = [ancestor.asset_id for ancestor in self.parents]
    child_ids = [descendant.asset_id for descendant in self.children]
    return {
        "metadata": metadata_payload,
        "lineage": {"parents": parent_ids, "children": child_ids},
    }

Class Dataset

Bases: Asset

Dataset asset with automatic schema detection and statistics extraction.

The Dataset class automatically extracts statistics and metadata from various data formats, reducing boilerplate code and improving UX.

Supported formats
  • pandas DataFrame: Auto-extracts columns, dtypes, statistics
  • numpy array: Auto-extracts shape, dtype, statistics
  • dict: Auto-extracts features/target structure, column stats
  • TensorFlow Dataset: Auto-extracts element_spec, cardinality
  • List of dicts: Converts to dict format and extracts stats
Example

Minimal usage - stats are extracted automatically!

import pandas as pd
df = pd.read_csv("data.csv")
dataset = Dataset.create(data=df, name="my_dataset")
print(dataset.num_samples)  # Auto-extracted
print(dataset.feature_columns)  # Auto-detected

With dict format

data = {"features": {"x": [1, 2, 3], "y": [4, 5, 6]}, "target": [0, 1, 0]}
dataset = Dataset.create(data=data, name="my_dataset")

All stats computed automatically!

Initialize Dataset with automatic statistics extraction.

Parameters:

Name Type Description Default
name str

Dataset name

required
version str | None

Version string

None
data Any

The actual data (DataFrame, array, dict, etc.)

None
schema Any | None

Optional schema definition

None
location str | None

Storage location/path

None
parent Asset | None

Parent asset for lineage

None
tags dict[str, str] | None

Metadata tags

None
properties dict[str, Any] | None

Additional properties (merged with auto-extracted)

None
auto_extract_stats bool

Whether to automatically extract statistics

True
Source code in flowyml/assets/dataset.py
def __init__(
    self,
    name: str,
    version: str | None = None,
    data: Any = None,
    schema: Any | None = None,
    location: str | None = None,
    parent: Asset | None = None,
    tags: dict[str, str] | None = None,
    properties: dict[str, Any] | None = None,
    auto_extract_stats: bool = True,
):
    """Build a Dataset, optionally enriching its properties with extracted statistics.

    Args:
        name: Dataset name
        version: Version string
        data: The actual data (DataFrame, array, dict, etc.)
        schema: Optional schema definition
        location: Storage location/path
        parent: Parent asset for lineage
        tags: Metadata tags
        properties: Additional properties; these win over auto-extracted
            values that share the same key
        auto_extract_stats: Whether to automatically extract statistics
    """
    # Start from a copy so the caller's dict is left untouched.
    merged_props = properties.copy() if properties else {}

    # Extraction only runs when it is enabled and there is data to inspect.
    should_extract = auto_extract_stats and data is not None
    if should_extract:
        # setdefault keeps any value the user supplied for the same key.
        for stat_name, stat_value in DatasetStats.extract_stats(data).items():
            merged_props.setdefault(stat_name, stat_value)

    super().__init__(
        name=name,
        version=version,
        data=data,
        parent=parent,
        tags=tags,
        properties=merged_props,
    )

    self.schema = schema
    self.location = location

    # Dataset-specific properties are recorded only when a truthy value was given.
    dataset_props = self.metadata.properties
    if schema:
        dataset_props["schema"] = str(schema)
    if location:
        dataset_props["location"] = location

Attributes

column_stats: dict[str, dict] | None property

Get per-column statistics (auto-extracted).

columns: list[str] | None property

Get all column names (auto-extracted or user-provided).

feature_columns: list[str] | None property

Get list of feature column names (auto-extracted or user-provided).

framework: str | None property

Get the data framework/format (auto-detected).

label_column: str | None property

Get the label/target column name (auto-detected or user-provided).

num_features: int | None property

Get number of features (auto-extracted or user-provided).

num_samples: int | None property

Get number of samples (auto-extracted or user-provided).

size: int | None property

Get dataset size if available.

Functions

__repr__() -> str

String representation with key stats.

Source code in flowyml/assets/dataset.py
def __repr__(self) -> str:
    """Render the dataset name followed by whichever key stats are truthy."""
    segments = [f"Dataset(name='{self.name}'"]

    sample_count = self.num_samples
    if sample_count:
        segments.append(f"samples={sample_count}")

    feature_count = self.num_features
    if feature_count:
        segments.append(f"features={feature_count}")

    data_framework = self.framework
    if data_framework:
        segments.append(f"framework='{data_framework}'")

    return f"{', '.join(segments)})"

create(data: Any, name: str, version: str | None = None, schema: Any | None = None, location: str | None = None, parent: Asset | None = None, tags: dict[str, str] | None = None, properties: dict[str, Any] | None = None, auto_extract_stats: bool = True, **kwargs: Any) -> Dataset classmethod

Create a Dataset with automatic statistics extraction.

This is the preferred way to create Dataset objects. Statistics are automatically extracted from the data, reducing boilerplate code.

Parameters:

Name Type Description Default
data Any

The actual data (DataFrame, array, dict, etc.)

required
name str

Dataset name

required
version str | None

Version string (optional)

None
schema Any | None

Optional schema definition

None
location str | None

Storage location/path

None
parent Asset | None

Parent asset for lineage

None
tags dict[str, str] | None

Metadata tags

None
properties dict[str, Any] | None

Additional properties (merged with auto-extracted)

None
auto_extract_stats bool

Whether to automatically extract statistics

True
**kwargs Any

Additional properties to store

{}

Returns:

Type Description
Dataset

Dataset instance with auto-extracted statistics

Example

df = pd.read_csv("data.csv")
dataset = Dataset.create(data=df, name="my_data", source="data.csv")

Stats are automatically extracted!

Source code in flowyml/assets/dataset.py
@classmethod
def create(
    cls,
    data: Any,
    name: str,
    version: str | None = None,
    schema: Any | None = None,
    location: str | None = None,
    parent: Asset | None = None,
    tags: dict[str, str] | None = None,
    properties: dict[str, Any] | None = None,
    auto_extract_stats: bool = True,
    **kwargs: Any,
) -> "Dataset":
    """Build a Dataset, folding extra keyword arguments into its properties.

    This is the preferred way to create Dataset objects. Statistics are
    extracted by the constructor unless ``auto_extract_stats`` is False.

    Args:
        data: The actual data (DataFrame, array, dict, etc.)
        name: Dataset name
        version: Version string (optional)
        schema: Optional schema definition
        location: Storage location/path
        parent: Parent asset for lineage
        tags: Metadata tags
        properties: Additional properties (merged with auto-extracted)
        auto_extract_stats: Whether to automatically extract statistics
        **kwargs: Additional properties to store; an entry in
            ``properties`` with the same key takes precedence

    Returns:
        Dataset instance with auto-extracted statistics

    Example:
        >>> df = pd.read_csv("data.csv")
        >>> dataset = Dataset.create(data=df, name="my_data", source="data.csv")
        >>> # Stats are automatically extracted!
    """
    # Copy first so the caller's ``properties`` dict is not modified.
    combined = properties.copy() if properties else {}
    # Extra keywords only fill gaps; explicit properties are never overwritten.
    for extra_key, extra_value in kwargs.items():
        combined.setdefault(extra_key, extra_value)

    return cls(
        name=name,
        version=version,
        data=data,
        schema=schema,
        location=location,
        parent=parent,
        tags=tags,
        properties=combined,
        auto_extract_stats=auto_extract_stats,
    )

from_csv(path: str, name: str | None = None, **kwargs: Any) -> Dataset classmethod

Load a Dataset from a CSV file with automatic statistics.

Parameters:

Name Type Description Default
path str

Path to CSV file

required
name str | None

Dataset name (defaults to filename)

None
**kwargs Any

Additional properties

{}

Returns:

Type Description
Dataset

Dataset with auto-extracted statistics

Source code in flowyml/assets/dataset.py
@classmethod
def from_csv(
    cls,
    path: str,
    name: str | None = None,
    **kwargs: Any,
) -> "Dataset":
    """Load a Dataset from a CSV file with automatic statistics.

    Args:
        path: Path to CSV file
        name: Dataset name (defaults to filename)
        **kwargs: Additional properties

    Returns:
        Dataset with auto-extracted statistics
    """
    try:
        import pandas as pd

        df = pd.read_csv(path)
        dataset_name = name or path.split("/")[-1].replace(".csv", "")

        return cls.create(
            data=df,
            name=dataset_name,
            location=path,
            properties={"source": path, "format": "csv"},
            **kwargs,
        )
    except ImportError:
        raise ImportError("pandas is required for from_csv(). Install with: pip install pandas")

from_parquet(path: str, name: str | None = None, **kwargs: Any) -> Dataset classmethod

Load a Dataset from a Parquet file with automatic statistics.

Parameters:

Name Type Description Default
path str

Path to Parquet file

required
name str | None

Dataset name (defaults to filename)

None
**kwargs Any

Additional properties

{}

Returns:

Type Description
Dataset

Dataset with auto-extracted statistics

Source code in flowyml/assets/dataset.py
@classmethod
def from_parquet(
    cls,
    path: str,
    name: str | None = None,
    **kwargs: Any,
) -> "Dataset":
    """Load a Dataset from a Parquet file with automatic statistics.

    Args:
        path: Path to Parquet file
        name: Dataset name (defaults to filename)
        **kwargs: Additional properties

    Returns:
        Dataset with auto-extracted statistics
    """
    try:
        import pandas as pd

        df = pd.read_parquet(path)
        dataset_name = name or path.split("/")[-1].replace(".parquet", "")

        return cls.create(
            data=df,
            name=dataset_name,
            location=path,
            properties={"source": path, "format": "parquet"},
            **kwargs,
        )
    except ImportError:
        raise ImportError("pandas and pyarrow are required for from_parquet()")

get_column_stat(column: str, stat: str) -> Any

Get a specific statistic for a column.

Parameters:

Name Type Description Default
column str

Column name

required
stat str

Statistic name (mean, std, min, max, median, count, unique)

required

Returns:

Type Description
Any

The statistic value or None

Source code in flowyml/assets/dataset.py
def get_column_stat(self, column: str, stat: str) -> Any:
    """Look up one statistic of one column.

    Args:
        column: Column name
        stat: Statistic name (mean, std, min, max, median, count, unique)

    Returns:
        The statistic value, or None when there are no column stats, the
        column is unknown, or the statistic is not recorded for it
    """
    per_column = self.column_stats
    # Guard clause: nothing recorded at all, or nothing for this column.
    if not per_column or column not in per_column:
        return None
    return per_column[column].get(stat)

split(train_ratio: float = 0.8, name_prefix: str | None = None, random_state: int | None = 42) -> tuple[Dataset, Dataset]

Split dataset into train/test with auto-extracted statistics.

Parameters:

Name Type Description Default
train_ratio float

Ratio for training split

0.8
name_prefix str | None

Prefix for split dataset names

None
random_state int | None

Random seed for reproducibility

42

Returns:

Type Description
tuple[Dataset, Dataset]

Tuple of (train_dataset, test_dataset)

Source code in flowyml/assets/dataset.py
def split(
    self,
    train_ratio: float = 0.8,
    name_prefix: str | None = None,
    random_state: int | None = 42,
) -> tuple["Dataset", "Dataset"]:
    """Split dataset into train/test with auto-extracted statistics.

    pandas data is shuffled with ``random_state`` and then cut at
    ``train_ratio``. Dict data with a ``"features"`` entry is cut in its
    existing order (no shuffling, so ``random_state`` is not used there).
    Any other data type is not split: both results wrap the full data.

    Args:
        train_ratio: Ratio for training split
        name_prefix: Prefix for split dataset names (defaults to this
            dataset's name)
        random_state: Random seed for reproducibility (pandas data only)

    Returns:
        Tuple of (train_dataset, test_dataset), both children of this dataset
    """
    prefix = name_prefix or self.name

    # Try to split based on data type
    data_type = DatasetStats.detect_data_type(self.data)

    if data_type == "pandas":
        try:
            df = self.data.sample(frac=1, random_state=random_state).reset_index(drop=True)
            train_size = int(len(df) * train_ratio)
            train_data = df[:train_size]
            test_data = df[train_size:]
        except Exception:
            # Deliberate best-effort fallback kept from the original design.
            # NOTE(review): this makes train and test identical without any
            # signal to the caller; consider surfacing a warning here.
            train_data = self.data
            test_data = self.data
    elif data_type == "dict" and "features" in self.data:
        # Split dict format
        features = self.data["features"]
        target = self.data.get("target", [])

        # An empty features mapping has no samples; avoid StopIteration from next().
        n_samples = len(next(iter(features.values()))) if features else 0
        train_size = int(n_samples * train_ratio)

        train_features = {k: v[:train_size] for k, v in features.items()}
        test_features = {k: v[train_size:] for k, v in features.items()}

        # Use len() rather than truthiness: array-like targets such as NumPy
        # arrays raise ValueError when evaluated as a boolean.
        has_target = target is not None and len(target) > 0

        train_data = {"features": train_features, "target": target[:train_size] if has_target else []}
        test_data = {"features": test_features, "target": target[train_size:] if has_target else []}
    else:
        # Fallback - no actual splitting
        train_data = self.data
        test_data = self.data

    train_dataset = Dataset(
        name=f"{prefix}_train",
        version=self.version,
        data=train_data,
        schema=self.schema,
        parent=self,
        tags={**self.metadata.tags, "split": "train"},
    )

    test_dataset = Dataset(
        name=f"{prefix}_test",
        version=self.version,
        data=test_data,
        schema=self.schema,
        parent=self,
        tags={**self.metadata.tags, "split": "test"},
    )

    return train_dataset, test_dataset

validate_schema() -> bool

Validate data against schema (placeholder).

Source code in flowyml/assets/dataset.py
def validate_schema(self) -> bool:
    """Validate data against schema (placeholder that currently always passes)."""
    nothing_to_validate = self.schema is None or self.data is None
    if nothing_to_validate:
        return True
    # Real schema validation is not implemented yet; accept everything.
    return True

Class Model

Bases: Asset

Model asset with automatic metadata extraction and training history.

The Model class automatically extracts metadata from various ML frameworks, reducing boilerplate code and improving UX. It also captures training history for visualization in the FlowyML dashboard.

Supported frameworks
  • Keras/TensorFlow: Auto-extracts layers, parameters, optimizer, loss
  • PyTorch: Auto-extracts modules, parameters, training mode
  • Scikit-learn: Auto-extracts hyperparameters, feature importance
  • XGBoost/LightGBM: Auto-extracts trees, hyperparameters
Example

Minimal usage - properties auto-extracted!

model_asset = Model.create(
    data=trained_keras_model,
    name="my_model",
)
print(model_asset.parameters)  # Auto-extracted
print(model_asset.framework)  # Auto-detected

With FlowyML callback - training history auto-captured

callback = FlowymlKerasCallback(experiment_name="demo")
model.fit(X, y, callbacks=[callback])
model_asset = Model.create(
    data=model,
    name="trained_model",
    flowyml_callback=callback,  # Auto-extracts training history!
)

Initialize Model with automatic metadata extraction.

Parameters:

Name Type Description Default
name str

Model name

required
version str | None

Version string

None
data Any

The model object (Keras, PyTorch, sklearn, etc.)

None
architecture str | None

Architecture name (auto-detected if not provided)

None
framework str | None

Framework name (auto-detected if not provided)

None
input_shape tuple | None

Input shape (auto-detected for Keras)

None
output_shape tuple | None

Output shape (auto-detected for Keras)

None
trained_on Asset | None

Dataset this model was trained on

None
parent Asset | None

Parent asset for lineage

None
tags dict[str, str] | None

Metadata tags

None
properties dict[str, Any] | None

Additional properties (merged with auto-extracted)

None
training_history dict[str, list] | None

Training metrics per epoch

None
auto_extract bool

Whether to auto-extract model metadata

True
Source code in flowyml/assets/model.py
def __init__(
    self,
    name: str,
    version: str | None = None,
    data: Any = None,
    architecture: str | None = None,
    framework: str | None = None,
    input_shape: tuple | None = None,
    output_shape: tuple | None = None,
    trained_on: Asset | None = None,
    parent: Asset | None = None,
    tags: dict[str, str] | None = None,
    properties: dict[str, Any] | None = None,
    training_history: dict[str, list] | None = None,
    auto_extract: bool = True,
):
    """Initialize Model with automatic metadata extraction.

    Args:
        name: Model name
        version: Version string
        data: The model object (Keras, PyTorch, sklearn, etc.)
        architecture: Architecture name (taken from the extracted info if not provided)
        framework: Framework name (taken from the extracted info if not provided)
        input_shape: Input shape; stored as a string property when given
        output_shape: Output shape; stored as a string property when given
        trained_on: Dataset this model was trained on; linked as an
            additional parent in the lineage graph
        parent: Parent asset for lineage
        tags: Metadata tags
        properties: Additional properties; these win over auto-extracted
            values that share the same key
        training_history: Training metrics per epoch
        auto_extract: Whether to auto-extract model metadata
    """
    # Start from a copy so the caller's dict is left untouched.
    final_properties = properties.copy() if properties else {}

    # Auto-extract model metadata if enabled
    if auto_extract and data is not None:
        extracted = ModelInspector.extract_info(data)
        # Merge - user-provided values take precedence
        for key, value in extracted.items():
            if key not in final_properties:
                final_properties[key] = value

        # Set framework from extracted if not provided
        if framework is None and "framework" in extracted:
            framework = extracted["framework"]

        # Set architecture from extracted if not provided
        if architecture is None and "architecture" in extracted:
            architecture = extracted["architecture"]

    super().__init__(
        name=name,
        version=version,
        data=data,
        parent=parent,
        tags=tags,
        properties=final_properties,
    )

    self.architecture = architecture
    self.framework = framework
    self.input_shape = input_shape
    self.output_shape = output_shape
    self.training_history = training_history

    # Track training dataset. Skip the link when the same object was already
    # registered as ``parent``; otherwise the edge would appear twice in both
    # ``self.parents`` and ``trained_on.children``.
    # NOTE(review): the metadata's parent ids are not extended with
    # ``trained_on`` here - confirm whether that is intentional.
    if trained_on and all(trained_on is not existing for existing in self.parents):
        self.parents.append(trained_on)
        trained_on.children.append(self)

    # Add model-specific properties (explicit ones override extracted)
    if architecture:
        self.metadata.properties["architecture"] = architecture
    if framework:
        self.metadata.properties["framework"] = framework
    if input_shape:
        self.metadata.properties["input_shape"] = str(input_shape)
    if output_shape:
        self.metadata.properties["output_shape"] = str(output_shape)

Attributes

hyperparameters: dict | None property

Get hyperparameters (auto-extracted from sklearn/xgboost).

layer_types: list[str] | None property

Get list of layer types (auto-extracted).

learning_rate: float | None property

Get learning rate (auto-extracted from Keras).

loss_function: str | None property

Get loss function (auto-extracted from Keras).

metrics: list[str] | None property

Get metrics (auto-extracted from Keras).

num_layers: int | None property

Get number of layers (auto-extracted).

optimizer: str | None property

Get optimizer name (auto-extracted from Keras).

parameters: int | None property

Get number of model parameters (auto-extracted).

trainable_parameters: int | None property

Get number of trainable parameters (auto-extracted).

Functions

__repr__() -> str

String representation with key info.

Source code in flowyml/assets/model.py
def __repr__(self) -> str:
    """Render the model name followed by whichever key facts are truthy."""
    segments = [f"Model(name='{self.name}'"]

    model_framework = self.framework
    if model_framework:
        segments.append(f"framework='{model_framework}'")

    param_count = self.parameters
    if param_count:
        # Thousands separators keep large parameter counts readable.
        segments.append(f"params={param_count:,}")

    history = self.training_history
    if history:
        epoch_count = len(history.get("epochs", []))
        segments.append(f"epochs={epoch_count}")

    return f"{', '.join(segments)})"

create(data: Any, name: str | None = None, version: str | None = None, parent: Asset | None = None, flowyml_callback: Any = None, keras_history: Any = None, auto_extract: bool = True, **kwargs: Any) -> Model classmethod

Create a Model asset with automatic metadata extraction.

This is the preferred way to create Model objects. Metadata is automatically extracted from the model, and training history can be captured from FlowyML callbacks.

Parameters:

Name Type Description Default
data Any

The model object (Keras, PyTorch, sklearn, etc.)

required
name str | None

Asset name (auto-generated if not provided)

None
version str | None

Asset version

None
parent Asset | None

Parent asset for lineage

None
flowyml_callback Any

FlowymlKerasCallback for auto-capturing training history

None
keras_history Any

Keras History object from model.fit()

None
auto_extract bool

Whether to auto-extract model metadata

True
**kwargs Any

Additional parameters including: - training_history: Dict of training metrics per epoch - architecture: Model architecture name - framework: ML framework (keras, pytorch, etc.) - properties: Additional properties - tags: Metadata tags

{}

Returns:

Type Description
Model

New Model instance with auto-extracted metadata

Example

Simple usage - everything auto-extracted

model_asset = Model.create(data=model, name="my_model")

With FlowyML callback

callback = FlowymlKerasCallback(experiment_name="demo")
model.fit(X, y, callbacks=[callback])
model_asset = Model.create(
    data=model,
    name="trained_model",
    flowyml_callback=callback,
)

With Keras History

history = model.fit(X, y)
model_asset = Model.create(
    data=model,
    name="trained_model",
    keras_history=history,
)

Source code in flowyml/assets/model.py
@classmethod
def create(
    cls,
    data: Any,
    name: str | None = None,
    version: str | None = None,
    parent: "Asset | None" = None,
    flowyml_callback: Any = None,
    keras_history: Any = None,
    auto_extract: bool = True,
    **kwargs: Any,
) -> "Model":
    """Create a Model asset with automatic metadata extraction.

    This is the preferred way to create Model objects. Metadata is
    automatically extracted from the model, and training history can
    be captured from FlowyML callbacks.

    Args:
        data: The model object (Keras, PyTorch, sklearn, etc.)
        name: Asset name (auto-generated if not provided)
        version: Asset version
        parent: Parent asset for lineage
        flowyml_callback: FlowymlKerasCallback for auto-capturing training history
        keras_history: Keras History object from model.fit()
        auto_extract: Whether to auto-extract model metadata
        **kwargs: Additional parameters including:
            - training_history: Dict of training metrics per epoch
            - architecture: Model architecture name
            - framework: ML framework (keras, pytorch, etc.)
            - properties: Additional properties
            - tags: Metadata tags

    Returns:
        New Model instance with auto-extracted metadata

    Example:
        >>> # Simple usage - everything auto-extracted
        >>> model_asset = Model.create(data=model, name="my_model")

        >>> # With FlowyML callback
        >>> callback = FlowymlKerasCallback(experiment_name="demo")
        >>> model.fit(X, y, callbacks=[callback])
        >>> model_asset = Model.create(
        ...     data=model,
        ...     name="trained_model",
        ...     flowyml_callback=callback,
        ... )

        >>> # With Keras History
        >>> history = model.fit(X, y)
        >>> model_asset = Model.create(
        ...     data=model,
        ...     name="trained_model",
        ...     keras_history=history,
        ... )
    """
    from datetime import datetime

    asset_name = name or f"Model_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    # Extract Model-specific parameters
    training_history = kwargs.pop("training_history", None)
    architecture = kwargs.pop("architecture", None)
    framework = kwargs.pop("framework", None)
    input_shape = kwargs.pop("input_shape", None)
    output_shape = kwargs.pop("output_shape", None)
    trained_on = kwargs.pop("trained_on", None)

    # Auto-extract training history from callback or history object
    if training_history is None:
        if flowyml_callback is not None:
            training_history = ModelInspector.extract_training_history_from_callback(
                flowyml_callback,
            )
        elif keras_history is not None:
            training_history = ModelInspector.extract_training_history_from_callback(
                keras_history,
            )

    # Extract tags and properties
    tags = kwargs.pop("tags", {})
    props = kwargs.pop("properties", {})
    # Merge remaining kwargs into properties
    props.update(kwargs)

    return cls(
        name=asset_name,
        version=version,
        data=data,
        architecture=architecture,
        framework=framework,
        input_shape=input_shape,
        output_shape=output_shape,
        trained_on=trained_on,
        parent=parent,
        tags=tags,
        properties=props,
        training_history=training_history,
        auto_extract=auto_extract,
    )

from_keras(model: Any, name: str | None = None, callback: Any = None, history: Any = None, **kwargs: Any) -> Model classmethod

Create a Model asset from a Keras model with full auto-extraction.

Parameters:

Name Type Description Default
model Any

Keras model object

required
name str | None

Asset name

None
callback Any

FlowymlKerasCallback for training history

None
history Any

Keras History object from model.fit()

None
**kwargs Any

Additional properties

{}

Returns:

Type Description
Model

Model asset with auto-extracted Keras metadata

Source code in flowyml/assets/model.py
@classmethod
def from_keras(
    cls,
    model: Any,
    name: str | None = None,
    callback: Any = None,
    history: Any = None,
    **kwargs: Any,
) -> "Model":
    """Create a Model asset from a Keras model with full auto-extraction.

    Args:
        model: Keras model object
        name: Asset name
        callback: FlowymlKerasCallback for training history
        history: Keras History object from model.fit()
        **kwargs: Additional properties

    Returns:
        Model asset with auto-extracted Keras metadata
    """
    return cls.create(
        data=model,
        name=name,
        framework="keras",
        flowyml_callback=callback,
        keras_history=history,
        **kwargs,
    )

from_pytorch(model: Any, name: str | None = None, training_history: dict | None = None, **kwargs: Any) -> Model classmethod

Create a Model asset from a PyTorch model with full auto-extraction.

Parameters:

Name Type Description Default
model Any

PyTorch model object (nn.Module)

required
name str | None

Asset name

None
training_history dict | None

Training metrics dict

None
**kwargs Any

Additional properties

{}

Returns:

Type Description
Model

Model asset with auto-extracted PyTorch metadata

Source code in flowyml/assets/model.py
@classmethod
def from_pytorch(
    cls,
    model: Any,
    name: str | None = None,
    training_history: dict | None = None,
    **kwargs: Any,
) -> "Model":
    """Create a Model asset from a PyTorch model with full auto-extraction.

    Args:
        model: PyTorch model object (nn.Module)
        name: Asset name
        training_history: Training metrics dict
        **kwargs: Additional properties

    Returns:
        Model asset with auto-extracted PyTorch metadata
    """
    return cls.create(
        data=model,
        name=name,
        framework="pytorch",
        training_history=training_history,
        **kwargs,
    )

from_sklearn(model: Any, name: str | None = None, **kwargs: Any) -> Model classmethod

Create a Model asset from a scikit-learn model with full auto-extraction.

Parameters:

Name Type Description Default
model Any

Scikit-learn model object

required
name str | None

Asset name

None
**kwargs Any

Additional properties

{}

Returns:

Type Description
Model

Model asset with auto-extracted sklearn metadata

Source code in flowyml/assets/model.py
@classmethod
def from_sklearn(
    cls,
    model: Any,
    name: str | None = None,
    **kwargs: Any,
) -> "Model":
    """Create a Model asset from a scikit-learn model with full auto-extraction.

    Args:
        model: Scikit-learn model object
        name: Asset name
        **kwargs: Additional properties

    Returns:
        Model asset with auto-extracted sklearn metadata
    """
    return cls.create(
        data=model,
        name=name,
        framework="sklearn",
        **kwargs,
    )

get_architecture_info() -> dict[str, Any]

Get architecture information.

Source code in flowyml/assets/model.py
def get_architecture_info(self) -> dict[str, Any]:
    """Return the architecture-related attributes as a dict.

    Each key is the attribute name and each value is the current value of
    that attribute on this instance; nothing is filtered out.
    """
    field_names = (
        "architecture",
        "framework",
        "input_shape",
        "output_shape",
        "parameters",
        "trainable_parameters",
        "num_layers",
        "layer_types",
    )
    return {field: getattr(self, field) for field in field_names}

get_parameters_count() -> int | None

Get number of model parameters if available.

Source code in flowyml/assets/model.py
def get_parameters_count(self) -> int | None:
    """Get number of model parameters if available."""
    return self.parameters

get_training_datasets()

Get all datasets this model was trained on.

Source code in flowyml/assets/model.py
def get_training_datasets(self):
    """Return the entries of ``self.parents`` that are Dataset instances."""
    # Imported inside the function, as in the original
    from flowyml.assets.dataset import Dataset

    datasets = []
    for candidate in self.parents:
        if isinstance(candidate, Dataset):
            datasets.append(candidate)
    return datasets

get_training_info() -> dict[str, Any]

Get training information.

Source code in flowyml/assets/model.py
def get_training_info(self) -> dict[str, Any]:
    """Summarize training configuration and, if present, training history.

    Always considers optimizer, learning rate, loss function and metrics.
    When ``self.training_history`` is truthy, also reports the number of
    entries under its ``"epochs"`` key as ``epochs_trained`` and the last
    value of every other non-empty series as ``final_<key>``.

    Returns:
        The collected entries with every ``None`` value removed.
    """
    info = {
        "optimizer": self.optimizer,
        "learning_rate": self.learning_rate,
        "loss_function": self.loss_function,
        "metrics": self.metrics,
    }

    if self.training_history:
        info["epochs_trained"] = len(self.training_history.get("epochs", []))

        # Last recorded value of each non-empty series other than "epochs"
        info.update(
            (f"final_{metric}", series[-1])
            for metric, series in self.training_history.items()
            if metric != "epochs" and series
        )

    return {field: value for field, value in info.items() if value is not None}