
🤖 Models API Reference

Welcome to the KerasFactory Models documentation! All models are designed to work exclusively with Keras 3 and provide specialized implementations for advanced machine learning tasks including time series forecasting, tabular data processing, and multimodal learning.

What You'll Find Here

Each model includes detailed documentation with:

- ✨ Complete parameter descriptions with types and defaults
- 🎯 Usage examples showing real-world applications
- ⚡ Best practices and performance considerations
- 🎨 When-to-use guidance for each model
- 🔧 Implementation notes for developers

Production-Ready

All models are fully tested, documented, and ready for production use.

Keras 3 Compatible

All models are built on top of Keras base classes and are fully compatible with Keras 3.

⏱️ Time Series Forecasting

🎛️ TimeMixer

TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting.

A state-of-the-art time series forecasting model that uses decomposable components and multi-scale mixing to capture both seasonal and trend patterns at different temporal scales.

kerasfactory.models.TimeMixer

TimeMixer model for time series forecasting.

Classes

TimeMixer
```python
TimeMixer(
    seq_len: int,
    pred_len: int,
    n_features: int,
    d_model: int = 32,
    d_ff: int = 32,
    e_layers: int = 4,
    dropout: float = 0.1,
    decomp_method: str = "moving_avg",
    moving_avg: int = 25,
    top_k: int = 5,
    channel_independence: int = 0,
    down_sampling_layers: int = 1,
    down_sampling_window: int = 2,
    down_sampling_method: str = "avg",
    use_norm: bool = True,
    decoder_input_size_multiplier: float = 0.5,
    name: str | None = None,
    **kwargs: Any
)
```

TimeMixer: Decomposable Multi-Scale Mixing for Time Series Forecasting.

A state-of-the-art time series forecasting model that uses series decomposition and multi-scale mixing to capture both trend and seasonal patterns.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| seq_len | int | Input sequence length. | required |
| pred_len | int | Prediction horizon. | required |
| n_features | int | Number of time series features. | required |
| d_model | int | Model dimension. | 32 |
| d_ff | int | Feed-forward dimension. | 32 |
| e_layers | int | Number of encoder layers. | 4 |
| dropout | float | Dropout rate. | 0.1 |
| decomp_method | str | Decomposition method ('moving_avg' or 'dft_decomp'). | 'moving_avg' |
| moving_avg | int | Moving average window size. | 25 |
| top_k | int | Top-k frequencies kept by the DFT decomposition. | 5 |
| channel_independence | int | 0 for channel-dependent, 1 for channel-independent processing. | 0 |
| down_sampling_layers | int | Number of downsampling layers. | 1 |
| down_sampling_window | int | Downsampling window size. | 2 |
| down_sampling_method | str | Downsampling method ('avg', 'max', or 'conv'). | 'avg' |
| use_norm | bool | Whether to use normalization. | True |
| decoder_input_size_multiplier | float | Decoder input size as a fraction of seq_len; must lie in (0, 1). | 0.5 |
| name | str \| None | Optional model name. | None |
| `**kwargs` | Any | Additional keyword arguments. | {} |
Example
```python
import keras
from kerasfactory.models import TimeMixer

# Create model
model = TimeMixer(
    seq_len=96,
    pred_len=12,
    n_features=7,
    d_model=32,
    e_layers=2
)

# Compile and train
model.compile(optimizer='adam', loss='mse')

x = keras.random.normal((32, 96, 7))
y = keras.random.normal((32, 12, 7))
model.fit(x, y, epochs=10)

# Make predictions
predictions = model.predict(x)
```

Initialize TimeMixer model.

Source code in kerasfactory/models/TimeMixer.py
```python
def __init__(
    self,
    seq_len: int,
    pred_len: int,
    n_features: int,
    d_model: int = 32,
    d_ff: int = 32,
    e_layers: int = 4,
    dropout: float = 0.1,
    decomp_method: str = "moving_avg",
    moving_avg: int = 25,
    top_k: int = 5,
    channel_independence: int = 0,
    down_sampling_layers: int = 1,
    down_sampling_window: int = 2,
    down_sampling_method: str = "avg",
    use_norm: bool = True,
    decoder_input_size_multiplier: float = 0.5,
    name: str | None = None,
    **kwargs: Any,
) -> None:
    """Initialize TimeMixer model."""
    # Store parameters
    self._seq_len = seq_len
    self._pred_len = pred_len
    self._n_features = n_features
    self._d_model = d_model
    self._d_ff = d_ff
    self._e_layers = e_layers
    self._dropout = dropout
    self._decomp_method = decomp_method
    self._moving_avg = moving_avg
    self._top_k = top_k
    self._channel_independence = channel_independence
    self._down_sampling_layers = down_sampling_layers
    self._down_sampling_window = down_sampling_window
    self._down_sampling_method = down_sampling_method
    self._use_norm = use_norm
    self._decoder_input_size_multiplier = decoder_input_size_multiplier

    # Validate parameters
    self._validate_params()

    # Create model
    super().__init__(name=name or "TimeMixer", **kwargs)

    # Store as public attributes
    self.seq_len = self._seq_len
    self.pred_len = self._pred_len
    self.n_features = self._n_features
    self.d_model = self._d_model
    self.d_ff = self._d_ff
    self.e_layers = self._e_layers
    self.dropout_rate = self._dropout
    self.decomp_method = self._decomp_method
    self.moving_avg_kernel = self._moving_avg
    self.top_k = self._top_k
    self.channel_independence = self._channel_independence
    self.down_sampling_layers_count = self._down_sampling_layers
    self.down_sampling_window_size = self._down_sampling_window
    self.down_sampling_method = self._down_sampling_method
    self.use_norm = self._use_norm

    # Build label_len
    self.label_len = int(math.ceil(seq_len * decoder_input_size_multiplier))
    if (self.label_len >= seq_len) or (self.label_len <= 0):
        raise ValueError(
            f"Check decoder_input_size_multiplier={decoder_input_size_multiplier}, range (0,1)",
        )
```

Key Features:

- Trend-seasonal decomposition (moving average or DFT)
- Multi-scale seasonal and trend mixing
- Channel-independent or channel-dependent processing
- Support for temporal features (month, day, hour, etc.)
- Reversible instance normalization for improved training
- Multivariate time series forecasting
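For instance, the DFT-based decomposition and multiscale options above can be combined as in the following minimal sketch. It uses only parameters documented in the table above; the values are illustrative, not tuned:

```python
import keras
from kerasfactory.models import TimeMixer

# Minimal sketch: DFT decomposition keeping the top_k dominant frequencies,
# channel-independent processing, and two downsampling scales.
model = TimeMixer(
    seq_len=96,
    pred_len=24,
    n_features=7,
    decomp_method="dft_decomp",
    top_k=5,
    channel_independence=1,
    down_sampling_layers=2,
    down_sampling_window=2,
)
model.compile(optimizer="adam", loss="mse")
```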

Architecture:

- Decomposition layer extracts seasonal and trend components
- Multi-scale mixing layers hierarchically combine patterns
- Encoder blocks with past-decomposable mixing
- Projection layers for the forecast horizon
- Reversible normalization for stable training

References:

- Wang, S., et al. (2024). "TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting." ICLR 2024.

🔀 TSMixer

TSMixer: All-MLP Architecture for Multivariate Time Series Forecasting.

An efficient all-MLP model that jointly learns temporal and cross-sectional representations through alternating temporal and feature mixing layers without attention mechanisms.

kerasfactory.models.TSMixer

TSMixer Model - MLP-based multivariate time series forecasting.

Classes

TSMixer
```python
TSMixer(
    seq_len: int,
    pred_len: int,
    n_features: int,
    n_blocks: int = 2,
    ff_dim: int = 64,
    dropout: float = 0.1,
    use_norm: bool = True,
    norm_affine: bool = False,
    name: str | None = None,
    **kwargs: Any
)
```

TSMixer: MLP-based Multivariate Time Series Forecasting.

Time-Series Mixer (TSMixer) is an MLP-based multivariate time-series forecasting model that jointly learns temporal and cross-sectional representations by repeatedly combining temporal and feature information through stacked mixing layers.

A mixing layer consists of sequential temporal and feature MLPs that process time series data in a straightforward manner without complex architectures like attention mechanisms.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| seq_len | int | Sequence length (number of lookback steps). | required |
| pred_len | int | Prediction length (forecast horizon). | required |
| n_features | int | Number of features/time series. | required |
| n_blocks | int | Number of mixing layers in the model. | 2 |
| ff_dim | int | Hidden dimension for feed-forward networks in feature mixing. | 64 |
| dropout | float | Dropout rate between 0 and 1. | 0.1 |
| use_norm | bool | If True, uses Reversible Instance Normalization. | True |
| norm_affine | bool | If True, uses a learnable affine transformation in normalization. | False |
| name | str \| None | Optional model name. | None |
| `**kwargs` | Any | Additional keyword arguments. | {} |
Input shape

(batch_size, seq_len, n_features)

Output shape

(batch_size, pred_len, n_features)

Example

```python
import keras
from kerasfactory.models import TSMixer

model = TSMixer(
    seq_len=96,
    pred_len=12,
    n_features=7,
    n_blocks=2,
    ff_dim=64,
    dropout=0.1
)
model.compile(optimizer='adam', loss='mse')
x = keras.random.normal((32, 96, 7))
y = model(x)
y.shape  # (32, 12, 7)
```

References

Chen, Si-An, Chun-Liang Li, Nate Yoder, Sercan O. Arik, and Tomas Pfister (2023). "TSMixer: An All-MLP Architecture for Time Series Forecasting." arXiv preprint arXiv:2303.06053.

Initialize the TSMixer model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| seq_len | int | Sequence length. | required |
| pred_len | int | Prediction length. | required |
| n_features | int | Number of features. | required |
| n_blocks | int | Number of mixing layers. | 2 |
| ff_dim | int | Feed-forward hidden dimension. | 64 |
| dropout | float | Dropout rate. | 0.1 |
| use_norm | bool | Whether to use instance normalization. | True |
| norm_affine | bool | Whether to use a learnable affine transformation in normalization. | False |
| name | str \| None | Optional model name. | None |
| `**kwargs` | Any | Additional keyword arguments. | {} |
Source code in kerasfactory/models/TSMixer.py
```python
def __init__(
    self,
    seq_len: int,
    pred_len: int,
    n_features: int,
    n_blocks: int = 2,
    ff_dim: int = 64,
    dropout: float = 0.1,
    use_norm: bool = True,
    norm_affine: bool = False,
    name: str | None = None,
    **kwargs: Any,
) -> None:
    """Initialize the TSMixer model.

    Args:
        seq_len: Sequence length.
        pred_len: Prediction length.
        n_features: Number of features.
        n_blocks: Number of mixing layers.
        ff_dim: Feed-forward hidden dimension.
        dropout: Dropout rate.
        use_norm: Whether to use instance normalization.
        norm_affine: Whether to use learnable affine transformation in normalization.
        name: Optional model name.
        **kwargs: Additional keyword arguments.
    """
    # Set private attributes
    self._seq_len = seq_len
    self._pred_len = pred_len
    self._n_features = n_features
    self._n_blocks = n_blocks
    self._ff_dim = ff_dim
    self._dropout = dropout
    self._use_norm = use_norm
    self._norm_affine = norm_affine

    # Validate parameters
    self._validate_params()

    # Set public attributes BEFORE calling parent's __init__
    self.seq_len = self._seq_len
    self.pred_len = self._pred_len
    self.n_features = self._n_features
    self.n_blocks = self._n_blocks
    self.ff_dim = self._ff_dim
    self.dropout_rate = self._dropout
    self.use_norm = self._use_norm
    self.norm_affine = self._norm_affine

    # Model components
    self.norm_layer: ReversibleInstanceNormMultivariate | None = None
    self.mixing_layers: list[MixingLayer] | None = None
    self.output_layer: layers.Dense | None = None

    # Call parent's __init__ after setting public attributes
    super().__init__(name=name, **kwargs)
```

Functions
summary_info

```python
summary_info() -> dict[str, Any]
```

Get model summary information, automatically building if needed.

This method ensures the model is built before accessing parameter counts.

Returns:

dict[str, Any]: A dictionary containing model information:

- total_params: Total number of parameters
- trainable_params: Number of trainable parameters
- non_trainable_params: Number of non-trainable parameters
- config: Model configuration dictionary
Example

```python
model = TSMixer(seq_len=96, pred_len=12, n_features=5)
info = model.summary_info()
print(f"Total params: {info['total_params']:,}")
```

Source code in kerasfactory/models/TSMixer.py
```python
def summary_info(self) -> dict[str, Any]:
    """Get model summary information, automatically building if needed.

    This method ensures the model is built before accessing parameter counts.

    Returns:
        A dictionary containing model information:
        - total_params: Total number of parameters
        - trainable_params: Number of trainable parameters
        - non_trainable_params: Number of non-trainable parameters
        - config: Model configuration dictionary

    Example:
        >>> model = TSMixer(seq_len=96, pred_len=12, n_features=5)
        >>> info = model.summary_info()
        >>> print(f"Total params: {info['total_params']:,}")
    """
    # Build the model if not already built
    if not self.built:
        self.build((None, self.seq_len, self.n_features))

    return {
        "total_params": self.count_params(),
        "trainable_params": sum(keras.ops.size(w) for w in self.trainable_weights),
        "non_trainable_params": sum(
            keras.ops.size(w) for w in self.non_trainable_weights
        ),
        "config": self.get_config(),
    }
```

Key Features:

- Temporal and feature mixing for dual-perspective learning
- Optional reversible instance normalization for training stability
- Configurable stacking of mixing layers (n_blocks parameter)
- Linear time complexity O(B × T × D²) vs attention's O(B × T²)
- Multivariate time series forecasting support
- No attention mechanisms: simple, efficient, interpretable

Architecture:

- Instance normalization (optional reversible normalization)
- Stacked mixing layers (temporal + feature mixing per block)
- Output projection layer mapping seq_len → pred_len
- Reverse instance denormalization (optional)

When to Use:

- Large batch sizes or long sequences where efficiency matters
- Interpretability is important (no attention black box)
- GPU memory is limited: MLP-based mixing is more memory efficient
- Multi-scale temporal and feature interactions are needed
- Long-term forecasting with multiple related time series

References: - Chen, Si-An, et al. (2023). "TSMixer: An All-MLP Architecture for Time Series Forecasting." arXiv:2303.06053

🏗️ Core Models

🚀 BaseFeedForwardModel

Flexible feed-forward model architecture for tabular data with customizable layers.

kerasfactory.models.feed_forward.BaseFeedForwardModel

```python
BaseFeedForwardModel(
    feature_names: list[str],
    hidden_units: list[int],
    output_units: int = 1,
    dropout_rate: float = 0.0,
    activation: str = "relu",
    preprocessing_model: Model | None = None,
    kernel_initializer: str | Any | None = "glorot_uniform",
    bias_initializer: str | Any | None = "zeros",
    kernel_regularizer: str | Any | None = None,
    bias_regularizer: str | Any | None = None,
    activity_regularizer: str | Any | None = None,
    kernel_constraint: str | Any | None = None,
    bias_constraint: str | Any | None = None,
    **kwargs: Any
)
```

Base feed forward neural network model.

This model implements a basic feed forward neural network with configurable hidden layers, activations, and regularization options.

Example
```python
# Create a simple feed forward model
model = BaseFeedForwardModel(
    feature_names=['feature1', 'feature2'],
    hidden_units=[64, 32],
    output_units=1
)

# Compile and train the model
# (train_dataset is assumed to yield inputs keyed by the feature names above)
model.compile(optimizer='adam', loss='mse')
model.fit(train_dataset, epochs=10)
```

Initialize Feed Forward Neural Network.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| feature_names | list[str] | List of feature names. | required |
| hidden_units | list[int] | List of hidden layer units. | required |
| output_units | int | Number of output units. | 1 |
| dropout_rate | float | Dropout rate. | 0.0 |
| activation | str | Activation function. | 'relu' |
| preprocessing_model | Model \| None | Optional preprocessing model. | None |
| kernel_initializer | str \| Any \| None | Weight initializer. | 'glorot_uniform' |
| bias_initializer | str \| Any \| None | Bias initializer. | 'zeros' |
| kernel_regularizer | str \| Any \| None | Weight regularizer. | None |
| bias_regularizer | str \| Any \| None | Bias regularizer. | None |
| activity_regularizer | str \| Any \| None | Activity regularizer. | None |
| kernel_constraint | str \| Any \| None | Weight constraint. | None |
| bias_constraint | str \| Any \| None | Bias constraint. | None |
| `**kwargs` | Any | Additional arguments. | {} |
Source code in kerasfactory/models/feed_forward.py
```python
def __init__(
    self,
    feature_names: list[str],
    hidden_units: list[int],
    output_units: int = 1,
    dropout_rate: float = 0.0,
    activation: str = "relu",
    preprocessing_model: Model | None = None,
    kernel_initializer: str | Any | None = "glorot_uniform",
    bias_initializer: str | Any | None = "zeros",
    kernel_regularizer: str | Any | None = None,
    bias_regularizer: str | Any | None = None,
    activity_regularizer: str | Any | None = None,
    kernel_constraint: str | Any | None = None,
    bias_constraint: str | Any | None = None,
    **kwargs: Any,
) -> None:
    """Initialize Feed Forward Neural Network.

    Args:
        feature_names: list of feature names.
        hidden_units: list of hidden layer units.
        output_units: Number of output units.
        dropout_rate: Dropout rate.
        activation: Activation function.
        preprocessing_model: Optional preprocessing model.
        kernel_initializer: Weight initializer.
        bias_initializer: Bias initializer.
        kernel_regularizer: Weight regularizer.
        bias_regularizer: Bias regularizer.
        activity_regularizer: Activity regularizer.
        kernel_constraint: Weight constraint.
        bias_constraint: Bias constraint.
        **kwargs: Additional arguments.
    """
    super().__init__(preprocessing_model=preprocessing_model, **kwargs)

    # Store model parameters
    self.feature_names = feature_names
    self.hidden_units = hidden_units
    self.output_units = output_units
    self.dropout_rate = dropout_rate
    self.activation = activation
    self.kernel_initializer = kernel_initializer
    self.bias_initializer = bias_initializer
    self.kernel_regularizer = kernel_regularizer
    self.bias_regularizer = bias_regularizer
    self.activity_regularizer = activity_regularizer
    self.kernel_constraint = kernel_constraint
    self.bias_constraint = bias_constraint

    logger.info("🏗️ Initializing Feed Forward Neural Network")
    logger.info(f"📊 Model Architecture: {hidden_units} -> {output_units}")
    logger.info(f"🔄 Input Features: {feature_names}")

    # Create input layers
    self.input_layers = {}
    for name in feature_names:
        self.input_layers[name] = layers.Input(shape=(1,), name=name)
    logger.debug(f"✨ Created input layers for features: {feature_names}")

    # Build model layers
    self.concat_layer = layers.Concatenate(axis=1)
    logger.debug("✨ Created concatenation layer")

    # Add hidden layers
    self.hidden_layers = []
    for i, units in enumerate(hidden_units, 1):
        logger.debug(f"✨ Adding hidden layer {i} with {units} units")
        dense = layers.Dense(
            units=units,
            activation=activation,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            name=f"hidden_{i}",
        )
        self.hidden_layers.append(dense)

        # Add dropout if specified
        if dropout_rate > 0:
            dropout = layers.Dropout(rate=dropout_rate)
            self.hidden_layers.append(dropout)

    # Add output layer
    logger.debug(f"✨ Adding output layer with {output_units} units")
    self.output_layer = layers.Dense(
        units=output_units,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        name="output",
    )

    # Build the model
    self.build_model()
```

Functions

from_config classmethod

```python
from_config(config: dict[str, Any]) -> BaseFeedForwardModel
```

Create model from configuration.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| config | dict[str, Any] | Dict containing model configuration. | required |

Returns:

| Type | Description |
| --- | --- |
| BaseFeedForwardModel | Instantiated model. |

Source code in kerasfactory/models/feed_forward.py
```python
@classmethod
def from_config(cls, config: dict[str, Any]) -> "BaseFeedForwardModel":
    """Create model from configuration.

    Args:
        config: Dict containing model configuration.

    Returns:
        Instantiated model.
    """
    # Extract preprocessing model if present
    preprocessing_model = config.pop("preprocessing_model", None)

    # Deserialize preprocessing model if it's a config dict
    if preprocessing_model is not None and isinstance(preprocessing_model, dict):
        from keras.saving import deserialize_keras_object

        preprocessing_model = deserialize_keras_object(preprocessing_model)

    # Create model instance
    return cls(preprocessing_model=preprocessing_model, **config)
```
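A hedged round-trip sketch; it assumes the standard Keras `get_config()` counterpart, which is not documented on this page:

```python
# Assumption: get_config() follows the usual Keras serialization contract.
model = BaseFeedForwardModel(
    feature_names=['feature1', 'feature2'],
    hidden_units=[64, 32],
)
config = model.get_config()
restored = BaseFeedForwardModel.from_config(config)
```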

🎯 Advanced Models

🧩 SFNEBlock

Slow-Fast Neural Engine (SFNE) block for advanced feature processing; the building block of the Terminator model.

kerasfactory.models.SFNEBlock

This module implements a SFNEBlock (Slow-Fast Neural Engine Block) model that combines slow and fast processing paths for feature extraction. It's a building block for the Terminator model.

Classes

SFNEBlock
```python
SFNEBlock(
    input_dim: int,
    output_dim: int | None = None,
    hidden_dim: int = 64,
    num_layers: int = 2,
    slow_network_layers: int = 3,
    slow_network_units: int = 128,
    preprocessing_model: Model | None = None,
    name: str | None = None,
    **kwargs: Any
)
```

Slow-Fast Neural Engine Block for feature processing.

This model combines a slow network path and a fast processing path to extract features. It uses a SlowNetwork to generate hyper-kernels, which are then used by a HyperZZWOperator to compute context-dependent weights. These weights are further processed by global and local convolutions before being combined.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_dim | int | Dimension of the input features. | required |
| output_dim | int \| None | Dimension of the output features; defaults to input_dim. | None |
| hidden_dim | int | Number of hidden units in the network. | 64 |
| num_layers | int | Number of layers in the network. | 2 |
| slow_network_layers | int | Number of layers in the slow network. | 3 |
| slow_network_units | int | Number of units per layer in the slow network. | 128 |
| preprocessing_model | Model \| None | Optional preprocessing model to apply before the main processing. | None |
| name | str \| None | Optional name for the model. | None |
Input shape

2D tensor with shape: (batch_size, input_dim) or a dictionary with feature inputs

Output shape

2D tensor with shape: (batch_size, output_dim)

Example
```python
import keras
from kerasfactory.models import SFNEBlock

# Create sample input data
x = keras.random.normal((32, 16))  # 32 samples, 16 features

# Create the model
sfne = SFNEBlock(input_dim=16, output_dim=8)
y = sfne(x)
print("Output shape:", y.shape)  # (32, 8)
```

Initialize the SFNEBlock model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_dim | int | Input dimension. | required |
| output_dim | int \| None | Output dimension. | None |
| hidden_dim | int | Hidden dimension. | 64 |
| num_layers | int | Number of layers. | 2 |
| slow_network_layers | int | Number of slow network layers. | 3 |
| slow_network_units | int | Number of units in the slow network. | 128 |
| preprocessing_model | Model \| None | Preprocessing model. | None |
| name | str \| None | Name of the model. | None |
| `**kwargs` | Any | Additional keyword arguments. | {} |
Source code in kerasfactory/models/SFNEBlock.py
```python
def __init__(
    self,
    input_dim: int,
    output_dim: int | None = None,
    hidden_dim: int = 64,
    num_layers: int = 2,
    slow_network_layers: int = 3,
    slow_network_units: int = 128,
    preprocessing_model: Model | None = None,
    name: str | None = None,
    **kwargs: Any,
) -> None:
    """Initialize the SFNEBlock model.

    Args:
        input_dim: Input dimension.
        output_dim: Output dimension.
        hidden_dim: Hidden dimension.
        num_layers: Number of layers.
        slow_network_layers: Number of slow network layers.
        slow_network_units: Number of units in slow network.
        preprocessing_model: Preprocessing model.
        name: Name of the model.
        **kwargs: Additional keyword arguments.
    """
    # Extract our specific parameters before calling parent's __init__
    self.input_dim = input_dim
    self.output_dim = output_dim if output_dim is not None else input_dim
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.slow_network_layers = slow_network_layers
    self.slow_network_units = slow_network_units

    # Call parent's __init__ with preprocessing model support
    super().__init__(preprocessing_model=preprocessing_model, name=name, **kwargs)

    # Validate parameters
    self._validate_params()

    # Create layers
    self.input_layer = layers.Dense(self.hidden_dim, activation="relu")
    self.hidden_layers = [
        layers.Dense(self.hidden_dim, activation="relu")
        for _ in range(self.num_layers)
    ]
    self.slow_network = SlowNetwork(
        input_dim=input_dim,
        num_layers=slow_network_layers,
        units=slow_network_units,
    )
    self.hyper_zzw = HyperZZWOperator(input_dim=self.hidden_dim)
    self.global_conv = layers.Conv1D(input_dim, kernel_size=1, activation="relu")
    self.local_conv = layers.Conv1D(
        input_dim,
        kernel_size=3,
        padding="same",
        activation="relu",
    )
    self.bottleneck = layers.Dense(input_dim, activation="relu")
    self.output_layer = layers.Dense(self.output_dim, activation="linear")
```

Functions
from_config classmethod

```python
from_config(config: dict[str, Any]) -> SFNEBlock
```

Creates a model from its configuration.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| config | dict[str, Any] | Dictionary containing the model configuration. | required |

Returns:

| Type | Description |
| --- | --- |
| SFNEBlock | A new instance of the model. |

Source code in kerasfactory/models/SFNEBlock.py
```python
@classmethod
def from_config(cls, config: dict[str, Any]) -> "SFNEBlock":
    """Creates a model from its configuration.

    Args:
        config: Dictionary containing the model configuration.

    Returns:
        A new instance of the model.
    """
    # Extract preprocessing model if present
    preprocessing_model = config.pop("preprocessing_model", None)

    # Create model instance
    return cls(preprocessing_model=preprocessing_model, **config)
```

🎭 TerminatorModel

Comprehensive tabular model that combines multiple SFNE blocks for complex data tasks.

kerasfactory.models.TerminatorModel

This module implements a TerminatorModel that combines multiple SFNE blocks for advanced feature processing. It's designed for complex tabular data modeling tasks.

Classes

TerminatorModel
```python
TerminatorModel(
    input_dim: int,
    context_dim: int,
    output_dim: int,
    hidden_dim: int = 64,
    num_layers: int = 2,
    num_blocks: int = 3,
    slow_network_layers: int = 3,
    slow_network_units: int = 128,
    preprocessing_model: Model | None = None,
    name: str | None = None,
    **kwargs: Any
)
```

Terminator model for advanced feature processing.

This model stacks multiple SFNE blocks to process features in a hierarchical manner. It's designed for complex tabular data modeling tasks where feature interactions are important.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_dim | int | Dimension of the input features. | required |
| context_dim | int | Dimension of the context features. | required |
| output_dim | int | Dimension of the output. | required |
| hidden_dim | int | Number of hidden units in the network. | 64 |
| num_layers | int | Number of layers in the network. | 2 |
| num_blocks | int | Number of SFNE blocks to stack. | 3 |
| slow_network_layers | int | Number of layers in each slow network. | 3 |
| slow_network_units | int | Number of units per layer in each slow network. | 128 |
| preprocessing_model | Model \| None | Optional preprocessing model to apply before the main processing. | None |
| name | str \| None | Optional name for the model. | None |
Input shape

List of 2D tensors with shapes: [(batch_size, input_dim), (batch_size, context_dim)]

Output shape

2D tensor with shape: (batch_size, output_dim)

Example
```python
import keras
from kerasfactory.models import TerminatorModel

# Create sample input data
x = keras.random.normal((32, 16))  # 32 samples, 16 features
context = keras.random.normal((32, 8))  # 32 samples, 8 context features

# Create the model
terminator = TerminatorModel(input_dim=16, context_dim=8, output_dim=1)
y = terminator([x, context])
print("Output shape:", y.shape)  # (32, 1)
```
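Since the output layer is sigmoid-activated (see the source below), binary targets are a natural fit. A minimal training sketch continuing the example above, with toy labels:

```python
# Toy binary labels matching the sigmoid output; illustrative only.
labels = keras.ops.cast(keras.random.uniform((32, 1)) > 0.5, "float32")

terminator.compile(optimizer="adam", loss="binary_crossentropy")
terminator.fit([x, context], labels, epochs=2)
```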

Initialize the TerminatorModel.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_dim | int | Input dimension. | required |
| context_dim | int | Context dimension. | required |
| output_dim | int | Output dimension. | required |
| hidden_dim | int | Hidden dimension. | 64 |
| num_layers | int | Number of layers. | 2 |
| num_blocks | int | Number of blocks. | 3 |
| slow_network_layers | int | Number of slow network layers. | 3 |
| slow_network_units | int | Number of units in the slow network. | 128 |
| preprocessing_model | Model \| None | Preprocessing model. | None |
| name | str \| None | Name of the model. | None |
| `**kwargs` | Any | Additional keyword arguments. | {} |
Source code in kerasfactory/models/TerminatorModel.py
```python
def __init__(
    self,
    input_dim: int,
    context_dim: int,
    output_dim: int,
    hidden_dim: int = 64,
    num_layers: int = 2,
    num_blocks: int = 3,
    slow_network_layers: int = 3,
    slow_network_units: int = 128,
    preprocessing_model: Model | None = None,
    name: str | None = None,
    **kwargs: Any,
) -> None:
    """Initialize the TerminatorModel.

    Args:
        input_dim: Input dimension.
        context_dim: Context dimension.
        output_dim: Output dimension.
        hidden_dim: Hidden dimension.
        num_layers: Number of layers.
        num_blocks: Number of blocks.
        slow_network_layers: Number of slow network layers.
        slow_network_units: Number of units in slow network.
        preprocessing_model: Preprocessing model.
        name: Name of the model.
        **kwargs: Additional keyword arguments.
    """
    # Extract our specific parameters before calling parent's __init__
    self.input_dim = input_dim
    self.context_dim = context_dim
    self.output_dim = output_dim
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.num_blocks = num_blocks
    self.slow_network_layers = slow_network_layers
    self.slow_network_units = slow_network_units

    # Call parent's __init__ with preprocessing model support
    super().__init__(preprocessing_model=preprocessing_model, name=name, **kwargs)

    # Validate parameters
    self._validate_params()

    # Create layers
    self.input_layer = layers.Dense(input_dim, activation="relu")
    self.slow_network = SlowNetwork(
        input_dim=context_dim,
        num_layers=slow_network_layers,
        units=slow_network_units,
    )
    self.hyper_zzw = HyperZZWOperator(input_dim=input_dim, context_dim=context_dim)
    self.sfne_blocks = [
        SFNEBlock(
            input_dim=input_dim,
            output_dim=input_dim,
            hidden_dim=hidden_dim,
            num_layers=num_layers,
            slow_network_layers=slow_network_layers,
            slow_network_units=slow_network_units,
        )
        for _ in range(num_blocks)
    ]
    self.output_layer = layers.Dense(output_dim, activation="sigmoid")

    # Add a context-dependent layer to ensure context affects output
    self.context_dense = layers.Dense(input_dim, activation="relu")
```

Functions
from_config classmethod

```python
from_config(config: dict[str, Any]) -> TerminatorModel
```

Creates a model from its configuration.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| config | dict[str, Any] | Dictionary containing the model configuration. | required |

Returns:

| Type | Description |
| --- | --- |
| TerminatorModel | A new instance of the model. |

Source code in kerasfactory/models/TerminatorModel.py
```python
@classmethod
def from_config(cls, config: dict[str, Any]) -> "TerminatorModel":
    """Creates a model from its configuration.

    Args:
        config: Dictionary containing the model configuration.

    Returns:
        A new instance of the model.
    """
    # Extract preprocessing model if present
    preprocessing_model = config.pop("preprocessing_model", None)

    # Create model instance
    return cls(preprocessing_model=preprocessing_model, **config)
```

🔍 Autoencoder

Advanced autoencoder model for anomaly detection with optional preprocessing integration and automatic threshold configuration.

kerasfactory.models.autoencoder.Autoencoder

```python
Autoencoder(
    input_dim: int,
    encoding_dim: int = 64,
    intermediate_dim: int = 32,
    threshold: float = 2.0,
    preprocessing_model: keras.Model | None = None,
    inputs: dict[str, tuple[int, ...]] | None = None,
    name: str | None = None,
    **kwargs: Any
)
```

An autoencoder model for anomaly detection with optional preprocessing integration.

This class implements an autoencoder neural network model used for anomaly detection. It can optionally integrate with preprocessing models for production use, making it a single, unified model for both training and inference.
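A minimal end-to-end sketch using the methods documented below (fit with automatic threshold setup, then is_anomaly); the random data is purely illustrative:

```python
import keras
from kerasfactory.models.autoencoder import Autoencoder

x_train = keras.random.normal((256, 16))  # illustrative data

model = Autoencoder(input_dim=16, encoding_dim=8, intermediate_dim=12)
model.compile(optimizer="adam", loss="mse")

# fit() also sets up the anomaly threshold by default (auto_setup_threshold=True).
model.fit(x_train, x_train, epochs=5)

results = model.is_anomaly(keras.random.normal((32, 16)))
print(results["score"].shape, results["anomaly"].shape)
```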

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| input_dim | int | The dimension of the input data. |
| encoding_dim | int | The dimension of the encoded representation. |
| intermediate_dim | int | The dimension of the intermediate layer. |
| preprocessing_model | Model \| None | Optional preprocessing model. |
| _threshold | Variable | The threshold for anomaly detection. |
| _median | Variable | The median of the anomaly scores. |
| _std | Variable | The standard deviation of the anomaly scores. |

Initializes the Autoencoder model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| input_dim | int | The dimension of the input data. | required |
| encoding_dim | int | The dimension of the encoded representation. | 64 |
| intermediate_dim | int | The dimension of the intermediate layer. | 32 |
| threshold | float | The initial threshold for anomaly detection. | 2.0 |
| preprocessing_model | Model \| None | Optional preprocessing model for production use. | None |
| inputs | dict[str, tuple] \| None | Input shapes for the preprocessing model. | None |
| name | str \| None | The name of the model. | None |
| `**kwargs` | Any | Additional keyword arguments passed to the parent class. | {} |
Source code in kerasfactory/models/autoencoder.py
```python
def __init__(
    self,
    input_dim: int,
    encoding_dim: int = 64,
    intermediate_dim: int = 32,
    threshold: float = 2.0,
    preprocessing_model: keras.Model | None = None,
    inputs: dict[str, tuple[int, ...]] | None = None,
    name: str | None = None,
    **kwargs: Any,
) -> None:
    """Initializes the Autoencoder model.

    Args:
        input_dim (int): The dimension of the input data.
        encoding_dim (int, optional): The dimension of the encoded representation. Defaults to 64.
        intermediate_dim (int, optional): The dimension of the intermediate layer. Defaults to 32.
        threshold (float, optional): The initial threshold for anomaly detection. Defaults to 2.0.
        preprocessing_model (keras.Model, optional): Optional preprocessing model for production use. Defaults to None.
        inputs (dict[str, tuple], optional): Input shapes for preprocessing model. Defaults to None.
        name (str, optional): The name of the model. Defaults to None.
        **kwargs: Additional keyword arguments passed to the parent class.
    """
    # Set private attributes first
    self._input_dim = input_dim
    self._encoding_dim = encoding_dim
    self._intermediate_dim = intermediate_dim
    self._threshold = threshold

    # Validate parameters
    self._validate_params()

    # Set public attributes BEFORE calling parent's __init__
    self.input_dim = self._input_dim
    self.encoding_dim = self._encoding_dim
    self.intermediate_dim = self._intermediate_dim

    # Initialize variables
    self._threshold_var = keras.Variable(
        threshold,
        dtype="float32",
        name="threshold",
    )
    self._median = keras.Variable(
        0.0,
        dtype="float32",
        trainable=False,
        name="median",
    )
    self._std = keras.Variable(0.0, dtype="float32", trainable=False, name="std")

    # Call parent's __init__ with preprocessing model support
    super().__init__(
        preprocessing_model=preprocessing_model,
        inputs=inputs,
        name=name,
        **kwargs,
    )

    # Build the model architecture
    self._build_architecture()
```

Attributes

threshold property

```python
threshold: float
```

Gets the current threshold value.

Returns:

| Type | Description |
| --- | --- |
| float | The current threshold value. |

median property

```python
median: float
```

Gets the current median value.

Returns:

| Type | Description |
| --- | --- |
| float | The current median value. |

std property

```python
std: float
```

Gets the current standard deviation value.

Returns:

| Type | Description |
| --- | --- |
| float | The current standard deviation value. |

Functions

setup_threshold

```python
setup_threshold(data: keras.KerasTensor | Any) -> None
```

Sets up the threshold for anomaly detection based on the given data.

This method automatically calculates the median and standard deviation of reconstruction errors from the provided data and sets up the threshold for anomaly detection.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | KerasTensor \| Any | The data to use for threshold calculation. Can be a tensor or a dataset. | required |
Source code in kerasfactory/models/autoencoder.py
```python
def setup_threshold(self, data: keras.KerasTensor | Any) -> None:
    """Sets up the threshold for anomaly detection based on the given data.

    This method automatically calculates the median and standard deviation of
    reconstruction errors from the provided data and sets up the threshold
    for anomaly detection.

    Args:
        data (KerasTensor | Any): The data to use for threshold calculation.
            Can be a tensor or a dataset.
    """
    logger.info("Setting up the threshold ...")

    # Built-in metrics
    mean_metric = keras.metrics.Mean()
    # Custom metrics
    median_metric = Median()
    std_metric = StandardDeviation()

    # Handle both tensor and dataset inputs
    if (
        hasattr(data, "__iter__")
        and not isinstance(data, keras.KerasTensor)
        and hasattr(data, "__class__")
        and "Dataset" in str(type(data))
    ):
        # Process dataset batch by batch
        for batch in data:
            if isinstance(batch, tuple):
                # If dataset contains (features, labels), use features only
                x = batch[0]
            else:
                x = batch

            # Calculate reconstruction errors
            reconstructed = self(x, training=False)
            scores = ops.mean(ops.abs(x - reconstructed), axis=1)

            # Update metrics
            mean_metric.update_state(scores)
            std_metric.update_state(scores)
            median_metric.update_state(scores)
    else:
        # Handle tensor input
        reconstructed = self(data, training=False)
        scores = ops.mean(ops.abs(data - reconstructed), axis=1)

        # Update metrics
        mean_metric.update_state(scores)
        std_metric.update_state(scores)
        median_metric.update_state(scores)

    # Update model variables
    self._median.assign(median_metric.result())
    self._std.assign(std_metric.result())

    logger.debug(f"mean: {mean_metric.result().numpy()}")
    logger.debug(f"median: {median_metric.result().numpy()}")
    logger.debug(f"std: {std_metric.result().numpy()}")
    logger.debug(f"assigned _median: {self._median}")
    logger.debug(f"assigned _std: {self._std}")
```

auto_configure_threshold

```python
auto_configure_threshold(
    data: keras.KerasTensor | Any,
    percentile: float = 0.95,
    method: str = "iqr",
) -> None
```

Automatically configure threshold using statistical methods.

This method provides different approaches to automatically set the anomaly detection threshold based on statistical properties of the data.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | KerasTensor \| Any | The data to use for threshold calculation. | required |
| percentile | float | Percentile to use for threshold calculation. | 0.95 |
| method | str | Method to use for threshold calculation: 'iqr' (interquartile range), 'percentile', or 'zscore'. | 'iqr' |
Source code in kerasfactory/models/autoencoder.py
```python
def auto_configure_threshold(
    self,
    data: keras.KerasTensor | Any,
    percentile: float = 0.95,
    method: str = "iqr",
) -> None:
    """Automatically configure threshold using statistical methods.

    This method provides different approaches to automatically set the
    anomaly detection threshold based on statistical properties of the data.

    Args:
        data (KerasTensor | Any): The data to use for threshold calculation.
        percentile (float, optional): Percentile to use for threshold calculation. Defaults to 0.95.
        method (str, optional): Method to use for threshold calculation.
            Options: 'iqr' (Interquartile Range), 'percentile', 'zscore'. Defaults to 'iqr'.
    """
    logger.info(f"Auto-configuring threshold using method: {method}")

    # Calculate reconstruction errors
    scores = []

    if (
        hasattr(data, "__iter__")
        and not isinstance(data, keras.KerasTensor)
        and hasattr(data, "__class__")
        and "Dataset" in str(type(data))
    ):
        for batch in data:
            if isinstance(batch, tuple):
                x = batch[0]
            else:
                x = batch
            batch_scores = self.predict_anomaly_scores(x)
            scores.append(batch_scores.numpy())
    else:
        batch_scores = self.predict_anomaly_scores(data)
        scores.append(batch_scores.numpy())

    # Concatenate all scores
    all_scores = ops.concatenate([ops.convert_to_tensor(s) for s in scores])

    if method == "iqr":
        # Interquartile Range method
        q1 = ops.quantile(all_scores, 0.25)
        q3 = ops.quantile(all_scores, 0.75)
        iqr = q3 - q1
        threshold_value = q3 + 1.5 * iqr
    elif method == "percentile":
        # Percentile method
        threshold_value = ops.quantile(all_scores, percentile)
    elif method == "zscore":
        # Z-score method (assuming 3 standard deviations)
        mean_score = ops.mean(all_scores)
        std_score = ops.std(all_scores)
        threshold_value = mean_score + 3 * std_score
    else:
        raise ValueError(
            f"Unknown method: {method}. Use 'iqr', 'percentile', or 'zscore'",
        )

    # Update threshold variable
    self._threshold_var.assign(ops.cast(threshold_value, dtype="float32"))

    # Also update median and std for consistency
    self._median.assign(ops.cast(ops.median(all_scores), dtype="float32"))
    self._std.assign(ops.cast(ops.std(all_scores), dtype="float32"))

    logger.info(f"Auto-configured threshold: {threshold_value.numpy()}")
    logger.debug(f"Updated median: {self._median.numpy()}")
    logger.debug(f"Updated std: {self._std.numpy()}")
```
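A hedged usage sketch of the three statistical methods above on a trained model (`x_train` stands in for the training features):

```python
# 'iqr': threshold = Q3 + 1.5 * IQR of the reconstruction errors
model.auto_configure_threshold(x_train, method="iqr")

# 'percentile': threshold at a chosen quantile of the errors
model.auto_configure_threshold(x_train, method="percentile", percentile=0.99)

# 'zscore': threshold = mean + 3 * std of the errors
model.auto_configure_threshold(x_train, method="zscore")
```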
fit

```python
fit(
    x: Any = None,
    y: Any = None,
    epochs: int = 1,
    callbacks: list | None = None,
    auto_setup_threshold: bool = True,
    threshold_method: str = "iqr",
    **kwargs: Any
) -> keras.callbacks.History
```

Fits the model to the given data with optional automatic threshold setup.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| x | KerasTensor \| Any | The training data (features). | None |
| y | Any | The training targets (labels). | None |
| epochs | int | The number of epochs to train for. | 1 |
| auto_setup_threshold | bool | Whether to automatically set up the threshold after training. | True |
| threshold_method | str | Method for threshold setup. | 'iqr' |
| callbacks | list \| None | A list of callbacks to use during training. | None |
| `**kwargs` | Any | Additional keyword arguments passed to the fit method. | {} |

Returns:

| Type | Description |
| --- | --- |
| keras.callbacks.History | A History object containing training history. |

Source code in kerasfactory/models/autoencoder.py
```python
def fit(
    self,
    x: Any = None,
    y: Any = None,
    epochs: int = 1,
    callbacks: list | None = None,
    auto_setup_threshold: bool = True,
    threshold_method: str = "iqr",
    **kwargs: Any,
) -> keras.callbacks.History:
    """Fits the model to the given data with optional automatic threshold setup.

    Args:
        x (KerasTensor | Any): The training data (features).
        y (Any): The training targets (labels).
        epochs (int): The number of epochs to train for.
        auto_setup_threshold (bool, optional): Whether to automatically setup threshold after training. Defaults to True.
        threshold_method (str, optional): Method for threshold setup. Defaults to "iqr".
        callbacks (list, optional): A list of callbacks to use during training. Defaults to None.
        **kwargs: Additional keyword arguments passed to the fit method.

    Returns:
        keras.callbacks.History: A History object containing training history.
    """
    # Use the base class fit method which handles preprocessing model integration
    history = super().fit(x=x, y=y, epochs=epochs, callbacks=callbacks, **kwargs)

    # Automatically setup threshold if requested (autoencoder-specific functionality)
    if auto_setup_threshold and x is not None:
        logger.info("Auto-setting up threshold after training...")
        if threshold_method in ["iqr", "percentile", "zscore"]:
            self.auto_configure_threshold(x, method=threshold_method)
        else:
            self.setup_threshold(x)

    return history
```

create_functional_model

```python
create_functional_model() -> keras.Model | None
```

Create a functional model that combines preprocessing and autoencoder.

This method creates a functional Keras model that integrates the preprocessing model (if provided) with the autoencoder for end-to-end inference.

Returns:

| Type | Description |
| --- | --- |
| keras.Model \| None | Functional model combining preprocessing and autoencoder, or None if no preprocessing. |

Source code in kerasfactory/models/autoencoder.py
```python
def create_functional_model(self) -> keras.Model | None:
    """Create a functional model that combines preprocessing and autoencoder.

    This method creates a functional Keras model that integrates the preprocessing
    model (if provided) with the autoencoder for end-to-end inference.

    Returns:
        keras.Model: Functional model combining preprocessing and autoencoder, or None if no preprocessing.
    """
    return self._create_functional_model()
```

predict_anomaly_scores

```python
predict_anomaly_scores(data: keras.KerasTensor) -> keras.KerasTensor
```

Predicts anomaly scores for the given data.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | KerasTensor | The input data to predict on. | required |

Returns:

| Type | Description |
| --- | --- |
| KerasTensor | An array of anomaly scores. |

Source code in kerasfactory/models/autoencoder.py
```python
def predict_anomaly_scores(self, data: keras.KerasTensor) -> keras.KerasTensor:
    """Predicts anomaly scores for the given data.

    Args:
        data (KerasTensor): The input data to predict on.

    Returns:
        KerasTensor: An array of anomaly scores.
    """
    x_pred = self(data, training=False)
    # Ensure both tensors have the same dtype to avoid type mismatch errors
    data = ops.cast(data, x_pred.dtype)
    scores = ops.mean(ops.abs(data - x_pred), axis=1)
    return scores
```

predict

```python
predict(
    data: keras.KerasTensor | dict[str, keras.KerasTensor] | Any,
    **kwargs
) -> keras.KerasTensor | dict[str, keras.KerasTensor]
```

Predicts reconstruction or anomaly detection results.

This method provides a unified interface for both reconstruction prediction and anomaly detection, depending on whether a preprocessing model is used.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | KerasTensor \| dict \| Any | The input data to predict on. | required |
| `**kwargs` | Any | Additional keyword arguments (ignored for compatibility). | {} |

Returns:

| Type | Description |
| --- | --- |
| KerasTensor \| dict[str, KerasTensor] | Reconstruction results or anomaly detection results. |

Source code in kerasfactory/models/autoencoder.py
```python
def predict(
    self,
    data: keras.KerasTensor | dict[str, keras.KerasTensor] | Any,
    **kwargs,
) -> keras.KerasTensor | dict[str, keras.KerasTensor]:
    """Predicts reconstruction or anomaly detection results.

    This method provides a unified interface for both reconstruction prediction
    and anomaly detection, depending on whether a preprocessing model is used.

    Args:
        data (KerasTensor | dict | Any): The input data to predict on.
        **kwargs: Additional keyword arguments (ignored for compatibility).

    Returns:
        KerasTensor | dict: Reconstruction results or anomaly detection results.
    """
    # Handle dataset inputs
    if (
        hasattr(data, "__iter__")
        and not isinstance(data, keras.KerasTensor)
        and not isinstance(data, dict)
        and hasattr(data, "__class__")
        and "Dataset" in str(type(data))
    ):
        # Process dataset batch by batch
        predictions = []
        for batch in data:
            if isinstance(batch, tuple):
                # If dataset contains (features, labels), use features only
                x = batch[0]
            else:
                x = batch
            batch_pred = self(x, training=False)
            predictions.append(batch_pred)
        # Concatenate all predictions
        return ops.concatenate(predictions)
    else:
        return self(data, training=False)
```
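As the source shows, predict also accepts dataset objects and processes them batch by batch (any input whose type name contains "Dataset" takes this path). A hedged sketch assuming a tf.data pipeline:

```python
import tensorflow as tf  # assumption: a tf.data input pipeline is in use

ds = tf.data.Dataset.from_tensor_slices(x_train).batch(32)
reconstructions = model.predict(ds)  # per-batch outputs are concatenated
```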
is_anomaly

```python
is_anomaly(
    data: keras.KerasTensor | dict[str, keras.KerasTensor] | Any,
    percentile_to_use: str = "median",
) -> dict[str, Any]
```

Determines if the given data contains anomalies.

This method can handle both individual samples and datasets, providing comprehensive anomaly detection results.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | KerasTensor \| dict \| Any | The data to check for anomalies. | required |
| percentile_to_use | str | The percentile to use for anomaly detection. | 'median' |

Returns:

| Type | Description |
| --- | --- |
| dict[str, Any] | A dictionary containing anomaly scores, flags, and threshold information. |

Source code in kerasfactory/models/autoencoder.py
```python
def is_anomaly(
    self,
    data: keras.KerasTensor | dict[str, keras.KerasTensor] | Any,
    percentile_to_use: str = "median",
) -> dict[str, Any]:
    """Determines if the given data contains anomalies.

    This method can handle both individual samples and datasets, providing
    comprehensive anomaly detection results.

    Args:
        data (KerasTensor | dict | Any): The data to check for anomalies.
        percentile_to_use (str, optional): The percentile to use for anomaly detection. Defaults to "median".

    Returns:
        dict[str, Any]: A dictionary containing anomaly scores, flags, and threshold information.
    """
    if (
        hasattr(data, "__iter__")
        and not isinstance(data, keras.KerasTensor)
        and not isinstance(data, dict)
        and hasattr(data, "__class__")
        and "Dataset" in str(type(data))
    ):
        # Handle dataset input
        scores = []
        anomalies = []

        for batch in data:
            if isinstance(batch, tuple):
                x = batch[0]
            else:
                x = batch

            # Calculate scores directly to avoid recursion
            if self.preprocessing_model is not None:
                # Use the call method which handles preprocessing and returns anomaly results
                results = self(x, training=False)
                batch_scores = results["score"]
                batch_anomalies = results["anomaly"]
            else:
                # Standard autoencoder mode
                batch_scores = self.predict_anomaly_scores(x)
                percentile = getattr(self, percentile_to_use)
                batch_anomalies = ops.cast(
                    batch_scores > (percentile + (self.threshold * self.std)),
                    dtype="bool",
                )

            scores.append(batch_scores)
            anomalies.append(batch_anomalies)

        # Concatenate results
        all_scores = ops.concatenate(scores)
        all_anomalies = ops.concatenate(anomalies)

        return {
            "score": all_scores,
            "anomaly": all_anomalies,
            "std": self.std,
            "threshold": self.threshold,
            percentile_to_use: getattr(self, percentile_to_use),
        }

    if self.preprocessing_model is not None:
        # Use the call method which handles preprocessing and returns anomaly results
        results = self(data, training=False)
        return {
            "score": results["score"],
            "anomaly": results["anomaly"],
            "std": results["std"],
            "threshold": results["threshold"],
            percentile_to_use: results["median"],
        }
    else:
        # Standard autoencoder mode
        scores = self.predict_anomaly_scores(data)
        percentile = getattr(self, percentile_to_use)

        anomalies = ops.cast(
            scores > (percentile + (self.threshold * self.std)),
            dtype="bool",
        )

        return {
            "score": scores,
            "anomaly": anomalies,
            "std": self.std,
            "threshold": self.threshold,
            percentile_to_use: percentile,
        }
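
Example

A minimal sketch of a typical call, assuming an Autoencoder that has already been fitted (so its median/std statistics are populated); the constructor arguments and fit setup are illustrative:

import keras
from kerasfactory.models import Autoencoder

# Illustrative dimensions; adjust to your data.
model = Autoencoder(input_dim=16, encoding_dim=4, intermediate_dim=8, threshold=2.0)
model.compile(optimizer="adam", loss="mse")

x = keras.random.normal((256, 16))
model.fit(x, x, epochs=5)  # reconstruction targets; the exact fit signature may accept x alone

# Score new data; the keys match the dictionary built above.
results = model.is_anomaly(keras.random.normal((32, 16)))
print(results["score"])    # per-sample anomaly scores
print(results["anomaly"])  # boolean anomaly flags
print(results["threshold"], results["std"], results["median"])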
from_config classmethod
from_config(config: dict[str, Any]) -> Autoencoder

Creates a new instance of the model from its config.

Parameters:

- config (dict): A dictionary containing the configuration of the model. Required.

Returns:

- Autoencoder: A new instance of the model.

Source code in kerasfactory/models/autoencoder.py, lines 625-651
@classmethod
def from_config(cls, config: dict[str, Any]) -> "Autoencoder":
    """Creates a new instance of the model from its config.

    Args:
        config (dict): A dictionary containing the configuration of the model.

    Returns:
        Autoencoder: A new instance of the model.
    """
    preprocessing_model = None
    if config.get("preprocessing_model"):
        preprocessing_model = keras.models.model_from_json(
            config["preprocessing_model"],
        )

    instance = cls(
        input_dim=config["input_dim"],
        encoding_dim=config["encoding_dim"],
        intermediate_dim=config["intermediate_dim"],
        threshold=config["threshold"],
        preprocessing_model=preprocessing_model,
        inputs=config.get("inputs"),
    )
    instance._median.assign(config["median"])
    instance._std.assign(config["std"])
    return instance
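
Example

A sketch of the save/restore round trip, continuing the Autoencoder example above and assuming get_config is the counterpart of from_config (the standard Keras convention):

config = model.get_config()                # assumed to mirror the keys read above
restored = Autoencoder.from_config(config)
# The learned statistics travel with the config: restored's _median and _std
# are assigned from config["median"] and config["std"].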

🔧 Base Classes

🏛️ BaseModel

Base class for all KerasFactory models, providing common functionality and Keras 3 compatibility.

kerasfactory.models._base.BaseModel

BaseModel(*args, **kwargs)

Base model class with comprehensive input handling and common features.

This class extends the standard Keras Model to provide:

- Universal input handling (supports any input format)
- Preprocessing model integration with automatic fitting
- Input validation and standardization
- Common utility methods for all models
- Automatic functional model creation

Initialize the base model with preprocessing support.

Source code in kerasfactory/models/_base.py, lines 19-30
def __init__(self, *args, **kwargs):
    """Initialize the base model with preprocessing support."""
    # Extract preprocessing-related parameters
    self._preprocessing_model = kwargs.pop("preprocessing_model", None)
    self._inputs = kwargs.pop("inputs", None)
    self._preprocessing_fitted = False

    super().__init__(*args, **kwargs)

    # Set up preprocessing model if provided
    if self._preprocessing_model is not None:
        self._setup_preprocessing_model()
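
Example

A sketch of a subclass that forwards these kwargs; TinyModel and the Normalization preprocessing model are illustrative, not part of the library:

import keras
from kerasfactory.models._base import BaseModel

class TinyModel(BaseModel):
    def __init__(self, units: int = 1, **kwargs):
        super().__init__(**kwargs)  # accepts preprocessing_model= and inputs=
        self.dense = keras.layers.Dense(units)

    def call(self, inputs):
        return self.dense(inputs)

# Any Keras model mapping raw inputs to features can serve as preprocessing.
preproc = keras.Sequential([keras.layers.Normalization()])
model = TinyModel(units=1, preprocessing_model=preproc)
print(model.preprocessing_fitted)  # False until fit() auto-fits it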

Attributes

preprocessing_model property
preprocessing_model: Optional[Model]

Get the preprocessing model.

inputs property
inputs: Optional[dict]

Get the input shapes specification.

preprocessing_fitted property
preprocessing_fitted: bool

Check if the preprocessing model has been fitted.

Functions

filer_inputs
filer_inputs(inputs: dict) -> dict

Filter inputs based on the specified input shapes.

Parameters:

- inputs (dict): Dictionary of inputs to filter. Required.

Returns:

- dict: Filtered inputs.

Source code in kerasfactory/models/_base.py, lines 543-554
def filer_inputs(self, inputs: dict) -> dict:
    """Filter inputs based on the specified input shapes.

    Args:
        inputs: Dictionary of inputs to filter.

    Returns:
        dict: Filtered inputs.
    """
    if self._inputs is None:
        return inputs
    return {k: v for k, v in inputs.items() if k in self._inputs}
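
Example

A sketch continuing the TinyModel example, assuming inputs was passed at construction as a dict keyed by feature name (the shape-spec format is assumed; the method name is filer_inputs as spelled in the source):

import keras

model = TinyModel(units=1, inputs={"a": (4,), "b": (4,)})
batch = {
    "a": keras.random.normal((2, 4)),
    "b": keras.random.normal((2, 4)),
    "unused": keras.random.normal((2, 4)),
}
print(model.filer_inputs(batch).keys())  # dict_keys(['a', 'b'])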
inspect_signatures
inspect_signatures(model: Model) -> dict

Inspect the model signatures.

Parameters:

- model (Model): Model to inspect signatures for. Required.

Returns:

- dict: Signature information.

Source code in kerasfactory/models/_base.py, lines 556-576
def inspect_signatures(self, model: Model) -> dict:
    """Inspect the model signatures.

    Args:
        model: Model to inspect signatures for.

    Returns:
        dict: Signature information.
    """
    sig_keys = list(model.signatures.keys())
    logger.info(f"found signatures: {sig_keys}")
    info = {}
    for sig in sig_keys:
        _infer = model.signatures[sig]
        _inputs = _infer.structured_input_signature
        _outputs = _infer.structured_outputs
        info["signature"] = {
            "inputs": _inputs,
            "outputs": _outputs,
        }
    return info
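
Example

This method targets objects that expose TensorFlow-style serving signatures, such as a re-loaded SavedModel; a hedged sketch with a hypothetical export path:

import tensorflow as tf

loaded = tf.saved_model.load("/tmp/exported_model")  # hypothetical path
info = model.inspect_signatures(loaded)
print(info)  # signature info keyed by name, e.g. "serving_default"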
fit
fit(
    x: Any = None,
    y: Any = None,
    epochs: int = 1,
    callbacks: list | None = None,
    **kwargs: Any
) -> keras.callbacks.History

Fits the model to the given data with preprocessing model integration.

This method automatically handles preprocessing model fitting if needed, then calls the parent class fit method for training.

Parameters:

- x (Any): The training data (features). Default: None.
- y (Any): The training targets (labels). Default: None.
- epochs (int): The number of epochs to train for. Default: 1.
- callbacks (list | None): A list of callbacks to use during training. Defaults to None.
- **kwargs (Any): Additional keyword arguments passed to the fit method.

Returns:

- keras.callbacks.History: A History object containing training history.

Source code in kerasfactory/models/_base.py, lines 593-623
def fit(
    self,
    x: Any = None,
    y: Any = None,
    epochs: int = 1,
    callbacks: list | None = None,
    **kwargs: Any,
) -> keras.callbacks.History:
    """Fits the model to the given data with preprocessing model integration.

    This method automatically handles preprocessing model fitting if needed,
    then calls the parent class fit method for training.

    Args:
        x (Any): The training data (features).
        y (Any): The training targets (labels).
        epochs (int): The number of epochs to train for.
        callbacks (list, optional): A list of callbacks to use during training. Defaults to None.
        **kwargs: Additional keyword arguments passed to the fit method.

    Returns:
        keras.callbacks.History: A History object containing training history.
    """
    # Auto-fit preprocessing model if needed (use x as the data)
    if x is not None:
        self._auto_fit_preprocessing_model(x)

    # Train the model using the parent class fit method
    history = super().fit(x=x, y=y, epochs=epochs, callbacks=callbacks, **kwargs)

    return history
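
Example

In practice one fit call both adapts the preprocessing model and trains the network; a sketch continuing the TinyModel example above:

import keras

model.compile(optimizer="adam", loss="mse")
x = keras.random.normal((128, 4))
y = keras.random.normal((128, 1))

history = model.fit(x=x, y=y, epochs=3)
print(model.preprocessing_fitted)  # True: preprocessing was auto-fitted first
print(history.history["loss"])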
get_input_info
get_input_info() -> dict[str, Any]

Get comprehensive input information for the model.

Returns:

- dict[str, Any]: Dictionary containing input information.

Source code in kerasfactory/models/_base.py, lines 625-647
def get_input_info(self) -> dict[str, Any]:
    """Get comprehensive input information for the model.

    Returns:
        Dictionary containing input information
    """
    info = {
        "has_preprocessing_model": self._preprocessing_model is not None,
        "preprocessing_fitted": self._preprocessing_fitted,
        "input_shapes": self._inputs,
    }

    if self._preprocessing_model is not None:
        if hasattr(self._preprocessing_model, "inputs"):
            info["preprocessing_inputs"] = [
                inp.name for inp in self._preprocessing_model.inputs
            ]
        if hasattr(self._preprocessing_model, "outputs"):
            info["preprocessing_outputs"] = [
                out.name for out in self._preprocessing_model.outputs
            ]

    return info
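
Example

A minimal sketch, continuing the TinyModel example:

info = model.get_input_info()
print(info["has_preprocessing_model"])  # True in the sketch above
print(info["preprocessing_fitted"])
print(info["input_shapes"])
# For a functional preprocessing model, "preprocessing_inputs" and
# "preprocessing_outputs" list the tensor names as well.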
validate_inputs
validate_inputs(
    inputs: Any, expected_keys: list[str] | None = None
) -> bool

Validate inputs against expected format.

Parameters:

- inputs (Any): Input data to validate. Required.
- expected_keys (list[str] | None): Expected feature names. Default: None.

Returns:

- bool: True if inputs are valid, False otherwise.

Source code in kerasfactory/models/_base.py, lines 649-671
def validate_inputs(self, inputs: Any, expected_keys: list[str] | None = None) -> bool:
    """Validate inputs against expected format.

    Args:
        inputs: Input data to validate
        expected_keys: Expected feature names

    Returns:
        True if inputs are valid, False otherwise
    """
    try:
        standardized_inputs = self._standardize_inputs(inputs)

        if expected_keys is not None:
            for key in expected_keys:
                if key not in standardized_inputs:
                    logger.warning(f"Missing expected input key: {key}")
                    return False

        return True
    except Exception as e:
        logger.error(f"Input validation failed: {e}")
        return False
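
Example

A sketch; whether a plain dict standardizes cleanly depends on _standardize_inputs, so treat this as illustrative:

import keras

ok = model.validate_inputs(
    {"a": keras.random.normal((2, 4))},
    expected_keys=["a", "b"],
)
print(ok)  # False: key "b" is missing, and a warning is logged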
get_model_summary
get_model_summary() -> str

Get a comprehensive model summary.

Returns:

- str: String containing model summary information.

Source code in kerasfactory/models/_base.py, lines 673-699
def get_model_summary(self) -> str:
    """Get a comprehensive model summary.

    Returns:
        String containing model summary information
    """
    summary_parts = [
        f"Model: {self.name}",
        f"Type: {self.__class__.__name__}",
        f"Built: {self.built}",
    ]

    if self._preprocessing_model is not None:
        summary_parts.append(
            f"Preprocessing: {self._preprocessing_model.__class__.__name__}",
        )
        summary_parts.append(f"Preprocessing Fitted: {self._preprocessing_fitted}")

    if self._inputs is not None:
        summary_parts.append(f"Input Shapes: {self._inputs}")

    if hasattr(self, "feature_names"):
        summary_parts.append(
            f"Feature Names: {getattr(self, 'feature_names', 'N/A')}",
        )

    return " | ".join(summary_parts)
create_functional_model
create_functional_model() -> Optional[keras.Model]

Create a functional model that combines preprocessing and main model.

This is a public method that wraps the internal _create_functional_model.

Returns:

- Optional[Model]: Functional model, or None if no preprocessing model is set.

Source code in kerasfactory/models/_base.py, lines 701-709
def create_functional_model(self) -> Optional[keras.Model]:
    """Create a functional model that combines preprocessing and main model.

    This is a public method that wraps the internal _create_functional_model.

    Returns:
        Functional model or None if no preprocessing model
    """
    return self._create_functional_model()
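
Example

A sketch; the combined graph is only available when a preprocessing model is attached:

functional = model.create_functional_model()
if functional is not None:
    functional.summary()  # preprocessing + main model as one servable graph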
reset_preprocessing_fitted
reset_preprocessing_fitted() -> None

Reset the preprocessing fitted flag.

Useful when you want to refit the preprocessing model.

Source code in kerasfactory/models/_base.py, lines 711-717
def reset_preprocessing_fitted(self) -> None:
    """Reset the preprocessing fitted flag.

    Useful when you want to refit the preprocessing model.
    """
    self._preprocessing_fitted = False
    logger.info("Preprocessing fitted flag reset")
set_preprocessing_model
set_preprocessing_model(preprocessing_model: Any) -> None

Set a new preprocessing model.

Parameters:

- preprocessing_model (Any): New preprocessing model to use. Required.
Source code in kerasfactory/models/_base.py, lines 719-729
def set_preprocessing_model(self, preprocessing_model: Any) -> None:
    """Set a new preprocessing model.

    Args:
        preprocessing_model: New preprocessing model to use
    """
    self._preprocessing_model = preprocessing_model
    self._preprocessing_fitted = False
    if preprocessing_model is not None:
        self._setup_preprocessing_model()
    logger.info(f"Preprocessing model set to: {type(preprocessing_model).__name__}")