Learn the fundamental workflows for building tabular models with KerasFactory layers. This tutorial covers the most common patterns and best practices.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from kerasfactory.layers import DifferentiableTabularPreprocessor

# Load your dataset
df = pd.read_csv('your_dataset.csv')

# Separate features and target
X = df.drop('target', axis=1)
y = df['target']

# Split the data (80/20 hold-out; fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert to numpy arrays
X_train = X_train.values
X_test = X_test.values
y_train = y_train.values
y_test = y_test.values

print(f"Training shape: {X_train.shape}")
print(f"Test shape: {X_test.shape}")
Handling Missing Values
1 2 3 4 5 6 7 8 91011121314
from kerasfactory.layers import DifferentiableTabularPreprocessor

# Create a preprocessing layer whose imputation and normalization
# parameters are learned during training rather than fixed up front.
preprocessor = DifferentiableTabularPreprocessor(
    imputation_strategy='learnable',
    normalization='learnable'
)

# Fit on training data only, so no test-set statistics leak in.
preprocessor.adapt(X_train)

# Transform data
X_train_processed = preprocessor(X_train)
X_test_processed = preprocessor(X_test)
# NOTE(review): the original snippet used GatedFeatureFusion without
# importing it — added to the import line.
from kerasfactory.layers import MultiResolutionTabularAttention, GatedFeatureFusion


def multi_head_model(inputs):
    """Model with multi-resolution attention.

    Applies multi-resolution attention (separate head budgets for
    numerical and categorical features), then fuses features with a
    gated fusion layer.
    """
    # Multi-resolution attention: 8 heads total, split 4/4 between
    # numerical and categorical feature groups.
    x = MultiResolutionTabularAttention(
        num_heads=8,
        numerical_heads=4,
        categorical_heads=4
    )(inputs)
    # Feature fusion
    x = GatedFeatureFusion(hidden_dim=128)(x)
    return x
3. Ensemble Approach
1 2 3 4 5 6 7 8 9101112131415
# NOTE(review): the original snippet used GatedResidualNetwork without
# importing it — added to the import line.
from kerasfactory.layers import BoostingEnsembleLayer, GatedResidualNetwork


def ensemble_model(inputs):
    """Model with boosting ensemble.

    Runs the inputs through a boosting-style ensemble of weak learners,
    then refines the result with a gated residual network.
    """
    # Boosting ensemble: 3 learners, 64 units each.
    x = BoostingEnsembleLayer(num_learners=3, learner_units=64)(inputs)
    # Final processing
    x = GatedResidualNetwork(units=64)(x)
    return x
4. Anomaly Detection
1 2 3 4 5 6 7 8 910111213
# NOTE(review): the original snippet used VariableSelection and
# TabularAttention without importing them — added to the import line.
from kerasfactory.layers import (
    NumericalAnomalyDetection,
    VariableSelection,
    TabularAttention,
)


def anomaly_detection_model(inputs):
    """Model with anomaly detection.

    Returns a (main_output, anomaly_output) pair: the main branch does
    variable selection + attention, while a side branch scores the raw
    inputs for anomalies.
    """
    # Anomaly-detection side branch, computed from the raw inputs.
    anomaly_output = NumericalAnomalyDetection()(inputs)
    # Main processing branch.
    x = VariableSelection(hidden_dim=64)(inputs)
    x = TabularAttention(num_heads=8)(x)
    return x, anomaly_output
🐛 Troubleshooting
Common Issues
Memory Issues
123456789
# Reduce model size
layer = TabularAttention(
    num_heads=4,  # Reduce from 8
    key_dim=32,   # Reduce from 64
    dropout=0.1
)

# Use smaller batch size
model.fit(X_train, y_train, batch_size=16)  # Instead of 32
Training Instability
1 2 3 4 5 6 7 8 9101112
# Add gradient clipping (clipnorm caps the global gradient norm at 1.0)
model.compile(
    optimizer=keras.optimizers.Adam(clipnorm=1.0),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)


# Use learning rate scheduling
def lr_schedule(epoch):
    """Step decay: multiply the base LR of 1e-3 by 0.1 every 20 epochs."""
    return 0.001 * (0.1 ** (epoch // 20))


callbacks.append(keras.callbacks.LearningRateScheduler(lr_schedule))
Overfitting
1 2 3 4 5 6 7 8 910
# Increase regularization
layer = VariableSelection(
    hidden_dim=64,
    dropout=0.3  # Increase dropout
)

# Add early stopping: halt when val_loss stops improving for 5 epochs
callbacks.append(EarlyStopping(monitor='val_loss', patience=5))
Performance Optimization
Speed Optimization
12345678
# Use fewer attention heads
layer = TabularAttention(num_heads=4, key_dim=32)

# Reduce hidden dimensions
layer = VariableSelection(hidden_dim=32)

# Use mixed precision (float16 compute with float32 variables)
keras.mixed_precision.set_global_policy('mixed_float16')
Memory Optimization
1 2 3 4 5 6 7 8 9101112131415
# NOTE(review): the original comment said "gradient checkpointing", but
# the code only selects graph mode — confirm the intended technique.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    run_eagerly=False  # Use graph mode
)


# Process data in smaller chunks to bound peak memory during inference
def process_in_chunks(data, chunk_size=1000):
    """Run model.predict over `data` in chunks and concatenate the results.

    Args:
        data: Array-like input supporting len() and slicing.
        chunk_size: Number of rows per predict call.

    Returns:
        The concatenated prediction array.
    """
    results = []
    for start in range(0, len(data), chunk_size):
        chunk = data[start:start + chunk_size]
        results.append(model.predict(chunk))
    return np.concatenate(results)