import logging
from typing import Any, Dict, List, Optional

import keras
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.offline import plot
from plotly.subplots import make_subplots
from sklearn.metrics import f1_score, precision_score, recall_score

logger = logging.getLogger(__name__)


def compare_models(
models: List[keras.Model],
X_test: np.ndarray,
y_test: np.ndarray,
model_names: Optional[List[str]] = None
) -> pd.DataFrame:
"""Compare performance of multiple models.
Evaluates multiple models on test data, computes various metrics (accuracy, loss,
precision, recall, F1), and creates comparative visualizations.
Args:
models: List of compiled Keras models to compare.
X_test: Test feature array of shape (n_samples, n_features).
y_test: One-hot encoded target array of shape (n_samples, n_classes).
model_names: Optional list of names for the models. If None, auto-generated names are used.
Returns:
pd.DataFrame: DataFrame with comparison metrics for each model.
Example:
```python
import numpy as np
import keras
X_test = np.random.rand(50, 10)
y_test = np.zeros((50, 3))
y_test[np.arange(50), np.random.randint(0, 3, 50)] = 1
model1 = keras.Sequential([...])
model2 = keras.Sequential([...])
models = [model1, model2]
comparison_df = compare_models(models, X_test, y_test)
```
"""
    if model_names is None:
        model_names = [f'model_{i}' for i in range(len(models))]
    if len(model_names) != len(models):
        raise ValueError("model_names must have the same length as models")

    results: List[Dict[str, Any]] = []
    for model, name in zip(models, model_names):
        # Evaluate the model; assumes it was compiled with metrics=['accuracy'],
        # so evaluate() returns exactly [loss, accuracy].
        test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

        # Convert class probabilities and one-hot targets to class indices
        predictions = model.predict(X_test, verbose=0)
        predicted_classes = np.argmax(predictions, axis=1)
        true_classes = np.argmax(y_test, axis=1)

        # Weighted averages account for class imbalance in multi-class problems
        precision = precision_score(true_classes, predicted_classes, average='weighted')
        recall = recall_score(true_classes, predicted_classes, average='weighted')
        f1 = f1_score(true_classes, predicted_classes, average='weighted')
results.append({
'model': name,
'accuracy': test_accuracy,
'loss': test_loss,
'precision': precision,
'recall': recall,
'f1': f1
})
# Create comparison DataFrame
comparison_df = pd.DataFrame(results)
logger.info(f"Model Comparison:\n{comparison_df}")
    # Plot comparison as a 2x2 grid of Plotly bar-chart subplots
fig = make_subplots(
rows=2, cols=2,
subplot_titles=('Accuracy Comparison', 'Loss Comparison', 'Precision Comparison', 'F1 Score Comparison')
)
# Accuracy
fig.add_trace(
go.Bar(x=comparison_df['model'], y=comparison_df['accuracy'], name='Accuracy'),
row=1, col=1
)
# Loss
fig.add_trace(
go.Bar(x=comparison_df['model'], y=comparison_df['loss'], name='Loss'),
row=1, col=2
)
# Precision
fig.add_trace(
go.Bar(x=comparison_df['model'], y=comparison_df['precision'], name='Precision'),
row=2, col=1
)
# F1 Score
fig.add_trace(
go.Bar(x=comparison_df['model'], y=comparison_df['f1'], name='F1 Score'),
row=2, col=2
)
fig.update_layout(height=800, width=1200, showlegend=False, title_text='Model Comparison')
    # Write the figure to an HTML file (temp-plot.html by default) without opening a browser
    plot(fig, auto_open=False)
return comparison_df
# Usage
# models = [model1, model2, model3]
# model_names = ['Attention Model', 'Residual Model', 'Ensemble Model']
# comparison_df = compare_models(models, X_test, y_test, model_names)
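

# The sketch below is a minimal, self-contained smoke test of compare_models on
# synthetic data. The two tiny Sequential architectures, the helper name
# _build_demo_model, and the dataset shapes are illustrative assumptions, not part
# of the original module; any compiled Keras classifiers with matching input and
# output shapes (and the 'accuracy' metric) would work.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X_demo = rng.random((200, 10)).astype("float32")
    y_demo = np.zeros((200, 3), dtype="float32")
    y_demo[np.arange(200), rng.integers(0, 3, 200)] = 1  # one-hot labels

    def _build_demo_model(hidden_units: int) -> keras.Model:
        # Hypothetical helper: a small dense classifier compiled with the
        # 'accuracy' metric, as compare_models expects.
        model = keras.Sequential([
            keras.layers.Input(shape=(10,)),
            keras.layers.Dense(hidden_units, activation="relu"),
            keras.layers.Dense(3, activation="softmax"),
        ])
        model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
        return model

    demo_models = [_build_demo_model(16), _build_demo_model(64)]
    for m in demo_models:
        m.fit(X_demo, y_demo, epochs=2, batch_size=32, verbose=0)

    df = compare_models(demo_models, X_demo, y_demo, model_names=["small", "large"])
    print(df)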