ANOVA Violin Plot

ML Model Accuracy ANOVA Comparison

A one-way ANOVA comparing the classification accuracy of four machine learning models (Random Forest, XGBoost, Neural Network, and SVM), visualized as a violin plot.

Output
ML Model Accuracy ANOVA Comparison
Python
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats

# Fix the seed so the simulated scores (and therefore the figure) are reproducible.
np.random.seed(789)

# Simulated cross-validation accuracy scores (100 runs per model), drawn from
# normal distributions with a per-model mean and standard deviation.
# Accuracy is a proportion, so each sample is clipped to [0, 1]: an unbounded
# normal draw can otherwise land above 1.0, which is not a valid accuracy and
# would fall outside the y-axis range used by the plot. Clipping happens after
# the draw, so the random stream consumed by each model is unchanged.
random_forest = np.clip(np.random.normal(0.892, 0.025, 100), 0.0, 1.0)
xgboost = np.clip(np.random.normal(0.908, 0.022, 100), 0.0, 1.0)
neural_net = np.clip(np.random.normal(0.915, 0.030, 100), 0.0, 1.0)
svm = np.clip(np.random.normal(0.875, 0.028, 100), 0.0, 1.0)

# One-way ANOVA across the four groups: F statistic and its p-value.
F_stat, p_value = stats.f_oneway(random_forest, xgboost, neural_net, svm)

# Dark-themed canvas: the figure and the axes share the same background colour.
fig, ax = plt.subplots(figsize=(12, 7), facecolor='#0a0a0f')
ax.set_facecolor('#0a0a0f')

# One fill colour per model, in plotting order.
colors = ['#6CF527', '#F5B027', '#F5276C', '#4927F5']

# The four score arrays, in the order they appear along the x-axis.
samples = [random_forest, xgboost, neural_net, svm]

parts = ax.violinplot(
    samples,
    positions=[1, 2, 3, 4],
    showmeans=True,
    showmedians=True,
    widths=0.7,
)

# Style each violin body with its model colour and a white outline.
for body, fill in zip(parts['bodies'], colors):
    body.set_facecolor(fill)
    body.set_alpha(0.7)
    body.set_edgecolor('white')
    body.set_linewidth(1.5)

# Mean markers are drawn thick and cyan; median markers thinner and white.
mean_lines = parts['cmeans']
mean_lines.set_color('#27D3F5')
mean_lines.set_linewidth(3)

median_lines = parts['cmedians']
median_lines.set_color('white')
median_lines.set_linewidth(1.5)

# Central bar and min/max caps are muted grey so they stay in the background.
for key in ('cbars', 'cmins', 'cmaxes'):
    parts[key].set_color('#555555')

# Overlay the interquartile range (translucent bar) and the median (dot)
# inside each violin; x positions start at 1 to match the violin positions.
for xpos, scores in enumerate(samples, start=1):
    lower_q, mid, upper_q = np.percentile(scores, [25, 50, 75])
    ax.vlines(xpos, lower_q, upper_q, color='white', linewidth=4, alpha=0.3)
    ax.scatter([xpos], [mid], color='white', s=30, zorder=5)

# Tick labels (with line breaks to keep them narrow) and the matching
# single-line names used in the stats panel; both follow plotting order.
labels = ['Random\nForest', 'XGBoost', 'Neural\nNetwork', 'SVM']
panel_names = ['Random Forest', 'XGBoost', 'Neural Net', 'SVM']
means = [random_forest.mean(), xgboost.mean(), neural_net.mean(), svm.mean()]

# Mean annotations near the bottom of the axes, one per violin, in the
# violin's own colour.
for i, (mean, color) in enumerate(zip(means, colors)):
    ax.text(i+1, 0.78, f'{mean:.1%}', ha='center', fontsize=11, color=color, fontweight='bold')

# Pick the best model from the computed means instead of hard-coding it, so
# the panel can never disagree with the data that was actually plotted.
best_idx = int(np.argmax(means))

# Stats panel - centred just above the axes (axes coordinates), compact
panel_text = (
    f"F={F_stat:.1f}  p={p_value:.2e}  "
    f"Best: {panel_names[best_idx]} ({means[best_idx]:.1%})"
)
bbox = dict(boxstyle="round,pad=0.4", facecolor='#0d1117', edgecolor='#F5276C', lw=2)
ax.text(0.5, 1.02, panel_text, transform=ax.transAxes, fontsize=10, color='white',
        ha='center', va='bottom', fontfamily='monospace', bbox=bbox)

ax.set_xticks([1, 2, 3, 4])
ax.set_xticklabels(labels, fontsize=10, color='white')
ax.set_ylabel('Classification Accuracy', fontsize=12, color='white', fontweight='500')
# Extra title padding leaves room for the stats panel between title and axes.
ax.set_title('Machine Learning Model Performance\n10-Fold Cross-Validation Results',
             fontsize=14, color='white', fontweight='bold', pad=25)
# Lower limit sits just below the mean annotations drawn at y=0.78.
ax.set_ylim(0.77, 1.0)

# Muted ticks, spines and horizontal grid to suit the dark background;
# the grid is kept below the plotted artists.
ax.tick_params(colors='#888888')
for spine in ax.spines.values():
    spine.set_color('#333333')
ax.yaxis.grid(True, color='#1a1a2e', linewidth=0.5)
ax.set_axisbelow(True)

plt.tight_layout()
plt.show()
Library

Matplotlib

Category

Statistical

Did this help you?

Support PyLucid to keep it free & growing

Support