ANOVA Violin Plot
Urban Air Quality ANOVA
Comparing PM2.5 concentration distributions across city zones.
Output
Python
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
# Simulate PM2.5 readings for four city zones, compare the zone means with a
# one-way ANOVA, and draw the four distributions as a violin plot.

np.random.seed(1515)  # fixed seed so the figure is reproducible

# PM2.5 concentration (μg/m³): 150 log-normal draws per zone.
# The draw order is significant: reordering changes the samples under the seed.
downtown = np.random.lognormal(3.2, 0.5, 150)
industrial = np.random.lognormal(3.8, 0.6, 150)
residential = np.random.lognormal(2.8, 0.4, 150)
suburban = np.random.lognormal(2.5, 0.35, 150)

# Clip to realistic range
downtown = np.clip(downtown, 5, 100)
industrial = np.clip(industrial, 10, 150)
residential = np.clip(residential, 3, 80)
suburban = np.clip(suburban, 2, 60)

# Parallel per-zone sequences; index k refers to the same zone in each.
labels = ['Downtown', 'Industrial', 'Residential', 'Suburban']
samples = [downtown, industrial, residential, suburban]
colors = ['#F5B027', '#C82909', '#27D3F5', '#6CF527']
positions = [1, 2, 3, 4]
means = [zone.mean() for zone in samples]

# One-way ANOVA across the four (clipped) samples.
F_stat, p_value = stats.f_oneway(*samples)

# The zone with the lowest mean concentration is reported as "best".
# Derived from the data instead of being hard-coded to one zone.
best_idx = int(np.argmin(means))

fig, ax = plt.subplots(figsize=(12, 7), facecolor='#ffffff')
ax.set_facecolor('#ffffff')

parts = ax.violinplot(samples, positions=positions,
                      showmeans=True, showmedians=True, widths=0.75)

# Give each violin body its zone colour.
for body, color in zip(parts['bodies'], colors):
    body.set_facecolor(color)
    body.set_alpha(0.6)
    body.set_edgecolor(color)
    body.set_linewidth(2)

parts['cmeans'].set_color('#5314E6')
parts['cmeans'].set_linewidth(2.5)
parts['cmedians'].set_color('#1f2937')
for partname in ['cbars', 'cmins', 'cmaxes']:
    parts[partname].set_color('#9ca3af')

# AQI thresholds - compact labels on right
thresholds = [
    (12, 'Good', '#22c55e'),
    (35, 'Mod', '#fbbf24'),
    (55, 'Unhealthy', '#ef4444'),
]
for level, caption, color in thresholds:
    ax.axhline(y=level, color=color, linestyle='--', alpha=0.7, linewidth=1.5)
    ax.text(4.45, level, caption, fontsize=7, color=color, va='center')

# Color zones - subtle
bands = [
    (0, 12, 0.08, '#22c55e'),
    (12, 35, 0.06, '#fbbf24'),
    (35, 55, 0.06, '#f97316'),
    (55, 160, 0.06, '#ef4444'),
]
for lower, upper, alpha, color in bands:
    ax.axhspan(lower, upper, alpha=alpha, color=color)

# Mean values at bottom (y=-12 sits below zero, inside the extended y-range)
for pos, (mean, color) in zip(positions, zip(means, colors)):
    ax.text(pos, -12, f'μ={mean:.1f}', ha='center', fontsize=9,
            color=color, fontweight='bold')

# Stats at top: ANOVA result plus the lowest-mean zone, boxed in that
# zone's colour.
stats_text = (f"ANOVA: F={F_stat:.2f}, p={p_value:.2e} | "
              f"Best: {labels[best_idx]} (μ={means[best_idx]:.1f})")
bbox = dict(boxstyle="round,pad=0.3", facecolor='#f0fdf4',
            edgecolor=colors[best_idx], lw=2)
ax.text(0.5, 1.02, stats_text, transform=ax.transAxes, fontsize=10,
        color='#1f2937', ha='center', va='bottom', fontfamily='monospace',
        bbox=bbox)

ax.set_xticks(positions)
ax.set_xticklabels(labels, fontsize=11, color='#1f2937')
ax.set_ylabel('PM2.5 (μg/m³)', fontsize=12, color='#1f2937', fontweight='500')
ax.set_title('Air Quality Index by City Zone\nAnnual PM2.5 Measurements',
             fontsize=14, color='#1f2937', fontweight='bold', pad=25)

ax.tick_params(colors='#374151')
for spine in ax.spines.values():
    spine.set_color('#e5e7eb')
ax.yaxis.grid(True, color='#f3f4f6', linewidth=0.8)
ax.set_axisbelow(True)
ax.set_ylim(-20, 160)  # lower bound below zero leaves room for the μ labels

plt.tight_layout()
plt.show()
Library
Matplotlib
Category
Statistical
More ANOVA Violin Plot examples
☕