ANOVA Violin Plot

Urban Air Quality ANOVA

Comparing PM2.5 concentration distributions across city zones.

Output
Urban Air Quality ANOVA
Python
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats

# Fixed seed so every run simulates the same readings.
np.random.seed(1515)

# PM2.5 concentration (μg/m³), simulated per zone.
# Each row: (lognormal mean, lognormal sigma, clip floor, clip ceiling).
# Rows are consumed in order, so the random draws happen in the sequence
# downtown -> industrial -> residential -> suburban.
_zone_params = (
    (3.2, 0.5, 5, 100),    # downtown
    (3.8, 0.6, 10, 150),   # industrial
    (2.8, 0.4, 3, 80),     # residential
    (2.5, 0.35, 2, 60),    # suburban
)

# Draw 150 samples per zone, then clip each sample to its zone's range.
downtown, industrial, residential, suburban = [
    np.clip(np.random.lognormal(mean=mu, sigma=sigma, size=150), floor, ceiling)
    for mu, sigma, floor, ceiling in _zone_params
]

# One-way ANOVA across the four zone samples.
F_stat, p_value = stats.f_oneway(downtown, industrial, residential, suburban)

# White 12x7 figure with a white plotting area.
fig, ax = plt.subplots(figsize=(12, 7), facecolor='#ffffff')
ax.set_facecolor('#ffffff')

# One fill color per zone, in x-axis order.
colors = ['#F5B027', '#C82909', '#27D3F5', '#6CF527']

# One violin per zone at x = 1..4, with both mean and median markers drawn.
zone_samples = [downtown, industrial, residential, suburban]
parts = ax.violinplot(
    zone_samples,
    positions=[1, 2, 3, 4],
    showmeans=True,
    showmedians=True,
    widths=0.75,
)

# Fill and outline each violin body in its zone color.
for body, zone_color in zip(parts['bodies'], colors):
    body.set_facecolor(zone_color)
    body.set_alpha(0.6)
    body.set_edgecolor(zone_color)
    body.set_linewidth(2)

# Mean markers: thicker and purple; median markers: dark gray.
mean_marks = parts['cmeans']
mean_marks.set_color('#5314E6')
mean_marks.set_linewidth(2.5)
parts['cmedians'].set_color('#1f2937')

# Center bars and min/max caps share a muted gray.
for key in ('cbars', 'cmins', 'cmaxes'):
    parts[key].set_color('#9ca3af')
# AQI thresholds - compact labels on right.
# Each row: (PM2.5 level, color, label text).
_thresholds = (
    (12, '#22c55e', 'Good'),
    (35, '#fbbf24', 'Mod'),
    (55, '#ef4444', 'Unhealthy'),
)

# Dashed horizontal reference line at every threshold level.
for level, tint, _caption in _thresholds:
    ax.axhline(y=level, color=tint, linestyle='--', alpha=0.7, linewidth=1.5)

# Small label for each line, placed at x = 4.45 (right of the last violin at x = 4).
for level, tint, caption in _thresholds:
    ax.text(4.45, level, caption, fontsize=7, color=tint, va='center')

# Color zones - subtle shaded bands between consecutive thresholds.
# Each row: (lower bound, upper bound, opacity, color).
_bands = (
    (0, 12, 0.08, '#22c55e'),
    (12, 35, 0.06, '#fbbf24'),
    (35, 55, 0.06, '#f97316'),
    (55, 160, 0.06, '#ef4444'),
)
for lower, upper, opacity, tint in _bands:
    ax.axhspan(lower, upper, alpha=opacity, color=tint)

# Zone names (x-axis order) and the sample mean of each zone's readings.
labels = ['Downtown', 'Industrial', 'Residential', 'Suburban']
means = [downtown.mean(), industrial.mean(), residential.mean(), suburban.mean()]

# Mean values at bottom: one label per violin (x = 1..4) at y = -12, drawn in
# that violin's color.
for i, (mean, color) in enumerate(zip(means, colors)):
    ax.text(i+1, -12, f'μ={mean:.1f}', ha='center', fontsize=9, color=color, fontweight='bold')

# "Best" is the zone with the lowest mean PM2.5. It is derived from the data
# instead of being hard-coded to Suburban, so the banner stays truthful if the
# seed, the distribution parameters, or the zone list change. Ties resolve to
# the first zone in x-axis order.
best_mean, best_label, best_color = min(
    zip(means, labels, colors), key=lambda entry: entry[0]
)

# Stats at top: ANOVA result plus the best zone, in a rounded box above the axes.
stats_text = f"ANOVA: F={F_stat:.2f}, p={p_value:.2e} | Best: {best_label} (μ={best_mean:.1f})"
# The box border uses the best zone's violin color (previously the literal
# '#6CF527', which is the Suburban entry of `colors`).
bbox = dict(boxstyle="round,pad=0.3", facecolor='#f0fdf4', edgecolor=best_color, lw=2)
# transform=ax.transAxes: (0.5, 1.02) is in axes-fraction coordinates, i.e.
# horizontally centered and just above the top edge of the plotting area.
ax.text(0.5, 1.02, stats_text, transform=ax.transAxes, fontsize=10, color='#1f2937',
        ha='center', va='bottom', fontfamily='monospace', bbox=bbox)

# Shared dark text color for tick labels, axis label and title.
_ink = '#1f2937'

# Name the four violin positions on the x-axis.
ax.set_xticks([1, 2, 3, 4])
ax.set_xticklabels(labels, color=_ink, fontsize=11)

# Axis label and two-line title.
ax.set_ylabel('PM2.5 (μg/m³)', color=_ink, fontsize=12, fontweight='500')
ax.set_title(
    'Air Quality Index by City Zone\nAnnual PM2.5 Measurements',
    color=_ink,
    fontsize=14,
    fontweight='bold',
    pad=25,
)

# Muted ticks and frame; faint horizontal grid drawn beneath the data.
ax.tick_params(colors='#374151')
for side in ax.spines:
    ax.spines[side].set_color('#e5e7eb')
ax.grid(True, axis='y', color='#f3f4f6', linewidth=0.8)
ax.set_axisbelow(True)

# Fixed y-range of -20 to 160.
ax.set_ylim(bottom=-20, top=160)

# Fit everything into the figure and display it.
plt.tight_layout()
plt.show()
Library

Matplotlib

Category

Statistical

Did this help you?

Support PyLucid to keep it free & growing

Support