Raincloud Plot

Class Size Impact on Scores Raincloud

Distribution of student test scores across different class sizes with statistical analysis.

Output
Class Size Impact on Scores Raincloud
Python
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

# Seed NumPy's global generator so the simulated scores are reproducible.
np.random.seed(1111)

# Simulated test scores (0-100), one row per class-size group:
# (mean, standard deviation, sample size, clip lower bound, clip upper bound)
_SCORE_SPECS = (
    (82, 8, 80, 55, 100),    # small:  <15 students
    (76, 10, 120, 45, 98),   # medium: 15-25 students
    (71, 12, 100, 35, 95),   # large:  25-35 students
    (65, 14, 70, 25, 92),    # xlarge: >35 students
)

# Draw all four samples first, in this fixed order, so the random stream is
# consumed the same way on every run; clipping happens afterwards.
_raw_scores = [
    np.random.normal(mu, sigma, count) for mu, sigma, count, _, _ in _SCORE_SPECS
]
small, medium, large, xlarge = [
    np.clip(scores, low, high)
    for scores, (_, _, _, low, high) in zip(_raw_scores, _SCORE_SPECS)
]

# One-way ANOVA across the four class-size groups.
_anova = stats.f_oneway(small, medium, large, xlarge)
F_stat, p_value = _anova.statistic, _anova.pvalue

# Palette: one colour per class-size group, a faint grey for guides and
# spines, and the figure/axes background.
COLOR_SCALE = ["#6CF527", "#27D3F5", "#F5B027", "#F5276C"]
GREY_LIGHT = "#d1d5db"
BG_COLOR = "#ffffff"

# Groups in display order, their x positions and their tick labels.
y_data = [small, medium, large, xlarge]
positions = list(range(len(y_data)))
labels = ["Small\n(<15)", "Medium\n(15-25)", "Large\n(25-35)", "X-Large\n(>35)"]

fig, ax = plt.subplots(figsize=(10, 6), facecolor=BG_COLOR)
ax.set_facecolor(BG_COLOR)

# Dashed horizontal guides drawn underneath everything else (zorder=0).
for guide_y in (50, 70, 90):
    ax.axhline(guide_y, color=GREY_LIGHT, linestyle=(0, (5, 5)), alpha=0.8, zorder=0)

# Passing grade: red dashed line at 60 with a label to the right of the last group.
ax.axhline(y=60, color='#ef4444', linestyle='--', alpha=0.6, linewidth=2)
ax.text(3.55, 60, "Pass", color='#ef4444', fontsize=9, verticalalignment='center')

# Density outline per group: violins with no fill, only a dark edge.
violins = ax.violinplot(
    y_data,
    positions=positions,
    widths=0.5,
    bw_method="silverman",
    showmeans=False,
    showmedians=False,
    showextrema=False,
)
for body in violins["bodies"]:
    body.set(facecolor="none", edgecolor="#374151", linewidth=1.8)

# Box plot overlay without outliers or caps; box and whiskers share one style.
_box_line_style = {"linewidth": 2, "color": '#9ca3af'}
bp = ax.boxplot(
    y_data,
    positions=positions,
    showfliers=False,
    showcaps=False,
    medianprops={"linewidth": 3, "color": '#1f2937'},
    whiskerprops=dict(_box_line_style),
    boxprops=dict(_box_line_style),
)

# Raw observations, spread horizontally with a small Student-t jitter so that
# overlapping points stay visible.
_jitter_dist = stats.t(df=6, scale=0.04)
for group_idx, (scores, group_color) in enumerate(zip(y_data, COLOR_SCALE)):
    x_jitter = group_idx + _jitter_dist.rvs(len(scores))
    ax.scatter(x_jitter, scores, s=50, color=group_color, alpha=0.5, zorder=2)

# Group means: a large red marker on each group's axis position, a short
# dash-dot leader to the right, and a boxed "μ=..." label outlined in the
# group's colour.
means = [y.mean() for y in y_data]
for i, (mean, color) in enumerate(zip(means, COLOR_SCALE)):
    ax.scatter(i, mean, s=180, color='#C82909', zorder=5, edgecolors='white', linewidths=2)
    ax.plot([i, i + 0.28], [mean, mean], ls="dashdot", color="#374151", zorder=3, lw=1.5)
    ax.text(i + 0.3, mean, f"μ={mean:.1f}", fontsize=10, va="center", color='#1f2937',
            bbox=dict(facecolor='white', edgecolor=color, boxstyle="round,pad=0.15", lw=2))

# Comparison bracket spanning the first to the last group.
tick = 3
bracket_y = 98
x_first, x_last = positions[0], positions[-1]
ax.plot([x_first, x_first, x_last, x_last],
        [bracket_y - tick, bracket_y, bracket_y, bracket_y - tick],
        c="#374151", lw=1.5)

# Report the test that was actually run (the one-way ANOVA that produced
# F_stat and p_value) instead of a hard-coded p-value attributed to a
# Games-Howell test that this script never performs.
p_label = "p<0.001" if p_value < 0.001 else f"p={p_value:.3f}"
ax.text((x_first + x_last) / 2, 100, f"{p_label}, one-way ANOVA (F={F_stat:.1f})",
        fontsize=10, va="bottom", ha="center", color='#C82909')

# Minimal frame: hide the top/right spines and mute the remaining two.
for side in ("right", "top"):
    ax.spines[side].set_visible(False)
for side in ("left", "bottom"):
    ax.spines[side].set_color(GREY_LIGHT)
ax.tick_params(colors='#374151', length=0)

# Tick labels: the group label plus its sample size on an extra line.
xlabels_full = [f"{label}\n(n={len(scores)})" for label, scores in zip(labels, y_data)]
ax.set_xticks(positions)
ax.set_xticklabels(xlabels_full, size=10, color='#1f2937')
ax.set_ylabel("Test Score", size=14, color='#1f2937', fontweight='bold')
ax.set_xlabel("Class Size", size=14, color='#1f2937', fontweight='bold')

# The title uses the same dark text colour as the axis labels; it was
# previously white, which is invisible on the white figure background.
ax.set_title("Class Size Effect on Student Achievement", fontsize=14, color='#1f2937',
             fontweight="bold", pad=20)

# Leave headroom above 100 for the comparison bracket and its annotation.
ax.set_ylim(20, 105)
plt.tight_layout()
plt.show()
Library

Matplotlib

Category

Statistical

Did this help you?

Support PyLucid to keep it free & growing

Support