Raincloud Plot

Home Price by Neighborhood Raincloud

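Distribution of home sale prices (in thousands of USD) across four areas — Downtown, Midtown, Suburbs, and Rural — shown as a raincloud plot: a violin outline, a box plot, and jittered data points for each area.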

Output
Home Price by Neighborhood Raincloud
Python
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

# Raincloud plot of synthetic home prices for four areas: an unfilled violin
# outline, a box plot, jittered raw points, and a labelled mean marker per group.

# Fix the NumPy global seed so the synthetic samples are reproducible.
np.random.seed(1313)

# Price in thousands USD: one log-normal sample per area.
downtown = np.random.lognormal(6.4, 0.35, 80)
midtown = np.random.lognormal(6.1, 0.4, 95)
suburbs = np.random.lognormal(5.7, 0.45, 120)
rural = np.random.lognormal(5.3, 0.5, 70)

# Clamp every sample to a per-area price band.
downtown = np.clip(downtown, 400, 1500)
midtown = np.clip(midtown, 300, 1000)
suburbs = np.clip(suburbs, 200, 700)
rural = np.clip(rural, 100, 500)

# One-way ANOVA across the four groups. The result is not drawn on the figure;
# the names are kept at module level so they remain available for inspection.
F_stat, p_value = stats.f_oneway(downtown, midtown, suburbs, rural)

BG_COLOR = "#ffffff"
COLOR_SCALE = ["#F5276C", "#F5B027", "#27D3F5", "#6CF527"]

fig, ax = plt.subplots(figsize=(10, 6), facecolor=BG_COLOR)
ax.set_facecolor(BG_COLOR)

y_data = [downtown, midtown, suburbs, rural]
positions = [0, 1, 2, 3]
labels = ["Downtown", "Midtown", "Suburbs", "Rural"]

# Light dashed horizontal guide lines behind every other artist (zorder=0).
for h in [250, 500, 750, 1000]:
    ax.axhline(h, color='#e5e7eb', ls=(0, (5, 5)), alpha=0.8, zorder=0)

# Median income affordability line
ax.axhline(y=400, color='#22c55e', ls='--', alpha=0.6, lw=2)
ax.text(3.55, 400, "Median\nAffordable", color='#22c55e', fontsize=8, va='center')

# Violin outlines only: no fill and no built-in mean/median/extrema markers.
violins = ax.violinplot(y_data, positions=positions, widths=0.5,
                        bw_method="silverman", showmeans=False,
                        showmedians=False, showextrema=False)
for pc in violins["bodies"]:
    pc.set_facecolor("none")
    pc.set_edgecolor("#374151")
    pc.set_linewidth(1.8)

# Box plot drawn at the same positions, without fliers or whisker caps.
bp = ax.boxplot(y_data, positions=positions, showfliers=False, showcaps=False,
                medianprops=dict(linewidth=3, color='#1f2937'),
                whiskerprops=dict(linewidth=2, color='#9ca3af'),
                boxprops=dict(linewidth=2, color='#9ca3af'))

# Raw observations, spread horizontally around each group position with
# Student-t distributed jitter (df=6, scale=0.04) so points do not overlap.
for i, (y, color) in enumerate(zip(y_data, COLOR_SCALE)):
    x_jitter = np.full(len(y), i) + stats.t(df=6, scale=0.04).rvs(len(y))
    ax.scatter(x_jitter, y, s=50, color=color, alpha=0.5, zorder=2)

# Mean marker per group, with a short leader line to a boxed "μ=$…K" label.
means = [y.mean() for y in y_data]
medians = [np.median(y) for y in y_data]
for i, (mean, median, color) in enumerate(zip(means, medians, COLOR_SCALE)):
    ax.scatter(i, mean, s=180, color='#C82909', zorder=5, edgecolors='white', linewidths=2)
    ax.plot([i, i + 0.28], [mean, mean], ls="dashdot", color="#374151", zorder=3, lw=1.5)
    ax.text(i + 0.3, mean, f"μ=${mean:.0f}K", fontsize=10, va="center", color='#1f2937',
            bbox=dict(facecolor='white', edgecolor=color, boxstyle="round,pad=0.15", lw=2))

# Price per sqft
ppsf = ["$650", "$485", "$320", "$195"]
for i, (p, color) in enumerate(zip(ppsf, COLOR_SCALE)):
    ax.text(i, 50, f"$/sqft: {p}", ha='center', fontsize=9, color=color)

# Minimal frame: hide the top/right spines and soften the remaining two.
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
ax.spines["left"].set_color('#d1d5db')
ax.spines["bottom"].set_color('#d1d5db')
ax.tick_params(colors='#374151', length=0)

# Tick labels carry the group name plus its sample size.
xlabels_full = [f"{l}\n(n={len(y_data[i])})" for i, l in enumerate(labels)]
ax.set_xticks(positions)
ax.set_xticklabels(xlabels_full, size=11, color='#1f2937')
ax.set_ylabel("Home Price ($K)", size=14, color='#1f2937', fontweight='bold')

# The title uses the same dark text colour as the axis labels; the previous
# "white" was invisible against the white BG_COLOR background.
ax.set_title("Home Sale Prices by Neighborhood", fontsize=14, color='#1f2937',
             fontweight="bold", pad=20)

ax.set_ylim(0, 1600)
plt.tight_layout()
plt.show()
Library

Matplotlib

Category

Statistical

Did this help you?

Support PyLucid to keep it free & growing

Support