Raincloud Plot

Employee Tenure Distribution Raincloud

Comparing employee tenure distributions across company departments.

Output
Employee Tenure Distribution Raincloud
Python
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

# Seed NumPy's global RNG so the synthetic sample is identical on every run.
np.random.seed(1414)


def _draw_tenure(mu, sigma, size, lower, upper):
    """Draw `size` log-normal values and clamp them to [lower, upper]."""
    return np.clip(np.random.lognormal(mu, sigma, size), lower, upper)


# Tenure in years, one array per department.
# The draw order matters: each call consumes the shared global RNG stream.
engineering = _draw_tenure(1.2, 0.6, 100, 0.5, 15)
sales = _draw_tenure(0.8, 0.7, 90, 0.3, 10)
marketing = _draw_tenure(1.0, 0.65, 75, 0.4, 12)
ops = _draw_tenure(1.5, 0.5, 80, 0.6, 18)

# One-way ANOVA across the four department samples.
_anova = stats.f_oneway(engineering, sales, marketing, ops)
F_stat, p_value = _anova.statistic, _anova.pvalue

# --- Figure styling ---------------------------------------------------------
BG_COLOR = "#ffffff"
# One colour per department, in the same order as `y_data` below.
COLOR_SCALE = ["#4927F5", "#F5276C", "#F5B027", "#6CF527"]

fig, ax = plt.subplots(figsize=(10, 6), facecolor=BG_COLOR)
ax.set_facecolor(BG_COLOR)

y_data = [engineering, sales, marketing, ops]
positions = [0, 1, 2, 3]
labels = ["Engineering", "Sales", "Marketing", "Operations"]

# Faint dashed guide lines at 2, 5 and 10 years, drawn behind everything else.
for h in [2, 5, 10]:
    ax.axhline(h, color='#e5e7eb', ls=(0, (5, 5)), alpha=0.8, zorder=0)

# Industry average
ax.axhline(y=4.1, color='#6b7280', ls='--', alpha=0.6, lw=2)
ax.text(3.55, 4.1, "Industry\nAvg", color='#6b7280', fontsize=8, va='center')

# Violin outlines only: no fill and no built-in mean/median/extrema markers,
# because the box plot and the mean markers below provide those.
violins = ax.violinplot(y_data, positions=positions, widths=0.5,
                        bw_method="silverman", showmeans=False,
                        showmedians=False, showextrema=False)
for pc in violins["bodies"]:
    pc.set_facecolor("none")
    pc.set_edgecolor("#374151")
    pc.set_linewidth(1.8)

# Box plot overlay without outlier markers or whisker caps.
bp = ax.boxplot(y_data, positions=positions, showfliers=False, showcaps=False,
                medianprops=dict(linewidth=3, color='#1f2937'),
                whiskerprops=dict(linewidth=2, color='#9ca3af'),
                boxprops=dict(linewidth=2, color='#9ca3af'))

# Raw observations, horizontally jittered around each department's position
# with small Student-t noise so overlapping points stay distinguishable.
for i, (y, color) in enumerate(zip(y_data, COLOR_SCALE)):
    x_jitter = i + stats.t(df=6, scale=0.04).rvs(len(y))
    ax.scatter(x_jitter, y, s=50, color=color, alpha=0.5, zorder=2)

# Mean marker per department, with a short leader line to a boxed label.
means = [y.mean() for y in y_data]
for i, (mean, color) in enumerate(zip(means, COLOR_SCALE)):
    ax.scatter(i, mean, s=180, color='#C82909', zorder=5, edgecolors='white', linewidths=2)
    ax.plot([i, i + 0.28], [mean, mean], ls="dashdot", color="#374151", zorder=3, lw=1.5)
    ax.text(i + 0.3, mean, f"μ={mean:.1f}y", fontsize=10, va="center", color='#1f2937',
            bbox=dict(facecolor='white', edgecolor=color, boxstyle="round,pad=0.15", lw=2))

# Turnover rates (hard-coded labels), written below the data at y = -1.2.
turnover = ["12%", "28%", "22%", "8%"]
for i, (t, color) in enumerate(zip(turnover, COLOR_SCALE)):
    ax.text(i, -1.2, f"Turnover: {t}", ha='center', fontsize=9, color=color)

# Hide the top and right spines, soften the remaining ones, hide tick marks.
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
ax.spines["left"].set_color('#d1d5db')
ax.spines["bottom"].set_color('#d1d5db')
ax.tick_params(colors='#374151', length=0)

# Department name plus sample size on each x tick.
xlabels_full = [f"{label}\n(n={len(y)})" for label, y in zip(labels, y_data)]
ax.set_xticks(positions)
ax.set_xticklabels(xlabels_full, size=11, color='#1f2937')
ax.set_ylabel("Tenure (Years)", size=14, color='#1f2937', fontweight='bold')

# The title was previously drawn in white on the white figure background,
# which made it invisible; use the same dark text colour as the axis labels.
ax.set_title("Employee Tenure Distribution", fontsize=14, color="#1f2937", fontweight="bold", pad=20)

# The lower limit sits below zero to leave room for the turnover labels.
ax.set_ylim(-2, 20)
plt.tight_layout()
plt.show()
Library

Matplotlib

Category

Statistical

Did this help you?

Support PyLucid to keep it free & growing

Support