Boxplot
LLM Token Latency
Time-to-first-token distribution across AI models
Python
import matplotlib.pyplot as plt
import numpy as np
# Reproducible synthetic time-to-first-token samples (ms); a lognormal
# gives the right-skewed shape typical of latency measurements
np.random.seed(42)
models = ['GPT-4', 'Claude', 'Gemini', 'Llama', 'Mistral']
data = [
    np.random.lognormal(6.5, 0.3, 250),   # median ~665 ms
    np.random.lognormal(6.3, 0.25, 250),  # median ~545 ms
    np.random.lognormal(6.4, 0.35, 250),  # median ~600 ms
    np.random.lognormal(5.8, 0.4, 250),   # median ~330 ms
    np.random.lognormal(5.5, 0.35, 250),  # median ~245 ms
]
fig, ax = plt.subplots(figsize=(10, 6), dpi=100)
# Dark background for both the axes and the figure
ax.set_facecolor('#0d1117')
fig.patch.set_facecolor('#0d1117')
# One accent colour per model
colors = ['#27F5B0', '#F5276C', '#276CF5', '#F5B027', '#6CF527']
# Hide outliers to keep the whisker range readable; patch_artist=True
# draws the boxes as filled patches so they can be coloured per model
bp = ax.boxplot(data, widths=0.55, patch_artist=True, showfliers=False,
                medianprops=dict(color='white', linewidth=2))
for patch, color in zip(bp['boxes'], colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.8)
    patch.set_edgecolor(color)
    patch.set_linewidth(1.5)
# Whiskers and caps come in lower/upper pairs per box
for i, color in enumerate(colors):
    bp['whiskers'][i*2].set_color(color)
    bp['whiskers'][i*2+1].set_color(color)
    bp['caps'][i*2].set_color(color)
    bp['caps'][i*2+1].set_color(color)
ax.axhline(500, color='#5314E6', linewidth=1.5, linestyle=':', alpha=0.7, label='Target: 500ms')
ax.set_xticklabels(models)
# Minimal dark-theme styling: drop the top/right spines, mute the rest,
# and draw a subtle horizontal grid behind the boxes
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_color('#333333')
ax.spines['bottom'].set_color('#333333')
ax.yaxis.grid(True, color='#1a1a3f', linewidth=0.5, zorder=0)
ax.set_axisbelow(True)
ax.tick_params(axis='both', colors='#888888', labelsize=9, length=0, pad=8)
ax.set_ylabel('Time to First Token (ms)', fontsize=11, color='white', fontweight='500')
ax.set_title('LLM Response Latency Comparison', fontsize=14, color='white', fontweight='bold', pad=15)
ax.legend(loc='upper right', facecolor='#0d1117', edgecolor='#333333', labelcolor='white')
plt.tight_layout()
plt.show()
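Two optional follow-ups, sketched under the assumption that the fig, ax, data, and colors objects from the listing above are still in scope (the output filename is only illustrative): labelling each box with its median latency makes the chart readable at a glance, and passing the figure's facecolor to savefig preserves the dark background on export, since older matplotlib versions default savefig.facecolor to white.

# Label each box with its median latency (boxplot positions are 1..N)
for i, (d, color) in enumerate(zip(data, colors), start=1):
    med = np.median(d)
    ax.text(i, med + 15, f'{med:.0f} ms', ha='center', va='bottom',
            color=color, fontsize=8)

# Export with the dark background preserved
fig.savefig('llm_ttft_boxplot.png', dpi=200,
            facecolor=fig.get_facecolor(), bbox_inches='tight')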
Library: Matplotlib
Category: Statistical