Sankey Diagram
Data Pipeline ETL Flow
Data engineering pipeline showing flow from ingestion through transformation to storage destinations.
Output
Python
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.path import Path
def draw_flow(ax, x0, y0, x1, y1, w0, w1, color, alpha=0.6):
    """Add one filled, curved band to *ax* joining two vertical spans.

    The band starts at ``x0`` centred on ``y0`` with thickness ``w0`` and
    ends at ``x1`` centred on ``y1`` with thickness ``w1``.  Its upper and
    lower edges are CURVE4 path segments whose control points both sit at
    the horizontal midpoint, so each edge leaves and arrives horizontally.

    Args:
        ax: Axes-like object; only ``add_patch`` is called on it.
        x0, y0: Centre of the band's starting edge.
        x1, y1: Centre of the band's ending edge.
        w0, w1: Band thickness at the start and at the end.
        color: Fill colour of the band.
        alpha: Fill opacity (defaults to 0.6).
    """
    mid_x = (x0 + x1) / 2

    # Upper and lower edge heights at each end of the band.
    start_top, start_bottom = y0 + w0/2, y0 - w0/2
    end_top, end_bottom = y1 + w1/2, y1 - w1/2

    outline = [
        # Upper edge, left to right.
        (x0, start_top), (mid_x, start_top), (mid_x, end_top), (x1, end_top),
        # Right side, then lower edge back from right to left.
        (x1, end_bottom), (mid_x, end_bottom), (mid_x, start_bottom), (x0, start_bottom),
        # Placeholder vertex paired with CLOSEPOLY.
        (x0, start_top),
    ]
    commands = [
        Path.MOVETO,
        Path.CURVE4, Path.CURVE4, Path.CURVE4,
        Path.LINETO,
        Path.CURVE4, Path.CURVE4, Path.CURVE4,
        Path.CLOSEPOLY,
    ]

    band = mpatches.PathPatch(Path(outline, commands), fc=color, alpha=alpha, ec='none')
    ax.add_patch(band)
def draw_node(ax, x, y, w, h, color, label):
    """Add a rounded, labelled box centred on ``(x, y)`` to *ax*.

    Args:
        ax: Axes-like object; ``add_patch`` and ``text`` are called on it.
        x, y: Centre of the box and anchor point of the label.
        w, h: Width and height handed to the box patch.
        color: Fill colour of the box.
        label: Text drawn centred on the box in small bold white type.
    """
    # FancyBboxPatch is positioned by its lower-left corner, not its centre.
    lower_left = (x - w/2, y - h/2)
    box = mpatches.FancyBboxPatch(
        lower_left, w, h,
        boxstyle="round,pad=0.02",
        fc=color, ec='white', lw=1.5,
    )
    ax.add_patch(box)

    ax.text(
        x, y, label,
        ha='center', va='center',
        fontsize=8, color='white', fontweight='bold',
    )
# One dark colour is used for both the figure and the axes background.
background = '#0a0a0f'
fig, ax = plt.subplots(figsize=(14, 8), facecolor=background)
ax.set_facecolor(background)

# Multiplier turning a volume figure into a band thickness in data units.
s = 0.0004

# Bands, listed in drawing order: (x0, y0, x1, y1, volume, colour).
flows = [
    # Ingest sources
    (0.5, 7, 2.5, 5.5, 5000, '#276CF5'),
    (0.5, 4, 2.5, 4.5, 3000, '#4927F5'),
    (0.5, 2, 2.5, 3.5, 2000, '#5314E6'),
    # Transform
    (3.5, 5, 5.5, 5, 9000, '#27D3F5'),
    (3.5, 4.3, 5.5, 2, 1000, '#C82909'),
    # Load destinations
    (6.5, 5.5, 8.5, 7, 4000, '#6CF527'),
    (6.5, 4.5, 8.5, 4, 3000, '#27F5B0'),
    (6.5, 4, 8.5, 2, 2000, '#F5B027'),
]
for x_from, y_from, x_to, y_to, volume, colour in flows:
    # Every band keeps the same thickness from start to end.
    thickness = volume * s
    draw_flow(ax, x_from, y_from, x_to, y_to, thickness, thickness, colour, 0.7)

# Nodes, listed in drawing order: (x, y, volume, height factor, colour, label).
# The height factor differs per node; all boxes share a width of 0.6.
nodes = [
    (0, 7, 5000, 1.5, '#276CF5', 'API\n5TB'),
    (0, 4, 3000, 1.8, '#4927F5', 'DB\n3TB'),
    (0, 2, 2000, 2, '#5314E6', 'Files\n2TB'),
    (3, 5, 10000, 1.2, '#27D3F5', 'Transform\n10TB'),
    (6, 2, 1000, 3, '#C82909', 'Invalid\n1TB'),
    (6, 5, 9000, 1.2, '#6CF527', 'Clean\n9TB'),
    (9, 7, 4000, 1.5, '#6CF527', 'Lake\n4TB'),
    (9, 4, 3000, 1.6, '#27F5B0', 'DW\n3TB'),
    (9, 2, 2000, 2, '#F5B027', 'Archive\n2TB'),
]
for node_x, node_y, volume, stretch, colour, caption in nodes:
    draw_node(ax, node_x, node_y, 0.6, volume * s * stretch, colour, caption)

ax.set_title(
    'ETL Data Pipeline Flow',
    fontsize=16, color='white', fontweight='bold', pad=20,
)

# Fixed view window; the axes frame and ticks are hidden.
ax.set_xlim(-1, 10)
ax.set_ylim(0, 9)
ax.axis('off')

plt.tight_layout()
plt.show()
Library
Matplotlib
Category
Part-to-Whole
More Sankey Diagram examples
☕