Sankey Diagram

Data Pipeline ETL Flow

Data engineering pipeline showing flow from ingestion through transformation to storage destinations.

Output
Data Pipeline ETL Flow
Python
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.path import Path

def draw_flow(ax, x0, y0, x1, y1, w0, w1, color, alpha=0.6):
    """Add one curved, filled flow band to *ax*.

    The band is vertically centred on ``y0`` at ``x0`` and on ``y1`` at
    ``x1``; its vertical thickness is ``w0`` at the start and ``w1`` at the
    end. It is filled with *color* at opacity *alpha* and has no outline.
    Nothing is returned; the patch is attached to *ax*.
    """
    x_mid = (x0 + x1) / 2
    start_top, start_bottom = y0 + w0/2, y0 - w0/2
    end_top, end_bottom = y1 + w1/2, y1 - w1/2

    # Each long edge is a cubic Bezier whose two control points sit at the
    # horizontal midpoint, one at the start height and one at the end height,
    # which gives the band its S-shaped sweep.
    upper_edge = [(x0, start_top), (x_mid, start_top), (x_mid, end_top), (x1, end_top)]
    lower_edge = [(x1, end_bottom), (x_mid, end_bottom), (x_mid, start_bottom), (x0, start_bottom)]

    # Walk the upper edge left-to-right, drop straight down the right side,
    # return along the lower edge, then close back on the starting vertex.
    outline = upper_edge + lower_edge + [upper_edge[0]]
    codes = [
        Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4,
        Path.LINETO, Path.CURVE4, Path.CURVE4, Path.CURVE4,
        Path.CLOSEPOLY,
    ]

    band = mpatches.PathPatch(Path(outline, codes), fc=color, alpha=alpha, ec='none')
    ax.add_patch(band)

def draw_node(ax, x, y, w, h, color, label):
    """Add a rounded, labelled node box centred on ``(x, y)`` to *ax*.

    The box is *w* wide and *h* tall (before the 0.02 rounding pad), filled
    with *color* and outlined in white. *label* is drawn in small bold white
    text centred on the same point. Nothing is returned.
    """
    # FancyBboxPatch is anchored at its lower-left corner, so shift from the
    # requested centre by half the size in each direction.
    lower_left = (x - w/2, y - h/2)
    box = mpatches.FancyBboxPatch(
        lower_left, w, h,
        boxstyle="round,pad=0.02",
        fc=color, ec='white', lw=1.5,
    )
    ax.add_patch(box)

    text_style = dict(ha='center', va='center', fontsize=8, color='white', fontweight='bold')
    ax.text(x, y, label, **text_style)

fig, ax = plt.subplots(figsize=(14, 8), facecolor='#0a0a0f')
ax.set_facecolor('#0a0a0f')

# Axis units per unit of flow value: a flow of 5000 (the node labelled 5TB)
# is drawn as a band 5000*s = 2 axis units thick.
s = 0.0004

# One row per flow band: (x0, y0, x1, y1, value, colour).
# Every band keeps the same thickness from start to end and is drawn at
# alpha 0.7.
flows = [
    # Ingest sources
    (0.5, 7, 2.5, 5.5, 5000, '#276CF5'),
    (0.5, 4, 2.5, 4.5, 3000, '#4927F5'),
    (0.5, 2, 2.5, 3.5, 2000, '#5314E6'),
    # Transform
    (3.5, 5, 5.5, 5, 9000, '#27D3F5'),
    (3.5, 4.3, 5.5, 2, 1000, '#C82909'),
    # Load destinations
    (6.5, 5.5, 8.5, 7, 4000, '#6CF527'),
    (6.5, 4.5, 8.5, 4, 3000, '#27F5B0'),
    (6.5, 4, 8.5, 2, 2000, '#F5B027'),
]
for fx0, fy0, fx1, fy1, value, colour in flows:
    draw_flow(ax, fx0, fy0, fx1, fy1, value*s, value*s, colour, 0.7)

# One row per node: (x, y, value, stretch, colour, label).
# Node height is value*s*stretch; the stretch factor is hand-picked per node.
# All nodes are 0.6 wide and are added after the flows, so they sit on top.
nodes = [
    (0, 7, 5000, 1.5, '#276CF5', 'API\n5TB'),
    (0, 4, 3000, 1.8, '#4927F5', 'DB\n3TB'),
    (0, 2, 2000, 2, '#5314E6', 'Files\n2TB'),
    (3, 5, 10000, 1.2, '#27D3F5', 'Transform\n10TB'),
    (6, 2, 1000, 3, '#C82909', 'Invalid\n1TB'),
    (6, 5, 9000, 1.2, '#6CF527', 'Clean\n9TB'),
    (9, 7, 4000, 1.5, '#6CF527', 'Lake\n4TB'),
    (9, 4, 3000, 1.6, '#27F5B0', 'DW\n3TB'),
    (9, 2, 2000, 2, '#F5B027', 'Archive\n2TB'),
]
for nx, ny, value, stretch, colour, label in nodes:
    draw_node(ax, nx, ny, 0.6, value*s*stretch, colour, label)

# Finish the canvas: title, fixed data window, and no axis decorations.
ax.set_title('ETL Data Pipeline Flow', fontsize=16, color='white', fontweight='bold', pad=20)
ax.set(xlim=(-1, 10), ylim=(0, 9))
ax.axis('off')

plt.tight_layout()
plt.show()
Library

Matplotlib

Category

Part-to-Whole

Did this help you?

Support PyLucid to keep it free & growing

Support