Pairplot Image

Generate a pairplot visualization showing pairwise relationships between features.

Pairplot Image

Processing

This function takes tabular data (Pandas DataFrame, Polars DataFrame, or PyArrow Table) and generates a pairplot visualization, which displays pairwise relationships between features. It automatically selects all numeric columns if no specific columns are provided, and allows conditioning the visualization using a hue column. The resulting image is rendered to memory and returned in a user-specified format (NumPy array, PIL Image, bytes, or BytesIO stream).

Inputs

data: Input data used for visualization, typically containing multiple numeric features.

Inputs Types

Input	Types
`data`	`DataFrame`, `ArrowTable`

You can check the list of supported types here: Available Type Hints.

Outputs

image: The generated pairplot visualization. The specific format depends on the 'Output Type' option selected.

Outputs Types

Output	Types
`image`	`MediaData`, `PILImage`

You can check the list of supported types here: Available Type Hints.

Options

The Pairplot Image brick contains some changeable options:

Columns to Plot: List of specific columns to include in the pairplot matrix. If left empty, the function defaults to using all numeric columns found in the input data.
Hue Column: Name of the column used to color code the points in the plot based on categorical values.
Color Palette: The color scheme used for rendering the plot. Available choices include standard Seaborn palettes like husl, deep, muted, etc.
Diagonal Plot Type: Specifies the type of plot drawn on the diagonal axes, such as hist (histogram) or kde (Kernel Density Estimate).
Only Lower: If enabled, only the lower triangle of the plot matrix is drawn. The upper triangle mirrors the same pairwise relationships, so omitting it gives a cleaner output without losing information.
Output Type: Defines the format of the returned image object: NumPy array (array), PIL Image object (pil), raw bytes (bytes), or BytesIO stream (bytesio).
Verbose: If enabled, detailed progress and error messages about the execution process are written to the log.

import logging
import io
import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from coded_flows.types import Union, DataFrame, ArrowTable, MediaData, PILImage

# Set up logging at INFO level when the module is imported and create the
# module-level logger that pairplot() uses for its progress and error messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def pairplot(
    data: Union[DataFrame, ArrowTable], options=None
) -> Union[MediaData, PILImage]:
    """Render a seaborn pairplot of tabular data and return it as an image.

    The input is converted to a pandas DataFrame, the columns to plot are
    resolved (explicit list, or every numeric column), the plot is rendered
    to an in-memory PNG at 300 DPI, and the PNG is returned in the requested
    representation.

    Args:
        data: A pandas DataFrame, polars DataFrame, or PyArrow Table.
        options: Optional dict of settings. Recognised keys:
            ``verbose`` (bool, default True): emit log messages.
            ``output_type`` (str, default "array"): one of "array"
                (NumPy array), "pil" (PIL image), "bytes" (PNG bytes) or
                "bytesio" (BytesIO positioned at the start of the PNG).
            ``columns`` (list, default None): columns to plot; when empty,
                all numeric columns are used.
            ``hue`` (str, default ""): column used to colour the points.
            ``palette`` (default "husl"): passed to ``sns.pairplot``.
            ``diag_kind`` (default "auto"): passed to ``sns.pairplot``.
            ``corner`` (bool, default False): passed to ``sns.pairplot``.

    Returns:
        The rendered pairplot in the representation chosen by ``output_type``.

    Raises:
        RuntimeError: If the input cannot be converted to a pandas DataFrame
            (this includes unsupported input types), or if plotting/rendering
            fails with an error other than ValueError/RuntimeError.
        ValueError: If the DataFrame is empty, a requested column or the hue
            column is missing, no numeric column exists, or ``output_type``
            is not recognised. ValueError/RuntimeError raised while plotting
            are propagated unchanged.
    """
    brick_display_name = "Pairplot Image"
    options = options or {}
    verbose = options.get("verbose", True)
    output_type = options.get("output_type", "array")
    columns = options.get("columns", None)
    hue = options.get("hue", "")
    palette = options.get("palette", "husl")
    diag_kind = options.get("diag_kind", "auto")
    corner = options.get("corner", False)
    dpi = 300
    valid_output_types = ("array", "pil", "bytes", "bytesio")

    def log_info(message):
        if verbose:
            logger.info(f"[{brick_display_name}] {message}")

    def log_error(message):
        if verbose:
            logger.error(f"[{brick_display_name}] {message}")

    log_info(f"Starting pairplot generation with output type: '{output_type}'")

    # Normalise every supported input to a pandas DataFrame.
    # NOTE: the unsupported-type ValueError is raised inside the try on
    # purpose, so callers keep receiving the RuntimeError they always have.
    try:
        if isinstance(data, pl.DataFrame):
            log_info("Converting polars DataFrame to pandas")
            df = data.to_pandas()
        elif isinstance(data, pa.Table):
            log_info("Converting Arrow table to pandas")
            df = data.to_pandas()
        elif isinstance(data, pd.DataFrame):
            log_info("Input is already pandas DataFrame")
            df = data
        else:
            error_msg = f"Unsupported data type: {type(data).__name__}"
            log_error(error_msg)
            raise ValueError(error_msg)
    except Exception as e:
        error_msg = f"Failed to convert input data to pandas DataFrame: {e}"
        log_error(error_msg)
        raise RuntimeError(error_msg) from e

    if df.empty:
        error_msg = "Input DataFrame is empty"
        log_error(error_msg)
        raise ValueError(error_msg)
    log_info(
        f"Processing DataFrame with {df.shape[0]:,} rows × {df.shape[1]:,} columns"
    )

    # Remember which pyplot figures already exist so that cleanup only touches
    # figures created by this call, never the caller's own figures.
    figures_before = set(plt.get_fignums())
    pairplot_obj = None
    try:
        if columns:
            missing_cols = [col for col in columns if col not in df.columns]
            if missing_cols:
                error_msg = f"Columns not found in DataFrame: {missing_cols}"
                log_error(error_msg)
                raise ValueError(error_msg)
            # Copy so that appending the hue column never mutates the caller's list.
            plot_cols = list(columns)
            log_info(f"Using specified columns: {plot_cols}")
        else:
            plot_cols = df.select_dtypes(include=[np.number]).columns.tolist()
            if not plot_cols:
                error_msg = "No numeric columns found in DataFrame"
                log_error(error_msg)
                raise ValueError(error_msg)
            log_info(f"Using all numeric columns: {plot_cols}")

        hue_col = None
        if hue and hue.strip():
            if hue not in df.columns:
                error_msg = f"Hue column '{hue}' not found in DataFrame"
                log_error(error_msg)
                raise ValueError(error_msg)
            # The hue column must be present in the frame handed to seaborn.
            if hue not in plot_cols:
                plot_cols.append(hue)
            hue_col = hue
            log_info(f"Using hue column: '{hue}'")

        # Reject a bad output type before paying for the high-DPI render.
        if output_type not in valid_output_types:
            error_msg = f"Invalid output_type: '{output_type}'"
            log_error(error_msg)
            raise ValueError(error_msg)

        log_info(f"Creating pairplot (diag_kind={diag_kind}, corner={corner})")
        pairplot_obj = sns.pairplot(
            df[plot_cols],
            hue=hue_col,
            palette=palette,
            diag_kind=diag_kind,
            corner=corner,
        )

        log_info(f"Rendering to {output_type} format with DPI={dpi}")
        buf = io.BytesIO()
        pairplot_obj.savefig(buf, format="png", dpi=dpi, bbox_inches="tight")
        buf.seek(0)
        if output_type == "bytesio":
            # The caller owns the stream; leave it open and rewound.
            image = buf
        else:
            try:
                if output_type == "bytes":
                    image = buf.getvalue()
                elif output_type == "pil":
                    image = Image.open(buf)
                    # Image.open is lazy: force the pixel data into memory
                    # before the buffer is closed, otherwise the returned
                    # image would fail on first access.
                    image.load()
                else:  # "array"
                    image = np.array(Image.open(buf))
            finally:
                buf.close()
    except (ValueError, RuntimeError):
        raise
    except Exception as e:
        error_msg = f"Failed to generate pairplot: {e}"
        log_error(error_msg)
        raise RuntimeError(error_msg) from e
    finally:
        if pairplot_obj is not None:
            plt.close(pairplot_obj.fig)
        else:
            # Plot construction failed part-way: close any figure it left behind.
            for fig_num in set(plt.get_fignums()) - figures_before:
                plt.close(fig_num)

    log_info(f"Successfully generated pairplot as {output_type}")
    return image

Brick Info

version v0.1.0

python 3.10, 3.11, 3.12, 3.13

requirements

matplotlib
polars[pyarrow]
pillow
seaborn
pandas
numpy
pyarrow