Pairplot Image
Generate a pairplot visualization showing pairwise relationships between features.
Pairplot Image
Processing
This function takes tabular data (Pandas DataFrame, Polars DataFrame, or PyArrow Table) and generates a pairplot visualization, which displays pairwise relationships between features. It automatically selects all numeric columns if no specific columns are provided, and allows conditioning the visualization using a hue column. The resulting image is rendered to memory and returned in a user-specified format (NumPy array, PIL Image, bytes, or BytesIO stream).
Inputs
- data
- Input data used for visualization, typically containing multiple numeric features.
Inputs Types
| Input | Types |
|---|---|
| data | DataFrame, ArrowTable |
You can check the list of supported types here: Available Type Hints.
Outputs
- image
- The generated pairplot visualization. The specific format depends on the 'Output Type' option selected.
Outputs Types
| Output | Types |
|---|---|
| image | MediaData, PILImage |
You can check the list of supported types here: Available Type Hints.
Options
The Pairplot Image brick contains some changeable options:
- Columns to Plot
- List of specific columns to include in the pairplot matrix. If left empty, the function defaults to using all numeric columns found in the input data.
- Hue Column
- Name of the column used to color code the points in the plot based on categorical values.
- Color Palette
- The color scheme used for rendering the plot. Available choices include standard Seaborn palettes like `husl`, `deep`, `muted`, etc.
- Diagonal Plot Type
- Specifies the type of plot drawn on the diagonal axes, such as `hist` (histogram) or `kde` (Kernel Density Estimate).
- Only Lower
- If enabled, only the lower triangle of the plot matrix is drawn, making the output cleaner when analyzing symmetry is unnecessary.
- Output Type
- Defines the format of the returned image object: NumPy array (`array`), PIL Image object (`pil`), raw bytes (`bytes`), or BytesIO stream (`bytesio`).
- Verbose
- If enabled, detailed logs and information about the execution process are printed.
import logging
import io
import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from coded_flows.types import Union, DataFrame, ArrowTable, MediaData, PILImage
# Configure root logging at INFO level and create the logger used by this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def pairplot(
    data: Union[DataFrame, ArrowTable], options=None
) -> Union[MediaData, PILImage]:
    """Render a seaborn pairplot of tabular data to an in-memory PNG image.

    Args:
        data: A pandas DataFrame, polars DataFrame, or pyarrow Table. Polars
            and Arrow inputs are converted to pandas before plotting.
        options: Optional dict of settings. Recognised keys:
            - "verbose" (default True): log progress and errors.
            - "output_type" (default "array"): one of "array", "pil",
              "bytes", "bytesio".
            - "columns" (default None): columns to plot; when empty, all
              numeric columns are used.
            - "hue" (default ""): column used to colour the points.
            - "palette" (default "husl"): palette passed to seaborn.
            - "diag_kind" (default "auto"): diagonal plot kind passed to
              seaborn.
            - "corner" (default False): draw only the lower triangle.

    Returns:
        The rendered image as a NumPy array, a fully loaded PIL image, raw
        PNG bytes, or a BytesIO positioned at offset 0, depending on
        "output_type".

    Raises:
        RuntimeError: If the input cannot be converted to a pandas DataFrame
            (including unsupported input types), or if plotting/rendering
            fails with an unexpected error.
        ValueError: If the data is empty, requested columns or the hue column
            are missing, no numeric columns exist, or "output_type" is invalid.
    """
    brick_display_name = "Pairplot Image"
    options = options or {}
    verbose = options.get("verbose", True)
    output_type = options.get("output_type", "array")
    columns = options.get("columns", None)
    hue = options.get("hue", "")
    palette = options.get("palette", "husl")
    diag_kind = options.get("diag_kind", "auto")
    corner = options.get("corner", False)
    dpi = 300
    valid_output_types = ("array", "pil", "bytes", "bytesio")

    def log_info(message):
        if verbose:
            logger.info(f"[{brick_display_name}] {message}")

    def log_error(message):
        if verbose:
            logger.error(f"[{brick_display_name}] {message}")

    log_info(f"Starting pairplot generation with output type: '{output_type}'")

    # NOTE: the unsupported-type ValueError is raised inside this try on
    # purpose, so callers keep receiving RuntimeError for that case.
    try:
        if isinstance(data, pl.DataFrame):
            log_info("Converting polars DataFrame to pandas")
            df = data.to_pandas()
        elif isinstance(data, pa.Table):
            log_info("Converting Arrow table to pandas")
            df = data.to_pandas()
        elif isinstance(data, pd.DataFrame):
            log_info("Input is already pandas DataFrame")
            df = data
        else:
            error_msg = f"Unsupported data type: {type(data).__name__}"
            log_error(error_msg)
            raise ValueError(error_msg)
    except Exception as e:
        error_msg = f"Failed to convert input data to pandas DataFrame: {e}"
        log_error(error_msg)
        raise RuntimeError(error_msg) from e

    if df.empty:
        error_msg = "Input DataFrame is empty"
        log_error(error_msg)
        raise ValueError(error_msg)
    log_info(
        f"Processing DataFrame with {df.shape[0]:,} rows × {df.shape[1]:,} columns"
    )

    # Remember which figures already exist so cleanup only touches figures
    # created by this call instead of closing every open figure.
    figures_before = set(plt.get_fignums())
    try:
        if columns and len(columns) > 0:
            missing_cols = [col for col in columns if col not in df.columns]
            if missing_cols:
                error_msg = f"Columns not found in DataFrame: {missing_cols}"
                log_error(error_msg)
                raise ValueError(error_msg)
            plot_cols = list(columns)
            log_info(f"Using specified columns: {plot_cols}")
        else:
            plot_cols = df.select_dtypes(include=[np.number]).columns.tolist()
            if not plot_cols:
                error_msg = "No numeric columns found in DataFrame"
                log_error(error_msg)
                raise ValueError(error_msg)
            log_info(f"Using all numeric columns: {plot_cols}")

        hue_col = None
        if hue and hue.strip():
            if hue not in df.columns:
                error_msg = f"Hue column '{hue}' not found in DataFrame"
                log_error(error_msg)
                raise ValueError(error_msg)
            # The hue column must be part of the frame handed to seaborn.
            if hue not in plot_cols:
                plot_cols.append(hue)
            hue_col = hue
            log_info(f"Using hue column: '{hue}'")

        # Reject a bad output type before paying for a 300-DPI render.
        if output_type not in valid_output_types:
            error_msg = f"Invalid output_type: '{output_type}'"
            log_error(error_msg)
            raise ValueError(error_msg)

        log_info(f"Creating pairplot (diag_kind={diag_kind}, corner={corner})")
        pairplot_obj = sns.pairplot(
            df[plot_cols],
            hue=hue_col,
            palette=palette,
            diag_kind=diag_kind,
            corner=corner,
        )

        log_info(f"Rendering to {output_type} format with DPI={dpi}")
        buf = io.BytesIO()
        pairplot_obj.savefig(buf, format="png", dpi=dpi, bbox_inches="tight")
        buf.seek(0)

        if output_type == "bytesio":
            # The caller owns the stream; leave it open at offset 0.
            image = buf
        elif output_type == "bytes":
            image = buf.getvalue()
            buf.close()
        elif output_type == "pil":
            image = Image.open(buf)
            # Image.open is lazy: decode the pixels now, otherwise the image
            # would break once the backing buffer is closed.
            image.load()
            buf.close()
        else:  # "array"
            with Image.open(buf) as img:
                image = np.array(img)
            buf.close()
    except (ValueError, RuntimeError):
        raise
    except Exception as e:
        error_msg = f"Failed to generate pairplot: {e}"
        log_error(error_msg)
        raise RuntimeError(error_msg) from e
    finally:
        # Release every figure created during this call, on success or failure.
        for fig_num in set(plt.get_fignums()) - figures_before:
            plt.close(fig_num)

    log_info(f"Successfully generated pairplot as {output_type}")
    return image
Brick Info
- matplotlib
- polars[pyarrow]
- pillow
- seaborn
- pandas
- numpy
- pyarrow