Directory Explorer

Lists the files and folders in a given folder, with optional filtering, recursive subfolder search, and sorting.


Processing

Examines the specified folder and returns a list of all items (files and/or folders) that match the given criteria. If subfolders are included, it recursively searches through the entire directory tree.
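
A minimal pathlib sketch of the two search modes, assuming a readable placeholder folder; the brick code further down uses these same glob patterns.

from pathlib import Path

base = Path("/home/user/documents")  # placeholder folder
top_level = list(base.glob("*"))     # immediate children only
recursive = list(base.glob("**/*"))  # entire directory tree, including subfolders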

Inputs

folder path
The path of the folder to explore.
filter pattern (optional)
A regular expression used to filter the returned items by name. If empty, no filtering is applied.

Inputs Types

Input            Types
folder path      Str, DirectoryPath
filter pattern   Str

You can check the list of supported types here: Available Type Hints.

Outputs

paths
The list of absolute paths.
items data
The list of items enriched with metadata such as size, MIME type, and relative path.
items count
The total number of items found.

The items data output contains the following metadata (an illustrative record is shown after the list):

  • name: The name of the file or folder (e.g., document.pdf, images).
  • size_bytes: The size of the file in bytes (e.g., 1024). For folders, this is 0.
  • size_formatted: The human-readable file size (e.g., 1 KB, 2.5 MB).
  • mimetype: The MIME type of the file (e.g., application/pdf, image/jpeg). None for folders and for files whose type cannot be determined.
  • type: Indicates whether the item is a file or a folder.
  • type_detail: Additional type information, including the file extension.
  • date_modified: The last modified date as a datetime object.
  • date_modified_str: The last modified date in a human-readable string format.
  • full_path: The absolute path to the file or folder (e.g., /home/user/documents/report.pdf).
  • relative_path: The path relative to the explored base folder (e.g., documents/report.pdf).
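
For illustration, a single items data record could look like the following; the keys match the brick code below, and the values are hypothetical.

{
    "name": "report.pdf",
    "size_bytes": 1024,
    "size_formatted": "1.0 KB",
    "mimetype": "application/pdf",
    "type": "File",
    "type_detail": "File (.pdf)",
    "date_modified": datetime(2024, 1, 15, 9, 30),  # datetime object
    "date_modified_str": "2024-01-15 09:30:00",
    "full_path": "/home/user/documents/report.pdf",
    "relative_path": "documents/report.pdf",
}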

Outputs Types

Output           Types
paths            List, DataFrame
items data       DataRecords, DataFrame
items count      Int

You can check the list of supported types here: Available Type Hints.

Options

The Directory Explorer brick exposes the following configurable options (an example options mapping is shown after the list):

Include Subfolders
Enables a recursive search through the entire directory tree.
Regex Pattern
A regular expression used to filter the returned items by name. If left empty, no filtering is applied. This option is ignored when the brick's filter pattern input handle is connected.
Sort By
Selects which metadata value is used to sort the outputs (name, size, date, or type).
Ascending Sort
If enabled, results are sorted in ascending order; otherwise, descending.
Paths Type
Selects the data type of the paths output: either a list or a dataframe.
Items Type
Selects the data type of the items data output: either a list of records or a dataframe.
Verbose
Enables or disables log output for this brick.
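
A sketch of an options mapping using the keys the brick code reads; the values shown are the defaults, except for the recursion flag.

options = {
    "include_subfolders": True,        # recursive search through subfolders
    "filter_pattern": "",              # used only if the input handle is not connected
    "sort_by": "name",                 # "name", "size", "date" or "type"
    "ascending": True,                 # ascending sort order
    "paths_list_type": "list",         # or "dataframe"
    "items_metadata_type": "records",  # or "dataframe"
    "verbose": True,                   # enable log output
}
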
Brick Code

import os
import re
import logging
import mimetypes
import pandas as pd
from datetime import datetime
from coded_flows.types import (
    Str,
    DirectoryPath,
    DataRecords,
    DataFrame,
    List,
    Int,
    Tuple,
    Union,
)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _is_regex(text: str) -> bool:
    try:
        re.compile(text)
        return True
    except re.error:
        return False


def _validate_and_raise(condition, error_type, message, brick_display_name, verbose):
    if not condition:
        verbose and logger.error(f"[{brick_display_name}] {message}")
        raise error_type(message)


def _coalesce(*values):
    return next((v for v in values if v is not None), None)


def _get_item_metadata(path, base_path=None):
    """Extract metadata for a file or folder, relative to base_path when provided."""
    try:
        stat_info = path.stat()
        if path.is_file():
            item_type = "File"
            extension = path.suffix.lower() if path.suffix else "No extension"
            type_detail = (
                f"File ({extension})" if extension != "No extension" else "File"
            )
        elif path.is_dir():
            item_type = "Folder"
            type_detail = "Folder"
        else:
            item_type = "Other"
            type_detail = "Other"
        size_bytes = stat_info.st_size if path.is_file() else 0
        size_formatted = _format_file_size(size_bytes)
        modified_timestamp = stat_info.st_mtime
        modified_date = datetime.fromtimestamp(modified_timestamp)
        mime_type = mimetypes.guess_type(path)[0]
        return {
            "name": path.name,
            "size_bytes": size_bytes,
            "size_formatted": size_formatted,
            "mimetype": mime_type,
            "type": item_type,
            "type_detail": type_detail,
            "date_modified": modified_date,
            "date_modified_str": modified_date.strftime("%Y-%m-%d %H:%M:%S"),
            "full_path": str(path) + os.sep if path.is_dir() else str(path),
            "relative_path": (
                str(path.relative_to(path.parent)) if path.parent != path else str(path)
            ),
        }
    except (OSError, PermissionError):
        return None


def _format_file_size(size_bytes):
    """Format file size in human-readable format."""
    if size_bytes == 0:
        return "0 B"
    size_names = ["B", "KB", "MB", "GB", "TB"]
    size = float(size_bytes)
    i = 0
    while size >= 1024.0 and i < len(size_names) - 1:
        size /= 1024.0
        i += 1
    return f"{size:.1f} {size_names[i]}"


def _sort_items(items, sort_by, ascending=True):
    """Sort items by the specified criteria with optional ascending/descending order."""
    sort_key_map = {
        "name": lambda x: x["name"].lower(),
        "size": lambda x: x["size_bytes"],
        "date": lambda x: x["date_modified"],
        "type": lambda x: (x["type"], x["name"].lower()),
    }
    sort_key = sort_key_map.get(sort_by.lower())
    if not sort_key:
        logger.warning(
            f"Unknown sort criteria '{sort_by}'. Available options: name, size, date, type"
        )
        return items
    reverse = not ascending
    return sorted(items, key=sort_key, reverse=reverse)


def directory_explorer(
    folder_path: Union[Str, DirectoryPath], filter_pattern: Str = None, options=None
) -> Tuple[Union[List, DataFrame], Union[DataRecords, DataFrame], Int]:
    options = options or {}
    brick_display_name = "Directory Explorer"
    filter_pattern = _coalesce(filter_pattern, options.get("filter_pattern", ""))
    include_subfolders = options.get("include_subfolders", False)
    sort_by = options.get("sort_by", "name")
    paths_list_type = options.get("paths_list_type", "list")
    items_metadata_type = options.get("items_metadata_type", "records")
    ascending = options.get("ascending", True)
    verbose = options.get("verbose", True)
    base_path = DirectoryPath(folder_path).resolve()
    _validate_and_raise(
        base_path.exists(),
        FileNotFoundError,
        f"Folder not found: {folder_path}",
        brick_display_name,
        verbose,
    )
    _validate_and_raise(
        base_path.is_dir(),
        ValueError,
        f"Path is not a directory: {folder_path}",
        brick_display_name,
        verbose,
    )
    _validate_and_raise(
        _is_regex(filter_pattern),
        ValueError,
        "'filter patterns' must be a valid regex.",
        brick_display_name,
        verbose,
    )
    verbose and logger.info(
        f"[{brick_display_name}] Exploring the folder '{base_path}'."
    )
    if filter_pattern:
        verbose and logger.info(
            f"[{brick_display_name}] Items will be filtered using the regex '{filter_pattern}'."
        )
    items_data = []
    verbose and logger.info(f"[{brick_display_name}] Collecting items...")
    glob_pattern = "**/*" if include_subfolders else "*"
    compiled_re = re.compile(filter_pattern)
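    # Build the item list in a single pass; the walrus assignment keeps only
    # entries whose metadata could be read (unreadable items yield None).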
    items_data = (
        [
            md
            for p in base_path.glob(glob_pattern)
            if compiled_re.match(p.name)
            and (md := _get_item_metadata(p, base_path)) is not None
        ]
        if filter_pattern
        else [
            md
            for p in base_path.glob(glob_pattern)
            if (md := _get_item_metadata(p, base_path)) is not None
        ]
    )
    items_count = len(items_data)
    verbose and logger.info(
        f"[{brick_display_name}] {items_count} element{'s' if items_count != 1 else ''} found."
    )
    items_data = _sort_items(items_data, sort_by, ascending)
    paths = [item["full_path"] for item in items_data]
    if paths_list_type == "dataframe":
        verbose and logger.info(
            f"[{brick_display_name}] Converting paths output to a dataframe."
        )
        paths = pd.DataFrame(paths, columns=["paths"])
    if items_metadata_type == "dataframe":
        verbose and logger.info(
            f"[{brick_display_name}] Converting items output to a dataframe."
        )
        items_data = pd.DataFrame(items_data)
    return (paths, items_data, items_count)
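
A minimal usage sketch, assuming the brick is called as a plain function; the folder, pattern, and options are placeholders.

if __name__ == "__main__":
    paths, items, count = directory_explorer(
        "/home/user/documents",      # placeholder folder
        filter_pattern=r".*\.pdf$",  # keep only names ending in .pdf
        options={"include_subfolders": True, "sort_by": "size", "ascending": False},
    )
    print(f"{count} matching items found")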

Brick Info

version v0.1.4
python 3.10, 3.11, 3.12, 3.13
requirements
    -