Directory Explorer

Lists the files and folders in a given folder, with optional filtering, recursive subfolder search, and sorting.


Processing

Examines the specified folder and returns a list of all items (files and/or folders) that match the given criteria. If subfolders are included, it recursively searches through the entire directory tree.
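
A minimal pathlib sketch of the two search modes, assuming a readable placeholder folder; the brick code further down uses these same glob patterns.

from pathlib import Path

base = Path("/home/user/documents")  # placeholder folder
top_level = list(base.glob("*"))     # immediate children only
recursive = list(base.glob("**/*"))  # entire directory tree, including subfolders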

Inputs

folder path
The path of the folder to explore.
filter pattern (optional)
A regular expression used to filter the returned items by name. If empty, no filtering is applied.

Inputs Types

Input            Types
folder path      Str, DirectoryPath
filter pattern   Str

You can check the list of supported types here: Available Type Hints.

Outputs

paths
The list of absolute paths.
items data
The list of items enriched with metadata such as size, MIME type, and relative path.
items count
The total number of items found.

The items data output contains the following metadata (an illustrative record is shown after the list):

  • name: The name of the file or folder (e.g., document.pdf, images).
  • size_bytes: The size of the file in bytes (e.g., 1024). For folders, this is 0.
  • size_formatted: The human-readable file size (e.g., 1 KB, 2.5 MB).
  • mimetype: The MIME type of the file (e.g., application/pdf, image/jpeg). None for folders and for files whose type cannot be determined.
  • type: Indicates whether the item is a file or a folder.
  • type_detail: Additional type information, including the file extension.
  • date_modified: The last modified date as a datetime object.
  • date_modified_str: The last modified date in a human-readable string format.
  • full_path: The absolute path to the file or folder (e.g., /home/user/documents/report.pdf).
  • relative_path: The path relative to the explored base folder (e.g., documents/report.pdf).
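
For illustration, a single items data record could look like the following; the keys match the brick code below, and the values are hypothetical.

{
    "name": "report.pdf",
    "size_bytes": 1024,
    "size_formatted": "1.0 KB",
    "mimetype": "application/pdf",
    "type": "File",
    "type_detail": "File (.pdf)",
    "date_modified": datetime(2024, 1, 15, 9, 30),  # datetime object
    "date_modified_str": "2024-01-15 09:30:00",
    "full_path": "/home/user/documents/report.pdf",
    "relative_path": "documents/report.pdf",
}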

Outputs Types

Output           Types
paths            List, DataFrame
items data       DataRecords, DataFrame
items count      Int

You can check the list of supported types here: Available Type Hints.

Options

The Directory Explorer brick exposes the following configurable options (an example options mapping is shown after the list):

Include Subfolders
Enables a recursive search through the entire directory tree.
Regex Pattern
A regular expression used to filter the returned items by name. If left empty, no filtering is applied. This option is ignored when the brick's filter pattern input handle is connected.
Sort By
Selects which metadata value is used to sort the outputs (name, size, date, or type).
Ascending Sort
If enabled, results are sorted in ascending order; otherwise, descending.
Paths Type
Selects the data type of the paths output: either a list or a dataframe.
Items Type
Selects the data type of the items data output: either a list of records or a dataframe.
Verbose
Enables or disables log output for this brick.
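
A sketch of an options mapping using the keys the brick code reads; the values shown are the defaults, except for the recursion flag.

options = {
    "include_subfolders": True,        # recursive search through subfolders
    "filter_pattern": "",              # used only if the input handle is not connected
    "sort_by": "name",                 # "name", "size", "date" or "type"
    "ascending": True,                 # ascending sort order
    "paths_list_type": "list",         # or "dataframe"
    "items_metadata_type": "records",  # or "dataframe"
    "verbose": True,                   # enable log output
}
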
Brick Code

import os
import re
import logging
import mimetypes
import pandas as pd
from datetime import datetime
from coded_flows.types import (
    Str,
    DirectoryPath,
    DataRecords,
    DataFrame,
    List,
    Int,
    Tuple,
    Union,
)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _is_regex(text: str) -> bool:
    try:
        re.compile(text)
        return True
    except re.error:
        return False


def _validate_and_raise(condition, error_type, message, brick_display_name, verbose):
    if not condition:
        verbose and logger.error(f"[{brick_display_name}] {message}")
        raise error_type(message)


def _coalesce(*values):
    return next((v for v in values if v is not None), None)


def _get_item_metadata(path, base_path=None):
    """Extract metadata for a file or folder, relative to base_path when provided."""
    try:
        stat_info = path.stat()
        if path.is_file():
            item_type = "File"
            extension = path.suffix.lower() if path.suffix else "No extension"
            type_detail = (
                f"File ({extension})" if extension != "No extension" else "File"
            )
        elif path.is_dir():
            item_type = "Folder"
            type_detail = "Folder"
        else:
            item_type = "Other"
            type_detail = "Other"
        size_bytes = stat_info.st_size if path.is_file() else 0
        size_formatted = _format_file_size(size_bytes)
        modified_timestamp = stat_info.st_mtime
        modified_date = datetime.fromtimestamp(modified_timestamp)
        mime_type = mimetypes.guess_type(path)[0]
        return {
            "name": path.name,
            "size_bytes": size_bytes,
            "size_formatted": size_formatted,
            "mimetype": mime_type,
            "type": item_type,
            "type_detail": type_detail,
            "date_modified": modified_date,
            "date_modified_str": modified_date.strftime("%Y-%m-%d %H:%M:%S"),
            "full_path": str(path) + os.sep if path.is_dir() else str(path),
            "relative_path": (
                str(path.relative_to(path.parent)) if path.parent != path else str(path)
            ),
        }
    except (OSError, PermissionError):
        return None


def _format_file_size(size_bytes):
    """Format file size in human-readable format."""
    if size_bytes == 0:
        return "0 B"
    size_names = ["B", "KB", "MB", "GB", "TB"]
    size = float(size_bytes)
    i = 0
    while size >= 1024.0 and i < len(size_names) - 1:
        size /= 1024.0
        i += 1
    return f"{size:.1f} {size_names[i]}"


def _sort_items(items, sort_by, ascending=True):
    """Sort items by the specified criteria with optional ascending/descending order."""
    sort_key_map = {
        "name": lambda x: x["name"].lower(),
        "size": lambda x: x["size_bytes"],
        "date": lambda x: x["date_modified"],
        "type": lambda x: (x["type"], x["name"].lower()),
    }
    sort_key = sort_key_map.get(sort_by.lower())
    if not sort_key:
        logger.warning(
            f"Unknown sort criteria '{sort_by}'. Available options: name, size, date, type"
        )
        return items
    reverse = not ascending
    return sorted(items, key=sort_key, reverse=reverse)


def directory_explorer(
    folder_path: Union[Str, DirectoryPath], filter_pattern: Str = None, options=None
) -> Tuple[Union[List, DataFrame], Union[DataRecords, DataFrame], Int]:
    options = options or {}
    brick_display_name = "Directory Explorer"
    filter_pattern = _coalesce(filter_pattern, options.get("filter_pattern", ""))
    include_subfolders = options.get("include_subfolders", False)
    sort_by = options.get("sort_by", "name")
    paths_list_type = options.get("paths_list_type", "list")
    items_metadata_type = options.get("items_metadata_type", "records")
    ascending = options.get("ascending", True)
    verbose = options.get("verbose", True)
    base_path = DirectoryPath(folder_path).resolve()
    _validate_and_raise(
        base_path.exists(),
        FileNotFoundError,
        f"Folder not found: {folder_path}",
        brick_display_name,
        verbose,
    )
    _validate_and_raise(
        base_path.is_dir(),
        ValueError,
        f"Path is not a directory: {folder_path}",
        brick_display_name,
        verbose,
    )
    _validate_and_raise(
        _is_regex(filter_pattern),
        ValueError,
        "'filter patterns' must be a valid regex.",
        brick_display_name,
        verbose,
    )
    verbose and logger.info(
        f"[{brick_display_name}] Exploring the folder '{base_path}'."
    )
    if filter_pattern:
        verbose and logger.info(
            f"[{brick_display_name}] Items will be filtered using the regex '{filter_pattern}'."
        )
    items_data = []
    verbose and logger.info(f"[{brick_display_name}] Collecting items...")
    glob_pattern = "**/*" if include_subfolders else "*"
    compiled_re = re.compile(filter_pattern)
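    # Build the item list in a single pass; the walrus assignment keeps only
    # entries whose metadata could be read (unreadable items yield None).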
    items_data = (
        [
            md
            for p in base_path.glob(glob_pattern)
            if compiled_re.match(p.name)
            and (md := _get_item_metadata(p, base_path)) is not None
        ]
        if filter_pattern
        else [
            md
            for p in base_path.glob(glob_pattern)
            if (md := _get_item_metadata(p, base_path)) is not None
        ]
    )
    items_count = len(items_data)
    verbose and logger.info(
        f"[{brick_display_name}] {items_count} element{'s' if items_count != 1 else ''} found."
    )
    items_data = _sort_items(items_data, sort_by, ascending)
    paths = [item["full_path"] for item in items_data]
    if paths_list_type == "dataframe":
        verbose and logger.info(
            f"[{brick_display_name}] Converting paths output to a dataframe."
        )
        paths = pd.DataFrame(paths, columns=["paths"])
    if items_metadata_type == "dataframe":
        verbose and logger.info(
            f"[{brick_display_name}] Converting items output to a dataframe."
        )
        items_data = pd.DataFrame(items_data)
    return (paths, items_data, items_count)
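
A minimal usage sketch, assuming the brick is called as a plain function; the folder, pattern, and options are placeholders.

if __name__ == "__main__":
    paths, items, count = directory_explorer(
        "/home/user/documents",      # placeholder folder
        filter_pattern=r".*\.pdf$",  # keep only names ending in .pdf
        options={"include_subfolders": True, "sort_by": "size", "ascending": False},
    )
    print(f"{count} matching items found")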

Brick Info

version v0.1.4
python 3.10, 3.11, 3.12, 3.13
requirements
    -