Directory Explorer
Lists files/folders in a folder, with optional filtering, subfolder search, and sorting.
Directory Explorer
Processing
Examines the specified folder and returns a list of all items (files and/or folders) that match the given criteria. If subfolders are included, it recursively searches through the entire directory tree.
Inputs
- folder path
- The specified folder path for exploration.
- filter pattern (optional)
- A regex expression to filter the returned textual paths. If empty, no filtering applied.
Inputs Types
Input | Types |
---|---|
folder path |
Str , DirectoryPath |
filter pattern |
Str |
You can check the list of supported types here: Available Type Hints.
Outputs
- paths
- The list of absolute paths.
- items data
- The enriched list of paths with metadata, such as size, MIME type, relative path, etc.
- items count
- The total number of items found.
The items data
output contains the following metadata:
- name: The name of the file or folder (e.g., document.pdf, images).
- size_bytes: The size of the file in bytes (e.g., 1024). For folders, this is usually 0 or not applicable.
- size_formatted: The human-readable file size (e.g., 1 KB, 2.5 MB).
- mimetype: The MIME type of the file (e.g., application/pdf, image/jpeg). Empty for folders and other unknown types.
- type: Indicates whether the item is a file or a folder.
- type_detail: Additional type information, including the file extension.
- date_modified: The last modified date as a datetime object.
- date_modified_str: The last modified date in a human-readable string format.
- full_path: The absolute path to the file or folder (e.g., /home/user/documents/report.pdf).
- relative_path: The path relative to a specified base directory (e.g., documents/report.pdf).
Outputs Types
Input | Types |
---|---|
paths |
List , DataFrame |
items data |
DataRecords , DataFrame |
items count |
Int |
You can check the list of supported types here: Available Type Hints.
Options
The Directory Explorer brick contains some changeable options:
- Include Subfolders
- Activates the recursive search through the entire directory tree.
- Regex Pattern
- A regex expression used to filter the returned textual paths. If left empty, no filtering is applied. This option is ignored if the brick has a connection to its
filter pattern
input handle. - Sort By
- Choosing which metadata value to use for sorting the outputs (name, size, modification date, or path type).
- Ascending Sort
- If active, the sorting is ascending.
- Paths Type
- Choosing the data type of the
paths
output, either as a list or a dataframe. - Items Type
- Choosing the data type of the
items data
output, either as a list of records or a dataframe. - Verbose
- Enables or disables log output for this brick.
import os
import re
import logging
import mimetypes
import pandas as pd
from datetime import datetime
from coded_flows.types import (
Str,
DirectoryPath,
DataRecords,
DataFrame,
List,
Int,
Tuple,
Union,
)
# Configure root logging once at import time; the module-level logger is
# shared by every helper in this brick for verbose output.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _is_regex(text: str) -> bool:
try:
re.compile(text)
return True
except re.error:
return False
def _validate_and_raise(condition, error_type, message, brick_display_name, verbose):
if not condition:
verbose and logger.error(f"[{brick_display_name}] {message}")
raise error_type(message)
def _coalesce(*values):
return next((v for v in values if v is not None))
def _get_item_metadata(path):
    """Extract metadata for a file or folder.

    Args:
        path: A pathlib-style path pointing at the entry to inspect.

    Returns:
        A dict with name, size, mimetype, type, modification-date and path
        fields, or None if the entry cannot be stat-ed (broken symlink,
        permission error, concurrent deletion).
    """
    try:
        stat_info = path.stat()
        # Determine the entry kind once: each is_file()/is_dir() call hits
        # the filesystem, and repeated calls could disagree if the entry
        # changes between them (TOCTOU).
        is_file = path.is_file()
        is_dir = path.is_dir()
        if is_file:
            item_type = "File"
            extension = path.suffix.lower() if path.suffix else "No extension"
            type_detail = (
                f"File ({extension})" if extension != "No extension" else "File"
            )
        elif is_dir:
            item_type = "Folder"
            type_detail = "Folder"
        else:
            item_type = "Other"
            type_detail = "Other"
        size_bytes = stat_info.st_size if is_file else 0
        size_formatted = _format_file_size(size_bytes)
        modified_date = datetime.fromtimestamp(stat_info.st_mtime)
        mime_type = mimetypes.guess_type(path)[0]  # None for folders/unknown types
        return {
            "name": path.name,
            "size_bytes": size_bytes,
            "size_formatted": size_formatted,
            "mimetype": mime_type,
            "type": item_type,
            "type_detail": type_detail,
            "date_modified": modified_date,
            "date_modified_str": modified_date.strftime("%Y-%m-%d %H:%M:%S"),
            # Folders get a trailing separator so they are visually distinct.
            "full_path": str(path) + os.sep if is_dir else str(path),
            # NOTE(review): this is relative to the item's own parent (i.e.
            # effectively the item name), not relative to the explored base
            # folder as the brick docs suggest — confirm intended behavior.
            "relative_path": (
                str(path.relative_to(path.parent)) if path.parent != path else str(path)
            ),
        }
    except (OSError, PermissionError):
        # Unreadable entries are skipped by callers rather than aborting the scan.
        return None
def _format_file_size(size_bytes):
"""Format file size in human-readable format."""
if size_bytes == 0:
return "0 B"
size_names = ["B", "KB", "MB", "GB", "TB"]
size = float(size_bytes)
i = 0
while size >= 1024.0 and i < len(size_names) - 1:
size /= 1024.0
i += 1
return f"{size:.1f} {size_names[i]}"
def _sort_items(items, sort_by, ascending=True):
"""Sort items by the specified criteria with optional ascending/descending order."""
sort_key_map = {
"name": lambda x: x["name"].lower(),
"size": lambda x: x["size_bytes"],
"date": lambda x: x["date_modified"],
"type": lambda x: (x["type"], x["name"].lower()),
}
sort_key = sort_key_map.get(sort_by.lower())
if not sort_key:
print(
f"Warning: Unknown sort criteria '{sort_by}'. Available options: name, size, date, type"
)
return items
reverse = not ascending
return sorted(items, key=sort_key, reverse=reverse)
def directory_explorer(
    folder_path: Union[Str, DirectoryPath], filter_pattern: Str = None, options=None
) -> Tuple[Union[List, DataFrame], Union[DataRecords, DataFrame], Int]:
    """List files/folders under *folder_path* with optional regex filtering,
    recursive search and sorting.

    Args:
        folder_path: The folder to explore.
        filter_pattern: Regex matched against each item *name*; takes
            precedence over the 'filter_pattern' option when provided.
        options: Brick options (include_subfolders, sort_by, paths_list_type,
            items_metadata_type, ascending, verbose, filter_pattern).

    Returns:
        Tuple of (paths, items_data, items_count): absolute paths (list or
        DataFrame), item metadata (records or DataFrame), and the item count.

    Raises:
        FileNotFoundError: If the folder does not exist.
        ValueError: If the path is not a directory or the pattern is not a
            valid regex.
    """
    options = options or {}
    brick_display_name = "Directory Explorer"
    # The input handle takes precedence over the option value.
    filter_pattern = _coalesce(filter_pattern, options.get("filter_pattern", ""))
    include_subfolders = options.get("include_subfolders", False)
    sort_by = options.get("sort_by", "name")
    paths_list_type = options.get("paths_list_type", "list")
    items_metadata_type = options.get("items_metadata_type", "records")
    ascending = options.get("ascending", True)
    verbose = options.get("verbose", True)
    base_path = DirectoryPath(folder_path).resolve()
    _validate_and_raise(
        base_path.exists(),
        FileNotFoundError,
        f"Folder not found: {folder_path}",
        brick_display_name,
        verbose,
    )
    _validate_and_raise(
        base_path.is_dir(),
        ValueError,
        f"Path is not a directory: {folder_path}",
        brick_display_name,
        verbose,
    )
    _validate_and_raise(
        _is_regex(filter_pattern),
        ValueError,
        "'filter pattern' must be a valid regex.",
        brick_display_name,
        verbose,
    )
    verbose and logger.info(
        f"[{brick_display_name}] Exploring the folder '{base_path}'."
    )
    if filter_pattern:
        verbose and logger.info(
            f"[{brick_display_name}] Items will be filtered using the regex '{filter_pattern}'."
        )
    verbose and logger.info(f"[{brick_display_name}] Collecting items...")
    glob_pattern = "**/*" if include_subfolders else "*"
    # Compile once, and only when a filter was actually supplied. The filter
    # is matched against the item name, not the full path.
    compiled_re = re.compile(filter_pattern) if filter_pattern else None
    items_data = [
        md
        for p in base_path.glob(glob_pattern)
        if (compiled_re is None or compiled_re.match(p.name))
        and (md := _get_item_metadata(p)) is not None
    ]
    items_count = len(items_data)
    # '!= 1' so that zero reads "0 elements", not "0 element".
    verbose and logger.info(
        f"[{brick_display_name}] {items_count} element{('s' if items_count != 1 else '')} found."
    )
    items_data = _sort_items(items_data, sort_by, ascending)
    paths = [item["full_path"] for item in items_data]
    if paths_list_type == "dataframe":
        verbose and logger.info(
            f"[{brick_display_name}] Converting paths output to a dataframe."
        )
        paths = pd.DataFrame(paths, columns=["paths"])
    if items_metadata_type == "dataframe":
        verbose and logger.info(
            f"[{brick_display_name}] Converting items output to a dataframe."
        )
        items_data = pd.DataFrame(items_data)
    return (paths, items_data, items_count)
Brick Info
version
v0.1.4
python
3.10,
3.11,
3.12,
3.13
requirements
-
-