Get Info

Retrieves information about files or folders.

Get Info


Processing

Retrieves detailed metadata about files or folders, including size, type, modification date, permissions, and more. Processes single or multiple paths, sorting results based on specified criteria.

Inputs

paths
A single path or a list of paths to files or folders for metadata extraction.

Input Types

Input Types
paths Str, Path, List

You can check the list of supported types here: Available Type Hints.

Outputs

info data
Metadata for the processed files or folders, returned as either a list of records or a DataFrame based on the Info Data Type option.

The info data output contains the following metadata:

  • name: The name of the file or folder.
  • size_bytes: The size in bytes (files) or total size of contents (folders).
  • size_formatted: Human-readable size (e.g., "1.2 MB").
  • mimetype: MIME type of the file, if applicable.
  • type: General type: one of "File", "Folder", or "Other".
  • type_detail: Detailed type, e.g., "File (.txt)" or "Folder".
  • date_modified: Datetime object of the last modification.
  • date_modified_str: Formatted modification date (YYYY-MM-DD HH:MM:SS).
  • permissions_octal: File permissions in octal format (e.g., "755").
  • permissions_string: Readable permissions (e.g., "rwxr-xr-x").
  • full_path: Absolute path to the file or folder (includes trailing separator for folders).
  • relative_path: Name of the file or folder relative to its parent directory.

Output Types

Output Types
info data DataRecords, DataFrame

You can check the list of supported types here: Available Type Hints.

Options

The Get Info brick provides the following configurable options:

Sort By
Specifies the sorting criterion for the output metadata (e.g., by name, size, date, or type).
Ascending Sort
Toggles whether sorting is in ascending (True) or descending (False) order.
Info Data Type
Determines the output format for metadata, either as a list of records or a DataFrame.
Verbose
Enables or disables detailed logging of the process.
import os
import stat
import logging
import mimetypes
import pandas as pd
from pathlib import Path
from datetime import datetime
from coded_flows.types import Str, DataRecords, DataFrame, List, Union

# Configure the root logger at import time with the INFO level.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by the helpers and the brick entry point below.
logger = logging.getLogger(__name__)


def _calculate_folder_size(path):
    """Return the combined size in bytes of every file found under *path*.

    The tree is traversed with ``os.walk``. Files whose size cannot be read
    are skipped, and 0 is returned if the traversal itself raises an
    ``OSError``.
    """
    accumulated = 0
    try:
        for current_dir, _subdirs, filenames in os.walk(path):
            for filename in filenames:
                candidate = os.path.join(current_dir, filename)
                try:
                    accumulated += os.path.getsize(candidate)
                except OSError:
                    # Unreadable or vanished file: leave it out of the total.
                    continue
    except OSError:
        return 0
    return accumulated


def _get_permissions_string(mode):
    """Render the user/group/other permission bits of *mode* as ``rwxrwxrwx``.

    Each of the nine positions holds its letter when the matching bit is set
    in *mode* and ``-`` otherwise.
    """
    # Ordered exactly as the characters appear in the result string.
    bit_symbols = (
        (stat.S_IRUSR, "r"),
        (stat.S_IWUSR, "w"),
        (stat.S_IXUSR, "x"),
        (stat.S_IRGRP, "r"),
        (stat.S_IWGRP, "w"),
        (stat.S_IXGRP, "x"),
        (stat.S_IROTH, "r"),
        (stat.S_IWOTH, "w"),
        (stat.S_IXOTH, "x"),
    )
    return "".join(symbol if mode & bit else "-" for bit, symbol in bit_symbols)


def _get_item_metadata(path, brick_display_name, verbose):
    """Extract metadata for a single file or folder.

    Args:
        path: Location of the item (anything accepted by ``Path``).
        brick_display_name: Label prefixed to the warning log message.
        verbose: When truthy, a warning is logged if the item cannot be accessed.

    Returns:
        A dict with the keys ``name``, ``size_bytes``, ``size_formatted``,
        ``mimetype``, ``type``, ``type_detail``, ``date_modified``,
        ``date_modified_str``, ``permissions_octal``, ``permissions_string``,
        ``full_path`` and ``relative_path``; or ``None`` when an ``OSError``
        is raised while reading the item.
    """
    try:
        path = Path(path)
        stat_info = path.stat()
        # Classify once and reuse the result instead of re-querying the filesystem.
        is_file = path.is_file()
        is_dir = not is_file and path.is_dir()
        if is_file:
            item_type = "File"
            extension = path.suffix.lower()
            type_detail = f"File ({extension})" if extension else "File"
            size_bytes = stat_info.st_size
        elif is_dir:
            item_type = "Folder"
            type_detail = "Folder"
            size_bytes = _calculate_folder_size(path)
        else:
            item_type = "Other"
            type_detail = "Other"
            size_bytes = 0
        modified_date = datetime.fromtimestamp(stat_info.st_mtime)
        # full_path is reported as an absolute path, even when the caller
        # supplied a relative one.
        full_path = os.path.abspath(path)
        # Folders carry a trailing separator; avoid doubling it for a root path.
        if is_dir and not full_path.endswith(os.sep):
            full_path += os.sep
        return {
            "name": path.name,
            "size_bytes": size_bytes,
            "size_formatted": _format_file_size(size_bytes),
            "mimetype": mimetypes.guess_type(path)[0] if is_file else None,
            "type": item_type,
            "type_detail": type_detail,
            "date_modified": modified_date,
            "date_modified_str": modified_date.strftime("%Y-%m-%d %H:%M:%S"),
            # Last three octal digits are the user/group/other permission bits.
            "permissions_octal": oct(stat_info.st_mode)[-3:],
            "permissions_string": _get_permissions_string(stat_info.st_mode),
            "full_path": full_path,
            "relative_path": path.name,
        }
    except OSError as e:
        # PermissionError and FileNotFoundError are OSError subclasses.
        if verbose:
            logger.warning(
                f"[{brick_display_name}] Could not access path {path}: {e}"
            )
        return None


def _format_file_size(size_bytes):
    """Return *size_bytes* as a human-readable string such as ``"1.5 KB"``.

    Zero is rendered as ``"0 B"``. Any other value is divided by 1024 until it
    drops below 1024 or the largest unit (TB) is reached, then shown with one
    decimal place.
    """
    if size_bytes == 0:
        return "0 B"
    units = ("B", "KB", "MB", "GB", "TB")
    value = float(size_bytes)
    for unit in units[:-1]:
        if value >= 1024.0:
            value /= 1024.0
        else:
            return f"{value:.1f} {unit}"
    # Anything still this large is expressed in the biggest unit available.
    return f"{value:.1f} {units[-1]}"


def _sort_items(items, sort_by, ascending=True):
    """Return *items* sorted by the requested criterion.

    Args:
        items: Metadata dicts containing at least ``name``, ``size_bytes``,
            ``date_modified`` and ``type``.
        sort_by: One of ``"name"``, ``"size"``, ``"date"`` or ``"type"``
            (case-insensitive). Any other value, including a non-string,
            falls back to sorting by name.
        ascending: Sort in ascending order when true, descending otherwise.

    Returns:
        A new sorted list; *items* itself is left untouched.
    """

    def by_name(item):
        return item["name"].lower()

    sort_key_map = {
        "name": by_name,
        "size": lambda item: item["size_bytes"],
        "date": lambda item: item["date_modified"],
        "type": lambda item: (item["type"], item["name"].lower()),
    }
    criterion = sort_by.lower() if isinstance(sort_by, str) else ""
    # Without a fallback, an unknown criterion would hand key=None to sorted(),
    # which then compares the dicts directly and raises TypeError.
    sort_key = sort_key_map.get(criterion, by_name)
    return sorted(items, key=sort_key, reverse=not ascending)


def _validate_source_input(src):
    """Tell whether *src* is a str/Path or a list made up only of str/Path.

    An empty list is considered valid; any other type is rejected.
    """
    accepted_types = (str, Path)
    if isinstance(src, list):
        for entry in src:
            if not isinstance(entry, accepted_types):
                return False
        return True
    return isinstance(src, accepted_types)


def get_info(
    paths: Union[Str, Path, List], options=None
) -> Union[DataRecords, DataFrame]:
    """Collect metadata for one or more files or folders.

    Args:
        paths: A single path (str or Path) or a list of such paths.
        options: Optional dict of settings read by this function:
            ``sort_by`` (default ``"name"``), ``ascending`` (default ``True``),
            ``info_metadata_type`` (``"records"`` by default; ``"dataframe"``
            returns a pandas DataFrame) and ``verbose`` (default ``True``).

    Returns:
        The sorted metadata, as a list of dicts or as a DataFrame when
        ``info_metadata_type`` is ``"dataframe"``. Paths that do not exist or
        cannot be read are skipped.

    Raises:
        ValueError: If *paths* is not a str, a Path, or a list of str/Path.
    """
    options = options or {}
    brick_display_name = "Get Info"
    sort_by = options.get("sort_by", "name")
    info_metadata_type = options.get("info_metadata_type", "records")
    ascending = options.get("ascending", True)
    verbose = options.get("verbose", True)
    if not _validate_source_input(paths):
        if verbose:
            logger.error(
                f"[{brick_display_name}] Invalid source input. Must be Str, Path, or list of Str/Path objects."
            )
        raise ValueError(
            "Invalid source input. Must be Str, Path, or list of Str/Path objects."
        )
    if isinstance(paths, (str, Path)):
        path_list = [Path(paths)]
    else:
        # List entries may be plain strings; normalise them so that the
        # .exists() call below works for every entry.
        path_list = [Path(entry) for entry in paths]
    info_data = []
    if verbose:
        logger.info(f"[{brick_display_name}] Collecting metadata info...")
    for path in path_list:
        if not path.exists():
            if verbose:
                logger.warning(
                    f"[{brick_display_name}] Path does not exist: {path}"
                )
            continue
        metadata = _get_item_metadata(path, brick_display_name, verbose)
        if metadata is not None:
            info_data.append(metadata)
    items_count = len(info_data)
    if verbose:
        logger.info(
            f"[{brick_display_name}] {items_count} element{('s' if items_count != 1 else '')} processed."
        )
    info_data = _sort_items(info_data, sort_by, ascending)
    if info_metadata_type == "dataframe":
        if verbose:
            logger.info(
                f"[{brick_display_name}] Converting items output to a dataframe."
            )
        info_data = pd.DataFrame(info_data)
    return info_data

Brick Info

version v0.1.4
python 3.10, 3.11, 3.12, 3.13
requirements
    -