Get Info
Retrieves information about files or folders.
Get Info
Processing
Retrieves detailed metadata about files or folders, including size, type, modification date, permissions, and more. Processes single or multiple paths, sorting results based on specified criteria.
Inputs
- paths
- A single path or a list of paths to files or folders for metadata extraction.
Inputs Types
| Input | Types |
|---|---|
| paths | Str, Path, List |
You can check the list of supported types here: Available Type Hints.
Outputs
- info data
- Metadata for the processed files or folders, returned as either a list of records or a DataFrame, depending on the Info Data Type option.
The info data output contains the following metadata:
- name: The name of the file or folder.
- size_bytes: The size in bytes (files) or total size of contents (folders).
- size_formatted: Human-readable size (e.g., "1.2 MB").
- mimetype: MIME type of the file, if applicable.
- type: General type; one of "File", "Folder", or "Other".
- type_detail: Detailed type, e.g., "File (.txt)" or "Folder".
- date_modified: Datetime object of the last modification.
- date_modified_str: Formatted modification date (YYYY-MM-DD HH:MM:SS).
- permissions_octal: File permissions in octal format (e.g., "755").
- permissions_string: Readable permissions (e.g., "rwxr-xr-x").
- full_path: Absolute path to the file or folder (includes trailing separator for folders).
- relative_path: Name of the file or folder relative to its parent directory.
Outputs Types
| Output | Types |
|---|---|
| info data | DataRecords, DataFrame |
You can check the list of supported types here: Available Type Hints.
Options
The Get Info brick contains some changeable options:
- Sort By
- Specifies the sorting criterion for the output metadata: name, size, date, or type (matched case-insensitively).
- Ascending Sort
- Toggles whether sorting is in ascending (True) or descending (False) order.
- Info Data Type
- Determines the output format for metadata, either as a list of records or a DataFrame.
- Verbose
- Enables or disables detailed logging of the process.
import os
import stat
import logging
import mimetypes
import pandas as pd
from pathlib import Path
from datetime import datetime
from coded_flows.types import Str, DataRecords, DataFrame, List, Union
# Module-level logging setup: request an INFO threshold through basicConfig.
logging.basicConfig(level=logging.INFO)
# Logger named after this module; the functions in this file emit their
# verbose messages through it.
logger = logging.getLogger(__name__)
def _calculate_folder_size(path):
    """Return the combined size, in bytes, of all files beneath ``path``.

    The tree is traversed with ``os.walk``. Any file whose size cannot be
    read is left out of the total. If the traversal itself raises an
    ``OSError`` the function reports 0 rather than a partial sum.
    """
    accumulated = 0
    try:
        for current_dir, _subdirs, filenames in os.walk(path):
            for filename in filenames:
                candidate = os.path.join(current_dir, filename)
                try:
                    accumulated += os.path.getsize(candidate)
                except OSError:
                    # FileNotFoundError is an OSError subclass, so vanished
                    # and unreadable entries are both skipped here.
                    pass
    except OSError:
        # PermissionError is an OSError subclass as well.
        return 0
    return accumulated
def _get_permissions_string(mode):
    """Render the user/group/other permission bits of ``mode`` as text.

    Produces a nine-character string such as ``"rwxr-xr-x"``: each position
    shows its letter when the corresponding bit is set and ``"-"`` otherwise.
    """
    # Ordered owner -> group -> other, each as read / write / execute.
    flag_symbols = (
        (stat.S_IRUSR, "r"),
        (stat.S_IWUSR, "w"),
        (stat.S_IXUSR, "x"),
        (stat.S_IRGRP, "r"),
        (stat.S_IWGRP, "w"),
        (stat.S_IXGRP, "x"),
        (stat.S_IROTH, "r"),
        (stat.S_IWOTH, "w"),
        (stat.S_IXOTH, "x"),
    )
    return "".join(
        symbol if mode & flag else "-" for flag, symbol in flag_symbols
    )
def _get_item_metadata(path, brick_display_name, verbose):
    """Collect a metadata record for a single file or folder.

    Args:
        path: Location of the item (string or ``Path``).
        brick_display_name: Label used as a prefix in log messages.
        verbose: When truthy, a warning is logged if the item is inaccessible.

    Returns:
        A dict with the keys ``name``, ``size_bytes``, ``size_formatted``,
        ``mimetype``, ``type``, ``type_detail``, ``date_modified``,
        ``date_modified_str``, ``permissions_octal``, ``permissions_string``,
        ``full_path`` and ``relative_path``; or ``None`` when an ``OSError``
        (including permission / not-found errors) occurs while reading it.
    """
    try:
        path = Path(path)
        stat_info = path.stat()
        mode = stat_info.st_mode
        # Classify from the stat result already fetched instead of calling
        # is_file()/is_dir() repeatedly: avoids extra system calls and keeps
        # every field consistent with one snapshot of the item.
        is_file = stat.S_ISREG(mode)
        is_dir = stat.S_ISDIR(mode)

        if is_file:
            item_type = "File"
            extension = path.suffix.lower()
            type_detail = f"File ({extension})" if extension else "File"
            size_bytes = stat_info.st_size
        elif is_dir:
            item_type = "Folder"
            type_detail = "Folder"
            size_bytes = _calculate_folder_size(path)
        else:
            item_type = "Other"
            type_detail = "Other"
            size_bytes = 0

        modified_date = datetime.fromtimestamp(stat_info.st_mtime)

        # full_path is documented as absolute; Path.absolute() anchors a
        # relative path to the working directory without resolving symlinks,
        # and leaves already-absolute paths untouched.
        full_path = str(path.absolute())
        if is_dir and not full_path.endswith(os.sep):
            # Folders carry a trailing separator (not doubled for the root).
            full_path += os.sep

        return {
            "name": path.name,
            "size_bytes": size_bytes,
            "size_formatted": _format_file_size(size_bytes),
            "mimetype": mimetypes.guess_type(path)[0] if is_file else None,
            "type": item_type,
            "type_detail": type_detail,
            "date_modified": modified_date,
            "date_modified_str": modified_date.strftime("%Y-%m-%d %H:%M:%S"),
            # Lowest nine permission bits as three octal digits, e.g. "755".
            "permissions_octal": f"{mode & 0o777:03o}",
            "permissions_string": _get_permissions_string(mode),
            "full_path": full_path,
            "relative_path": path.name,
        }
    except OSError as e:
        # PermissionError and FileNotFoundError are OSError subclasses.
        if verbose:
            logger.warning(
                f"[{brick_display_name}] Could not access path {path}: {e}"
            )
        return None
def _format_file_size(size_bytes):
    """Turn a byte count into a short human-readable string.

    Zero is rendered as ``"0 B"``. Any other value is divided by 1024 until
    it drops below 1024 or the largest unit (TB) is reached, then printed
    with one decimal place, e.g. ``"1.5 KB"``.
    """
    if size_bytes == 0:
        return "0 B"

    units = ("B", "KB", "MB", "GB", "TB")
    value = float(size_bytes)
    for unit in units[:-1]:
        # Stop at the first unit in which the value is not >= 1024.
        if not value >= 1024.0:
            return f"{value:.1f} {unit}"
        value /= 1024.0
    return f"{value:.1f} {units[-1]}"
def _sort_items(items, sort_by, ascending=True):
    """Return a new list of metadata records ordered by ``sort_by``.

    Args:
        items: Iterable of metadata dicts (needs the ``name``, ``size_bytes``,
            ``date_modified`` and ``type`` keys, depending on the criterion).
        sort_by: One of ``"name"``, ``"size"``, ``"date"`` or ``"type"``,
            matched case-insensitively. ``None`` or an unrecognised value
            falls back to sorting by name.
        ascending: ``True`` for ascending order, ``False`` for descending.

    Returns:
        A sorted list; the input is not modified.
    """
    sort_key_map = {
        "name": lambda x: x["name"].lower(),
        "size": lambda x: x["size_bytes"],
        "date": lambda x: x["date_modified"],
        # Secondary key keeps items of the same type in name order.
        "type": lambda x: (x["type"], x["name"].lower()),
    }
    criterion = "name" if sort_by is None else str(sort_by).strip().lower()
    # Without a fallback, an unknown criterion yields key=None and sorted()
    # would try to compare the dicts themselves, raising TypeError.
    sort_key = sort_key_map.get(criterion, sort_key_map["name"])
    return sorted(items, key=sort_key, reverse=not ascending)
def _validate_source_input(src):
    """Tell whether ``src`` is an acceptable path input.

    Accepts a single ``str`` or ``Path``, or a ``list`` in which every
    element is a ``str`` or ``Path`` (an empty list qualifies). Anything
    else is rejected.
    """
    accepted = (str, Path)
    if isinstance(src, list):
        return all(isinstance(entry, accepted) for entry in src)
    return isinstance(src, accepted)
def get_info(
    paths: Union[Str, Path, List], options=None
) -> Union[DataRecords, DataFrame]:
    """Gather metadata for one or more files or folders.

    Args:
        paths: A single path (``str`` or ``Path``) or a list of them.
        options: Optional dict. Recognised keys:
            ``sort_by`` (default ``"name"``), ``ascending`` (default ``True``),
            ``info_metadata_type`` (``"records"`` by default; ``"dataframe"``
            returns a pandas DataFrame) and ``verbose`` (default ``True``).

    Returns:
        The sorted metadata, as a list of dicts or as a DataFrame.
        Paths that do not exist or cannot be read are skipped.

    Raises:
        ValueError: If ``paths`` is not a str, a Path, or a list of those.
    """
    options = options or {}
    brick_display_name = "Get Info"
    sort_by = options.get("sort_by", "name")
    info_metadata_type = options.get("info_metadata_type", "records")
    ascending = options.get("ascending", True)
    verbose = options.get("verbose", True)

    if not _validate_source_input(paths):
        if verbose:
            logger.error(
                f"[{brick_display_name}] Invalid source input. Must be Str, Path, or list of Str/Path objects."
            )
        raise ValueError(
            "Invalid source input. Must be Str, Path, or list of Str/Path objects."
        )

    # Normalise every entry to Path. Validation accepts plain strings inside
    # a list, and the loop below relies on Path methods such as exists().
    raw_paths = [paths] if isinstance(paths, (str, Path)) else paths
    path_list = [Path(entry) for entry in raw_paths]

    info_data = []
    if verbose:
        logger.info(f"[{brick_display_name}] Collecting metadata info...")
    for path in path_list:
        if not path.exists():
            if verbose:
                logger.warning(
                    f"[{brick_display_name}] Path does not exist: {path}"
                )
            continue
        metadata = _get_item_metadata(path, brick_display_name, verbose)
        if metadata is not None:
            info_data.append(metadata)

    items_count = len(info_data)
    if verbose:
        logger.info(
            f"[{brick_display_name}] {items_count} element{('s' if items_count != 1 else '')} processed."
        )

    info_data = _sort_items(info_data, sort_by, ascending)
    if info_metadata_type == "dataframe":
        if verbose:
            logger.info(
                f"[{brick_display_name}] Converting items output to a dataframe."
            )
        info_data = pd.DataFrame(info_data)
    return info_data
Brick Info
version
v0.1.4
python
3.10,
3.11,
3.12,
3.13
requirements
-
-