Best Practices

Guidelines and recommendations for using Bytesize effectively in production applications.

🎯 Core Principles

1. Choose the Right Unit Type

Binary Units for system resources and file systems:

# ✅ Good - Binary units for memory
ram = Storage(16, StorageUnit.GIB)
cache_size = Storage(512, StorageUnit.MIB)

# ✅ Good - Binary units for file operations
file_size = Storage.get_size_from_path("large_file.bin")
if file_size.convert_to_gib().value > 4:
    print("Large file detected")

Decimal Units for user-facing displays and storage devices:

# ✅ Good - Decimal units for storage devices
disk_capacity = Storage(500, StorageUnit.GB)
available_space = Storage(234.5, StorageUnit.GB)

# ✅ Good - Decimal units for download sizes
download_size = Storage.parse("1.2 GB")

Bit Units for network and bandwidth:

# ✅ Good - Bit units for network speeds
connection_speed = Storage(100, StorageUnit.MEGABITS)
throughput = Storage(1, StorageUnit.GIGABITS)
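
The distinction matters when estimating transfer times, since payloads are quoted in bytes while link rates are bits per second; mixing the two inflates estimates by a factor of 8. A minimal sketch, using the convert_to_bits() method shown in the testing section later in this guide:

# Estimate download time for a decimal-unit payload over a 100 Mbit/s link
payload = Storage.parse("1.2 GB")
link_rate = Storage(100, StorageUnit.MEGABITS)  # per second

seconds = payload.convert_to_bits().value / link_rate.convert_to_bits().value
print(f"Estimated download time: {seconds:.0f} s")  # ~96 s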

2. Consistent Error Handling

Always handle potential exceptions:

# ✅ Good - Comprehensive error handling
import logging
from typing import Optional

def get_directory_size(path: str) -> Optional[Storage]:
    """Get directory size with proper error handling."""
    try:
        return Storage.get_size_from_path(path)
    except FileNotFoundError:
        logging.warning(f"Directory not found: {path}")
        return None
    except PermissionError:
        logging.warning(f"Permission denied: {path}")
        return None
    except OSError as e:
        logging.error(f"OS error for {path}: {e}")
        return None

# ✅ Good - Safe parsing with validation
def parse_user_input(size_string: str) -> Storage:
    """Parse user input with validation."""
    if not size_string.strip():
        raise ValueError("Size string cannot be empty")

    try:
        storage = Storage.parse(size_string)
        if storage.convert_to_bytes() < 0:
            raise ValueError("Size cannot be negative")
        return storage
    except ValueError as e:
        raise ValueError(f"Invalid size format '{size_string}': {e}")

3. Use Auto-Scaling for Display

Always use auto_scale() for human-readable output:

# ✅ Good - Auto-scaling for display
def display_file_info(file_path: str):
    """Display file information in readable format."""
    size = Storage.get_size_from_path(file_path)
    print(f"File: {file_path}")
    print(f"Size: {size.auto_scale()}")  # Automatically chooses best unit

# โŒ Bad - Fixed unit might be inappropriate
def bad_display(file_path: str):
    size = Storage.get_size_from_path(file_path)
    print(f"Size: {size.convert_to_mb()}")  # Always MB, even for KB files

🛡️ Error Handling Patterns

Defensive Programming

from typing import Union

class SafeStorageCalculator:
    """Calculator with defensive programming practices."""

    @staticmethod
    def safe_addition(*storages: Storage) -> Storage:
        """Add storages with overflow protection."""
        if not storages:
            return Storage(0, StorageUnit.BYTES)

        total = Storage(0, StorageUnit.BYTES)
        for storage in storages:
            if not isinstance(storage, Storage):
                raise TypeError(f"Expected Storage, got {type(storage)}")
            total += storage

        return total

    @staticmethod
    def safe_division(dividend: Storage, divisor: Union[Storage, float]) -> float:
        """Divide with zero-division protection."""
        if isinstance(divisor, Storage):
            divisor_value = divisor.convert_to_bytes()
        else:
            divisor_value = float(divisor)

        if divisor_value == 0:
            raise ZeroDivisionError("Cannot divide by zero")

        return dividend.convert_to_bytes() / divisor_value

    @staticmethod
    def validate_storage_range(
        storage: Storage, 
        min_size: Storage, 
        max_size: Storage
    ) -> bool:
        """Validate storage is within acceptable range."""
        if min_size > max_size:
            raise ValueError("min_size cannot be greater than max_size")

        return min_size <= storage <= max_size
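
A quick usage sketch (the methods are static, so no instance is needed):

total = SafeStorageCalculator.safe_addition(
    Storage(1, StorageUnit.GIB), Storage(512, StorageUnit.MIB)
)
ratio = SafeStorageCalculator.safe_division(total, Storage(1, StorageUnit.GIB))  # 1.5
in_range = SafeStorageCalculator.validate_storage_range(
    total, Storage(1, StorageUnit.GIB), Storage(2, StorageUnit.GIB)
)  # True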

Graceful Degradation

import logging
from typing import Dict, Optional

def get_disk_usage(path: str) -> Dict[str, Optional[Storage]]:
    """Get disk usage with graceful degradation."""
    result = {
        'total': None,
        'used': None,
        'free': None,
        'error': None
    }

    try:
        # Try platform-optimized approach first
        platform_storage = Storage.get_platform_storage()
        total = platform_storage.get_size_from_path(path)
        result['total'] = total

        # Calculate other metrics if possible
        # ... additional logic

    except Exception as e:
        result['error'] = str(e)
        logging.warning(f"Could not get disk usage for {path}: {e}")

        # Fallback to basic approach
        try:
            import shutil
            total, used, free = shutil.disk_usage(path)
            result['total'] = Storage.parse_from_bytes(total)
            result['used'] = Storage.parse_from_bytes(used)
            result['free'] = Storage.parse_from_bytes(free)
        except Exception as fallback_error:
            result['error'] = f"Primary: {e}, Fallback: {fallback_error}"

    return result
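
Callers can then branch on whatever data survived instead of wrapping every call site in try/except:

usage = get_disk_usage("/")
if usage['error']:
    logging.warning(f"Degraded disk usage result: {usage['error']}")
if usage['total'] is not None:
    print(f"Total capacity: {usage['total'].auto_scale()}")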

🎯 Performance Optimization

Efficient File Operations

from pathlib import Path
from typing import Any, Dict

class EfficientFileAnalyzer:
    """File analyzer optimized for performance."""

    def __init__(self):
        # Cache platform storage instance
        self.platform_storage = Storage.get_platform_storage()
        self._size_cache = {}

    def analyze_directory_tree(self, root_path: str, use_cache: bool = True) -> Dict[str, Any]:
        """Analyze directory tree efficiently."""
        results = {
            'total_size': Storage(0, StorageUnit.BYTES),
            'file_count': 0,
            'directory_count': 0,
            'file_sizes': []
        }

        for item_path in Path(root_path).rglob('*'):
            if item_path.is_file():
                # Use cache if enabled
                cache_key = str(item_path.absolute())
                if use_cache and cache_key in self._size_cache:
                    size = self._size_cache[cache_key]
                else:
                    size = self.platform_storage.get_size_from_path(item_path)
                    if use_cache:
                        self._size_cache[cache_key] = size

                results['total_size'] += size
                results['file_count'] += 1
                results['file_sizes'].append(size)

            elif item_path.is_dir():
                results['directory_count'] += 1

        return results

    def clear_cache(self):
        """Clear the size cache."""
        self._size_cache.clear()
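
Usage, with a hypothetical directory; a second run over the same tree is served from the cache rather than the file system:

analyzer = EfficientFileAnalyzer()
stats = analyzer.analyze_directory_tree("/var/log")
print(f"{stats['file_count']} files, {stats['total_size'].auto_scale()} total")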

Batch Operations

from typing import Dict, List

def process_files_efficiently(file_paths: List[str]) -> Dict[str, Storage]:
    """Process multiple files efficiently."""
    # Use single platform storage instance
    platform_storage = Storage.get_platform_storage()

    # Process in batches to manage memory
    batch_size = 1000
    results = {}

    for i in range(0, len(file_paths), batch_size):
        batch = file_paths[i:i + batch_size]

        for file_path in batch:
            try:
                size = platform_storage.get_size_from_path(file_path)
                results[file_path] = size
            except (FileNotFoundError, PermissionError):
                # Skip inaccessible files
                continue

    return results
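
For example, with hypothetical paths:

sizes = process_files_efficiently(["data/a.bin", "data/b.bin"])
for path, size in sizes.items():
    print(f"{path}: {size.auto_scale()}")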

Memory-Efficient Operations

from pathlib import Path

def calculate_large_directory_size(directory: str) -> Storage:
    """Calculate directory size without loading all paths into memory."""
    total_size = Storage(0, StorageUnit.BYTES)
    platform_storage = Storage.get_platform_storage()

    # Generator-based approach for memory efficiency
    def file_generator():
        for item_path in Path(directory).rglob('*'):
            if item_path.is_file():
                yield item_path

    # Process files one at a time
    for file_path in file_generator():
        try:
            size = platform_storage.get_size_from_path(file_path)
            total_size += size
        except (FileNotFoundError, PermissionError):
            continue  # Skip inaccessible files

    return total_size

🔧 Configuration Management

Application Settings

from dataclasses import dataclass, field
from typing import Optional

@dataclass
class StorageSettings:
    """Application storage settings."""
    default_unit_type: str = "binary"  # "binary", "decimal", "bits"
    display_precision: int = 2
    max_file_size: Storage = field(default_factory=lambda: Storage(100, StorageUnit.MB))
    cache_enabled: bool = True
    platform_optimizations: bool = True

    def __post_init__(self):
        """Validate settings after initialization."""
        valid_unit_types = {"binary", "decimal", "bits"}
        if self.default_unit_type not in valid_unit_types:
            raise ValueError(f"Invalid unit type: {self.default_unit_type}")

        if self.display_precision < 0:
            raise ValueError("Display precision cannot be negative")

class StorageManager:
    """Centralized storage management with configuration."""

    def __init__(self, settings: Optional[StorageSettings] = None):
        self.settings = settings or StorageSettings()
        self._platform_storage = None

    @property
    def platform_storage(self):
        """Lazy-loaded platform storage."""
        if self._platform_storage is None:
            if self.settings.platform_optimizations:
                self._platform_storage = Storage.get_platform_storage()
            else:
                # Use basic storage implementation
                from bytesize.platform_storage import DefaultPlatformStorage
                self._platform_storage = DefaultPlatformStorage()
        return self._platform_storage

    def format_size(self, storage: Storage) -> str:
        """Format size according to settings."""
        scaled = storage.auto_scale(
            prefer_binary=(self.settings.default_unit_type == "binary")
        )
        return f"{scaled.value:.{self.settings.display_precision}f} {scaled.unit.name}"

    def validate_file_size(self, file_path: str) -> bool:
        """Validate file size against settings."""
        try:
            file_size = self.platform_storage.get_size_from_path(file_path)
            return file_size <= self.settings.max_file_size
        except (FileNotFoundError, PermissionError):
            return False
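
Wired together, the settings object drives both formatting and validation (a usage sketch):

settings = StorageSettings(default_unit_type="decimal", display_precision=1)
manager = StorageManager(settings)
print(manager.format_size(Storage(1536, StorageUnit.MIB)))  # e.g. "1.6 GB"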

📊 Logging and Monitoring

Structured Logging

import json
import logging
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional

class StorageLogger:
    """Structured logging for storage operations."""

    def __init__(self, logger_name: str = "bytesize"):
        self.logger = logging.getLogger(logger_name)

    def log_file_operation(self, operation: str, file_path: str, 
                          size: Optional[Storage] = None, 
                          duration: Optional[float] = None,
                          error: Optional[str] = None):
        """Log file operation with structured data."""
        log_data = {
            'timestamp': datetime.utcnow().isoformat(),
            'operation': operation,
            'file_path': file_path,
            'size_bytes': size.convert_to_bytes() if size else None,
            'size_readable': str(size.auto_scale()) if size else None,
            'duration_seconds': duration,
            'error': error,
            'success': error is None
        }

        if error:
            self.logger.error(f"Storage operation failed: {json.dumps(log_data)}")
        else:
            self.logger.info(f"Storage operation completed: {json.dumps(log_data)}")

    def log_performance_metrics(self, operation: str, file_count: int, 
                               total_size: Storage, duration: float):
        """Log performance metrics."""
        metrics = {
            'timestamp': datetime.utcnow().isoformat(),
            'operation': operation,
            'file_count': file_count,
            'total_size_bytes': total_size.convert_to_bytes(),
            'total_size_readable': str(total_size.auto_scale()),
            'duration_seconds': duration,
            'files_per_second': file_count / duration if duration > 0 else 0,
            'bytes_per_second': total_size.convert_to_bytes() / duration if duration > 0 else 0
        }

        self.logger.info(f"Performance metrics: {json.dumps(metrics)}")

# Usage example
def monitored_file_scan(directory: str) -> Dict[str, Any]:
    """File scan with comprehensive monitoring."""
    logger = StorageLogger()
    start_time = time.time()

    try:
        total_size = Storage(0, StorageUnit.BYTES)
        file_count = 0

        for file_path in Path(directory).rglob('*'):
            if file_path.is_file():
                operation_start = time.time()
                try:
                    size = Storage.get_size_from_path(file_path)
                    total_size += size
                    file_count += 1

                    operation_duration = time.time() - operation_start
                    logger.log_file_operation(
                        'scan', str(file_path), size, operation_duration
                    )

                except Exception as e:
                    operation_duration = time.time() - operation_start
                    logger.log_file_operation(
                        'scan', str(file_path), None, operation_duration, str(e)
                    )

        total_duration = time.time() - start_time
        logger.log_performance_metrics(
            'directory_scan', file_count, total_size, total_duration
        )

        return {
            'total_size': total_size,
            'file_count': file_count,
            'duration': total_duration
        }

    except Exception as e:
        logger.logger.error(f"Directory scan failed: {e}")
        raise

🧪 Testing Best Practices

Comprehensive Test Coverage

import unittest
import tempfile
import os
from pathlib import Path

class TestStorageOperations(unittest.TestCase):
    """Comprehensive storage operation tests."""

    def setUp(self):
        """Set up test environment."""
        self.temp_dir = tempfile.mkdtemp()
        self.test_files = []

        # Create test files of various sizes
        sizes = [100, 1024, 1048576, 10485760]  # 100 B, 1 KiB, 1 MiB, 10 MiB
        for i, size in enumerate(sizes):
            file_path = Path(self.temp_dir) / f"test_file_{i}.bin"
            with open(file_path, 'wb') as f:
                f.write(b'0' * size)
            self.test_files.append(file_path)

    def tearDown(self):
        """Clean up test environment."""
        import shutil
        shutil.rmtree(self.temp_dir)

    def test_file_size_accuracy(self):
        """Test that file sizes are reported accurately."""
        expected_sizes = [100, 1024, 1048576, 10485760]

        for file_path, expected_size in zip(self.test_files, expected_sizes):
            with self.subTest(file=file_path, expected=expected_size):
                storage = Storage.get_size_from_path(file_path)
                self.assertEqual(storage.convert_to_bytes(), expected_size)

    def test_error_handling(self):
        """Test error handling for various scenarios."""
        # Non-existent file
        with self.assertRaises(FileNotFoundError):
            Storage.get_size_from_path("nonexistent_file.txt")

        # Invalid parsing
        with self.assertRaises(ValueError):
            Storage.parse("invalid size")

        # Negative values
        with self.assertRaises(ValueError):
            Storage(-1, StorageUnit.BYTES)

    def test_arithmetic_operations(self):
        """Test arithmetic operations comprehensively."""
        storage1 = Storage(1, StorageUnit.GB)
        storage2 = Storage(500, StorageUnit.MB)

        # Addition
        result = storage1 + storage2
        self.assertAlmostEqual(result.convert_to_gb().value, 1.5, places=6)

        # Subtraction
        result = storage1 - storage2
        self.assertAlmostEqual(result.convert_to_mb().value, 500, places=6)

        # Multiplication
        result = storage1 * 2
        self.assertEqual(result.convert_to_gb().value, 2.0)

        # Division
        ratio = storage1 / storage2
        self.assertEqual(ratio, 2.0)

    def test_conversion_accuracy(self):
        """Test conversion accuracy between units."""
        # Test binary conversions
        storage = Storage(1, StorageUnit.GIB)
        self.assertEqual(storage.convert_to_mib().value, 1024.0)
        self.assertEqual(storage.convert_to_kib().value, 1048576.0)

        # Test decimal conversions
        storage = Storage(1, StorageUnit.GB)
        self.assertEqual(storage.convert_to_mb().value, 1000.0)
        self.assertEqual(storage.convert_to_kb().value, 1000000.0)

        # Test bit conversions
        storage = Storage(1, StorageUnit.BYTES)
        self.assertEqual(storage.convert_to_bits().value, 8.0)

# Performance benchmarks
class StoragePerformanceTests(unittest.TestCase):
    """Performance-focused tests."""

    def test_large_file_performance(self):
        """Test performance with large files."""
        import time

        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            # Write a 100 MiB file
            temp_file.write(b'0' * (100 * 1024 * 1024))
            temp_file.flush()

        try:
            # Measure performance
            start_time = time.time()
            size = Storage.get_size_from_path(temp_file.name)
            duration = time.time() - start_time

            # Should complete in reasonable time (< 1 second)
            self.assertLess(duration, 1.0)
            # 100 * 1024 * 1024 bytes is 100 MiB (not 100 MB), so assert in binary units
            self.assertEqual(size.convert_to_mib().value, 100.0)
        finally:
            os.unlink(temp_file.name)
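
Both suites run under the standard library test runner; appending the usual entry point lets the module be executed directly:

if __name__ == "__main__":
    unittest.main(verbosity=2)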

📈 Code Quality Guidelines

Type Hints and Documentation

from typing import List, Optional, Union, Dict, Any
from pathlib import Path

def analyze_storage_distribution(
    file_paths: List[Union[str, Path]],
    size_ranges: Optional[Dict[str, tuple]] = None
) -> Dict[str, Any]:
    """
    Analyze the distribution of file sizes across specified ranges.

    Args:
        file_paths: List of file paths to analyze
        size_ranges: Optional dict mapping range names to (min, max) tuples
                    in bytes. If None, uses default ranges.

    Returns:
        Dict containing:
        - 'distribution': Dict mapping range names to file counts
        - 'total_files': Total number of files processed
        - 'total_size': Total size of all files
        - 'average_size': Average file size
        - 'largest_file': Path and size of largest file
        - 'smallest_file': Path and size of smallest file

    Raises:
        ValueError: If size_ranges contains invalid ranges
        FileNotFoundError: If any file in file_paths doesn't exist

    Example:
        >>> files = ['file1.txt', 'file2.bin']
        >>> ranges = {'small': (0, 1024), 'large': (1024, float('inf'))}
        >>> result = analyze_storage_distribution(files, ranges)
        >>> print(result['distribution'])
        {'small': 1, 'large': 1}
    """
    if size_ranges is None:
        size_ranges = {
            'tiny': (0, 1024),                    # 0-1 KiB
            'small': (1024, 1024**2),            # 1 KiB-1 MiB
            'medium': (1024**2, 1024**3),        # 1 MiB-1 GiB
            'large': (1024**3, float('inf'))     # 1 GiB+
        }

    # Validate size ranges
    for name, (min_size, max_size) in size_ranges.items():
        if min_size < 0 or max_size <= min_size:
            raise ValueError(f"Invalid range '{name}': {min_size}-{max_size}")

    distribution = {name: 0 for name in size_ranges.keys()}
    file_sizes = []
    largest_file = None
    smallest_file = None

    for file_path in file_paths:
        size = Storage.get_size_from_path(file_path)
        size_bytes = size.convert_to_bytes()
        file_sizes.append((file_path, size))

        # Update largest/smallest tracking
        if largest_file is None or size > largest_file[1]:
            largest_file = (file_path, size)
        if smallest_file is None or size < smallest_file[1]:
            smallest_file = (file_path, size)

        # Categorize by size range
        for range_name, (min_size, max_size) in size_ranges.items():
            if min_size <= size_bytes < max_size:
                distribution[range_name] += 1
                break

    # Give sum() a Storage start value so the result is a Storage, not an int
    total_size = sum((size for _, size in file_sizes), Storage(0, StorageUnit.BYTES))
    average_size = total_size / len(file_sizes) if file_sizes else Storage(0, StorageUnit.BYTES)

    return {
        'distribution': distribution,
        'total_files': len(file_sizes),
        'total_size': total_size,
        'average_size': average_size,
        'largest_file': largest_file,
        'smallest_file': smallest_file
    }

These best practices will help you build robust, maintainable applications with Bytesize. Remember to adapt them to your specific use case and requirements.
