"""Pipeline CLI to process property data with H3 spatial indexing."""

import polars as pl

from pipeline.sources.postcodes import save_postcodes
from pipeline.sources.property_prices import PropertyPricesSource
from pipeline.processors.h3_aggregator import save_aggregates
from pipeline.processors.journey_times_aggregator import aggregate_journey_times


def run_pipeline():
    """Execute the four pipeline stages in order, reporting progress on stdout.

    Stages:
      1. ``save_postcodes`` produces the postcode file and returns its path.
      2. That file is scanned with polars and passed to
         ``PropertyPricesSource.process``.
      3. ``save_aggregates`` is run on the processed properties; every path
         it returns is listed together with its on-disk size.
      4. ``aggregate_journey_times`` is run; the paths it returns are listed
         the same way, or the stage is reported as skipped when the result
         is falsy.
    """

    def _megabytes(file_path):
        # On-disk size of the file, converted from bytes to MB (1024 * 1024).
        return file_path.stat().st_size / (1024 * 1024)

    banner = "=" * 60
    print(banner)
    print("Property Map Data Pipeline")
    print(banner)

    # Stage 1: postcodes with H3 indices
    print("\n[1/4] Processing postcodes with H3 indices...")
    postcodes_path = save_postcodes()
    print(f"      Saved: {postcodes_path}")

    # Stage 2: property prices joined against the postcode data
    print("\n[2/4] Processing property prices...")
    postcode_frame = pl.scan_parquet(postcodes_path)
    properties = PropertyPricesSource().process(postcode_frame)
    print("      Joined property prices with postcodes")

    # Stage 3: aggregates at H3 resolutions
    print("\n[3/4] Aggregating at H3 resolutions...")
    for aggregate_file in save_aggregates(properties):
        megabytes = _megabytes(aggregate_file)
        print(f"      Saved: {aggregate_file.name} ({megabytes:.1f} MB)")

    # Stage 4: journey times added onto the aggregates
    print("\n[4/4] Adding journey times to aggregates...")
    updated_paths = aggregate_journey_times()
    if not updated_paths:
        print("      Skipped (no journey time data found)")
        return

    for updated_file in updated_paths:
        megabytes = _megabytes(updated_file)
        print(f"      Updated: {updated_file.name} ({megabytes:.1f} MB)")


# Run the pipeline only when this file is executed as a script; importing the
# module just defines run_pipeline without triggering any processing.
if __name__ == "__main__":
    run_pipeline()