Update H3 pipeline
This commit is contained in:
parent
68b6dcf65e
commit
6122ee44da
13 changed files with 291 additions and 420 deletions
|
|
@ -1,45 +1,6 @@
|
|||
"""Pipeline CLI to process property data with H3 spatial indexing."""
|
||||
|
||||
import polars as pl
|
||||
|
||||
from pipeline.sources.postcodes import save_postcodes
|
||||
from pipeline.sources.property_prices import PropertyPricesSource
|
||||
from pipeline.processors.h3_aggregator import save_aggregates
|
||||
from pipeline.processors.journey_times_aggregator import aggregate_journey_times
|
||||
|
||||
|
||||
def run_pipeline():
    """Drive the four pipeline stages in sequence, logging progress to stdout.

    Prints a banner, then for each stage prints a ``[k/4]`` header followed
    by the outputs that stage produced. The stages, in order, are:
    postcodes, property prices, H3 aggregates, and journey times. The last
    stage reports "Skipped" when it yields no updated files.
    """
    bytes_per_mb = 1024 * 1024
    rule = "=" * 60

    def report_file(verb, file_path):
        # One progress line per output file: its name and on-disk size in MB.
        megabytes = file_path.stat().st_size / bytes_per_mb
        print(f" {verb}: {file_path.name} ({megabytes:.1f} MB)")

    print(rule, "Property Map Data Pipeline", rule, sep="\n")

    # Stage 1: write the postcode table and remember where it landed.
    print("\n[1/4] Processing postcodes with H3 indices...")
    postcodes_file = save_postcodes()
    print(f" Saved: {postcodes_file}")

    # Stage 2: open the postcode table first, then hand it to the price
    # source (same call order as before: scan, construct, process).
    print("\n[2/4] Processing property prices...")
    postcode_frame = pl.scan_parquet(postcodes_file)
    priced_properties = PropertyPricesSource().process(postcode_frame)
    print(" Joined property prices with postcodes")

    # Stage 3: write the aggregates and list each resulting file.
    print("\n[3/4] Aggregating at H3 resolutions...")
    for aggregate_file in save_aggregates(priced_properties):
        report_file("Saved", aggregate_file)

    # Stage 4: add journey times; a falsy result means nothing was updated.
    print("\n[4/4] Adding journey times to aggregates...")
    refreshed_files = aggregate_journey_times()
    if not refreshed_files:
        print(" Skipped (no journey time data found)")
        return
    for refreshed_file in refreshed_files:
        report_file("Updated", refreshed_file)
|
||||
|
||||
from pipeline.wide import run
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_pipeline()
|
||||
run()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue