Add hexagon backend
This commit is contained in:
parent
a7cc4d9b2b
commit
ab704c0dc0
18 changed files with 1443 additions and 0 deletions
0
pipeline/processors/__init__.py
Normal file
0
pipeline/processors/__init__.py
Normal file
42
pipeline/processors/h3_aggregator.py
Normal file
42
pipeline/processors/h3_aggregator.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
from pathlib import Path
|
||||
import polars as pl
|
||||
|
||||
from pipeline.config import AGGREGATES_DIR, H3_RESOLUTIONS
|
||||
|
||||
|
||||
def aggregate(df: pl.LazyFrame, resolution: int) -> pl.LazyFrame:
    """Summarise prices per H3 cell and year at one resolution.

    Rows are grouped on the ``h3_res{resolution}`` column together with
    ``year``. Each group yields the row count plus the mean, median,
    minimum and maximum of ``price``. The resolution-specific cell column
    is renamed to ``h3`` in the result.

    Args:
        df: Lazy frame containing ``h3_res{resolution}``, ``year`` and
            ``price`` columns.
        resolution: H3 resolution used to select the cell column.

    Returns:
        A lazy frame with columns ``h3``, ``year``, ``count``,
        ``avg_price``, ``median_price``, ``min_price`` and ``max_price``.
    """
    cell_column = f"h3_res{resolution}"
    price = pl.col("price")

    # Order here fixes the column order of the aggregated output.
    summaries = [
        pl.len().alias("count"),
        price.mean().alias("avg_price"),
        price.median().alias("median_price"),
        price.min().alias("min_price"),
        price.max().alias("max_price"),
    ]

    grouped = df.group_by(cell_column, "year").agg(summaries)
    return grouped.rename({cell_column: "h3"})
|
||||
|
||||
|
||||
def aggregate_all(df: pl.LazyFrame) -> dict[int, pl.LazyFrame]:
    """Build one aggregated lazy frame per configured H3 resolution.

    Args:
        df: Lazy frame passed unchanged to ``aggregate`` for each resolution.

    Returns:
        A mapping from each resolution in ``H3_RESOLUTIONS`` to the lazy
        frame returned by ``aggregate`` for it, in iteration order.
    """
    by_resolution: dict[int, pl.LazyFrame] = {}
    for level in H3_RESOLUTIONS:
        by_resolution[level] = aggregate(df, level)
    return by_resolution
|
||||
|
||||
|
||||
def save_aggregates(df: pl.LazyFrame, output_dir: Path | None = None) -> list[Path]:
    """Write the per-resolution aggregates to Parquet files.

    The target directory is created (including parents) if it does not
    exist. For each resolution a file named ``res{resolution}.parquet`` is
    written inside it.

    Args:
        df: Lazy frame to aggregate via ``aggregate_all``.
        output_dir: Destination directory; ``AGGREGATES_DIR`` is used when
            this is omitted or falsy.

    Returns:
        The paths of the files written, in the order they were produced.
    """
    target_dir = output_dir if output_dir else AGGREGATES_DIR
    target_dir.mkdir(parents=True, exist_ok=True)

    written: list[Path] = []
    for level, lazy_frame in aggregate_all(df).items():
        destination = target_dir / f"res{level}.parquet"
        # The lazy query is only executed here, once per resolution.
        materialised = lazy_frame.collect()
        materialised.write_parquet(destination)
        written.append(destination)

    return written
|
||||
Loading…
Add table
Add a link
Reference in a new issue