perfect-postcode/server/routes/hexagons.py

98 lines
3.3 KiB
Python

from typing import Any
from fastapi import APIRouter, Query
import polars as pl
import h3
from server.config import (
AGGREGATES_DIR,
VALID_RESOLUTIONS,
DEFAULT_RESOLUTION,
DEFAULT_MIN_YEAR,
DEFAULT_MAX_YEAR,
DEFAULT_MIN_PRICE,
DEFAULT_MAX_PRICE,
)
# Router that this module's endpoints register on (see the /hexagons route below).
router = APIRouter()
def h3_to_geojson_feature(h3_index: str, properties: dict[str, Any]) -> dict:
    """Build a GeoJSON ``Feature`` whose polygon outlines one H3 cell.

    Args:
        h3_index: H3 cell index handed to ``h3.cell_to_boundary``.
        properties: Extra attributes for the feature. They are merged in
            after the ``"h3"`` key, so an ``"h3"`` entry here overrides it.

    Returns:
        A GeoJSON Feature dict with ``"type"``, ``"properties"`` and a
        single-ring ``"Polygon"`` geometry.
    """
    # h3 hands back (lat, lng) vertices, whereas GeoJSON positions are [lng, lat].
    ring = [
        [vertex_lng, vertex_lat]
        for vertex_lat, vertex_lng in h3.cell_to_boundary(h3_index)
    ]
    # A GeoJSON linear ring has to finish on its starting position.
    ring.append(ring[0])

    geometry = {"type": "Polygon", "coordinates": [ring]}
    feature_properties = {"h3": h3_index, **properties}
    return {
        "type": "Feature",
        "properties": feature_properties,
        "geometry": geometry,
    }
def _parse_bounds(bounds: str) -> tuple[float, float, float, float]:
    """Parse ``"lat1,lng1,lat2,lng2"`` into ``(south, west, north, east)``.

    The two corners may be supplied in either order; the result is
    normalised so that ``south <= north`` and ``west <= east``. Boxes that
    cross the antimeridian are therefore not supported.

    Raises:
        ValueError: If there are not exactly four comma-separated values,
            or any value is not a finite number.
    """
    import math

    parts = bounds.split(",")
    if len(parts) != 4:
        raise ValueError("expected four comma-separated numbers")
    # float() raises ValueError itself on non-numeric text.
    lat1, lng1, lat2, lng2 = (float(part) for part in parts)
    if not all(math.isfinite(value) for value in (lat1, lng1, lat2, lng2)):
        raise ValueError("bounds must be finite numbers")
    return min(lat1, lat2), min(lng1, lng2), max(lat1, lat2), max(lng1, lng2)


def _ring_intersects_bounds(
    ring: list[list[float]], bbox: tuple[float, float, float, float]
) -> bool:
    """Return True if the extent of a ``[lng, lat]`` ring overlaps ``bbox``.

    This compares bounding boxes only, so it is deliberately conservative:
    a cell just outside a corner of ``bbox`` may still be kept, but a cell
    that overlaps ``bbox`` is never dropped.
    """
    south, west, north, east = bbox
    lngs = [position[0] for position in ring]
    lats = [position[1] for position in ring]
    return (
        min(lats) <= north
        and max(lats) >= south
        and min(lngs) <= east
        and max(lngs) >= west
    )


@router.get("/hexagons")
async def get_hexagons(
    resolution: int = Query(
        DEFAULT_RESOLUTION,
        ge=min(VALID_RESOLUTIONS),
        le=max(VALID_RESOLUTIONS),
        description=f"H3 resolution ({min(VALID_RESOLUTIONS)}-{max(VALID_RESOLUTIONS)})",
    ),
    min_year: int = Query(DEFAULT_MIN_YEAR, description="Minimum year filter"),
    max_year: int = Query(DEFAULT_MAX_YEAR, description="Maximum year filter"),
    min_price: float = Query(DEFAULT_MIN_PRICE, description="Minimum average price"),
    max_price: float = Query(DEFAULT_MAX_PRICE, description="Maximum average price"),
    bounds: str | None = Query(
        None, description="Bounding box: lat1,lng1,lat2,lng2"
    ),
) -> dict:
    """Get aggregated property data as GeoJSON hexagons.

    Args:
        resolution: H3 resolution selecting which ``res{resolution}.parquet``
            aggregate file is read. Values inside the ``ge``/``le`` range but
            not in ``VALID_RESOLUTIONS`` fall back to ``DEFAULT_RESOLUTION``.
        min_year: Inclusive lower bound on the ``year`` column.
        max_year: Inclusive upper bound on the ``year`` column.
        min_price: Inclusive lower bound on the count-weighted average price.
        max_price: Inclusive upper bound on the count-weighted average price.
        bounds: Optional ``"lat1,lng1,lat2,lng2"`` bounding box. When given,
            only hexagons whose extent overlaps the box are returned. A
            missing or blank value disables the filter.

    Returns:
        A GeoJSON ``FeatureCollection`` dict with one feature per H3 cell.
        The collection is empty when the aggregate file does not exist.

    Raises:
        HTTPException: 400 if ``bounds`` is present but cannot be parsed.
    """
    # Function-scope import: the module-level fastapi import only brings in
    # APIRouter and Query.
    from fastapi import HTTPException

    # Validate the bounding box first so bad requests fail before any I/O.
    bbox = None
    if bounds is not None and bounds.strip():
        try:
            bbox = _parse_bounds(bounds)
        except ValueError as exc:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid bounds {bounds!r}: expected 'lat1,lng1,lat2,lng2'",
            ) from exc

    # The ge/le constraints only bound the range; VALID_RESOLUTIONS may have gaps.
    if resolution not in VALID_RESOLUTIONS:
        resolution = DEFAULT_RESOLUTION

    # Load the appropriate resolution file
    parquet_path = AGGREGATES_DIR / f"res{resolution}.parquet"
    if not parquet_path.exists():
        return {"type": "FeatureCollection", "features": []}

    df = pl.scan_parquet(parquet_path)

    # Filter by year range
    df = df.filter((pl.col("year") >= min_year) & (pl.col("year") <= max_year))

    # Aggregate across years (weighted by count)
    df = df.group_by("h3").agg(
        pl.col("count").sum().alias("count"),
        (pl.col("avg_price") * pl.col("count")).sum().alias("weighted_price_sum"),
        # NOTE(review): this is a median of the per-row medians, which only
        # approximates the true median over the underlying sales.
        pl.col("median_price").median().alias("median_price"),
        pl.col("min_price").min().alias("min_price"),
        pl.col("max_price").max().alias("max_price"),
    )

    # Calculate weighted average price
    df = df.with_columns(
        (pl.col("weighted_price_sum") / pl.col("count")).alias("avg_price")
    ).drop("weighted_price_sum")

    # Filter by price range
    df = df.filter(
        (pl.col("avg_price") >= min_price) & (pl.col("avg_price") <= max_price)
    )

    # NOTE(review): collect() is a synchronous call inside an ``async def``
    # endpoint, so it runs on the event loop; consider offloading it if
    # request latency under load becomes a problem.
    result = df.collect()

    # Convert to GeoJSON, dropping cells outside the requested bounding box.
    features = []
    for row in result.iter_rows(named=True):
        median_price = row["median_price"]
        properties = {
            "count": row["count"],
            "avg_price": round(row["avg_price"], 2),
            # Compare against None explicitly so a median of 0 is not
            # reported as missing.
            "median_price": round(median_price, 2) if median_price is not None else None,
            "min_price": row["min_price"],
            "max_price": row["max_price"],
        }
        feature = h3_to_geojson_feature(row["h3"], properties)
        if bbox is not None and not _ring_intersects_bounds(
            feature["geometry"]["coordinates"][0], bbox
        ):
            continue
        features.append(feature)

    return {"type": "FeatureCollection", "features": features}