"""FastAPI router exposing H3-aggregated property data as GeoJSON hexagons."""

import math
from typing import Any

from fastapi import APIRouter, HTTPException, Query
import h3
import polars as pl

from server.config import (
    AGGREGATES_DIR,
    VALID_RESOLUTIONS,
    DEFAULT_RESOLUTION,
    DEFAULT_MIN_YEAR,
    DEFAULT_MAX_YEAR,
    DEFAULT_MIN_PRICE,
    DEFAULT_MAX_PRICE,
)

router = APIRouter()


def h3_to_geojson_feature(h3_index: str, properties: dict[str, Any]) -> dict:
    """Convert an H3 index to a GeoJSON Feature with polygon geometry.

    Args:
        h3_index: H3 cell index whose boundary becomes the polygon.
        properties: Extra properties merged into the feature; the cell index
            itself is always included under the ``"h3"`` key.

    Returns:
        A GeoJSON ``Feature`` dict with a single closed exterior ring.
    """
    boundary = h3.cell_to_boundary(h3_index)
    # h3 returns (lat, lng) pairs, GeoJSON needs [lng, lat].
    ring = [[lng, lat] for lat, lng in boundary]
    # GeoJSON rings must be closed: repeat the first vertex at the end.
    ring.append(ring[0])
    return {
        "type": "Feature",
        "properties": {"h3": h3_index, **properties},
        "geometry": {"type": "Polygon", "coordinates": [ring]},
    }


def _parse_bounds(bounds: str | None) -> tuple[float, float, float, float] | None:
    """Parse ``"lat1,lng1,lat2,lng2"`` into ``(south, west, north, east)``.

    Returns ``None`` when no bounds were supplied (``None`` or blank string).

    Raises:
        HTTPException: 422 if the value is not four finite comma-separated
            numbers.
    """
    if bounds is None or not bounds.strip():
        return None

    parts = bounds.split(",")
    try:
        if len(parts) != 4:
            raise ValueError(f"expected 4 values, got {len(parts)}")
        lat1, lng1, lat2, lng2 = (float(part) for part in parts)
    except ValueError as err:
        raise HTTPException(
            status_code=422,
            detail="bounds must be four comma-separated numbers: lat1,lng1,lat2,lng2",
        ) from err

    if not all(math.isfinite(value) for value in (lat1, lng1, lat2, lng2)):
        raise HTTPException(
            status_code=422,
            detail="bounds values must be finite numbers",
        )

    # Accept the two corners in either order.
    # NOTE: a box that crosses the antimeridian is not supported; it is
    # normalised to the complementary box by the min/max below.
    return (min(lat1, lat2), min(lng1, lng2), max(lat1, lat2), max(lng1, lng2))


def _ring_intersects_bounds(
    ring: list[list[float]], box: tuple[float, float, float, float]
) -> bool:
    """Return True if the ring's lat/lng extent overlaps ``box``.

    ``ring`` holds ``[lng, lat]`` pairs; ``box`` is ``(south, west, north,
    east)``. The test compares axis-aligned extents, so it is slightly
    over-inclusive: a hexagon near a corner of the box may be kept even when
    the polygon itself does not touch the box.
    """
    south, west, north, east = box
    lngs = [point[0] for point in ring]
    lats = [point[1] for point in ring]
    return (
        min(lats) <= north
        and max(lats) >= south
        and min(lngs) <= east
        and max(lngs) >= west
    )


@router.get("/hexagons")
async def get_hexagons(
    resolution: int = Query(
        DEFAULT_RESOLUTION,
        ge=min(VALID_RESOLUTIONS),
        le=max(VALID_RESOLUTIONS),
        description=f"H3 resolution ({min(VALID_RESOLUTIONS)}-{max(VALID_RESOLUTIONS)})",
    ),
    min_year: int = Query(DEFAULT_MIN_YEAR, description="Minimum year filter"),
    max_year: int = Query(DEFAULT_MAX_YEAR, description="Maximum year filter"),
    min_price: float = Query(DEFAULT_MIN_PRICE, description="Minimum average price"),
    max_price: float = Query(DEFAULT_MAX_PRICE, description="Maximum average price"),
    bounds: str | None = Query(
        None, description="Bounding box: lat1,lng1,lat2,lng2"
    ),
) -> dict:
    """Get aggregated property data as GeoJSON hexagons.

    Args:
        resolution: H3 resolution selecting which ``res{N}.parquet`` aggregate
            file is read. Values not in ``VALID_RESOLUTIONS`` fall back to
            ``DEFAULT_RESOLUTION``.
        min_year: Inclusive lower bound on the ``year`` column.
        max_year: Inclusive upper bound on the ``year`` column.
        min_price: Inclusive lower bound on the count-weighted average price.
        max_price: Inclusive upper bound on the count-weighted average price.
        bounds: Optional ``"lat1,lng1,lat2,lng2"`` box; when given, only
            hexagons whose extent overlaps the box are returned.

    Returns:
        A GeoJSON ``FeatureCollection``; empty when the aggregate file for
        the resolution does not exist or nothing matches the filters.

    Raises:
        HTTPException: 422 if ``bounds`` is supplied but malformed.
    """
    box = _parse_bounds(bounds)

    # The ge/le constraints only enforce the min..max range, so a value inside
    # a gap of VALID_RESOLUTIONS still needs this fallback.
    if resolution not in VALID_RESOLUTIONS:
        resolution = DEFAULT_RESOLUTION

    # Load the appropriate resolution file.
    parquet_path = AGGREGATES_DIR / f"res{resolution}.parquet"
    if not parquet_path.exists():
        return {"type": "FeatureCollection", "features": []}

    per_cell = (
        pl.scan_parquet(parquet_path)
        # Filter by year range.
        .filter((pl.col("year") >= min_year) & (pl.col("year") <= max_year))
        # Aggregate across years; the average is weighted by count.
        .group_by("h3")
        .agg(
            pl.col("count").sum().alias("count"),
            (pl.col("avg_price") * pl.col("count")).sum().alias("weighted_price_sum"),
            # Median of the per-year medians: an approximation, since the
            # underlying individual prices are not available here.
            pl.col("median_price").median().alias("median_price"),
            pl.col("min_price").min().alias("min_price"),
            pl.col("max_price").max().alias("max_price"),
        )
        # Calculate the weighted average price.
        .with_columns(
            (pl.col("weighted_price_sum") / pl.col("count")).alias("avg_price")
        )
        .drop("weighted_price_sum")
        # Filter by price range.
        .filter(
            (pl.col("avg_price") >= min_price) & (pl.col("avg_price") <= max_price)
        )
    )

    # NOTE(review): collect() is synchronous and runs on the event loop inside
    # this async handler; consider offloading it if queries become slow.
    result = per_cell.collect()

    features = []
    for row in result.iter_rows(named=True):
        median_price = row["median_price"]
        feature = h3_to_geojson_feature(
            row["h3"],
            {
                "count": row["count"],
                "avg_price": round(row["avg_price"], 2),
                # Compare against None explicitly so a median of 0 is kept.
                "median_price": (
                    round(median_price, 2) if median_price is not None else None
                ),
                "min_price": row["min_price"],
                "max_price": row["max_price"],
            },
        )
        # Reuse the ring already built for the feature for the bounds test.
        if box is not None and not _ring_intersects_bounds(
            feature["geometry"]["coordinates"][0], box
        ):
            continue
        features.append(feature)

    return {"type": "FeatureCollection", "features": features}