Fix loading times

This commit is contained in:
Andras Schmelczer 2026-01-26 20:42:52 +00:00
parent f685bdca04
commit 2f41c38cc4
4 changed files with 34 additions and 18 deletions

View file

@ -164,7 +164,8 @@ export default function Map({ data, onViewChange }: MapProps) {
getFillColor: (d) => priceToColor(d.avg_price),
extruded: false,
pickable: true,
opacity: 0.7,
opacity: 0.5,
highPrecision: true,
}),
],
[data]

View file

@ -37,5 +37,4 @@ export interface ViewChangeParams {
export interface ApiResponse {
features: HexagonData[];
truncated: boolean;
}

View file

@ -1,3 +1,4 @@
from contextlib import asynccontextmanager
from pathlib import Path
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
@ -5,7 +6,16 @@ from fastapi.staticfiles import StaticFiles
from server.routes import hexagons
app = FastAPI(title="Property Map API")
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook passed to ``FastAPI(lifespan=...)``.

    The code before the ``yield`` runs at startup and preloads the
    parquet-backed dataframes through ``hexagons.preload_dataframes()``.
    Nothing follows the ``yield`` because there is nothing to release
    at shutdown.
    """
    # Startup phase: warm the dataframe cache up front.
    hexagons.preload_dataframes()
    yield
    # Shutdown phase: intentionally empty, no resources to clean up.
app = FastAPI(title="Property Map API", lifespan=lifespan)
app.add_middleware(
CORSMiddleware,

View file

@ -1,8 +1,11 @@
import math
from functools import lru_cache
from fastapi import APIRouter, Query, HTTPException
import polars as pl
import h3
from tqdm import tqdm
from server.config import (
AGGREGATES_DIR,
VALID_RESOLUTIONS,
@ -19,6 +22,12 @@ router = APIRouter()
_df_cache: dict[int, pl.DataFrame] = {}
def preload_dataframes() -> None:
    """Populate the dataframe cache for every valid resolution.

    Intended to be called once at startup. Each entry of
    ``VALID_RESOLUTIONS`` is passed to ``get_cached_df``, which loads the
    dataframe from disk if it is not cached yet. The return value is
    ignored; the call is made purely for its caching side effect.
    """
    # Wrap the resolutions in a progress bar so startup loading is visible.
    progress = tqdm(VALID_RESOLUTIONS, desc="Loading parquet files")
    for res in progress:
        get_cached_df(res)
def get_cached_df(resolution: int) -> pl.DataFrame | None:
"""Get cached dataframe for resolution, loading from disk if needed."""
if resolution not in _df_cache:
@ -48,8 +57,8 @@ def query_hexagons_cached(
min_price: int,
max_price: int,
bounds_tuple: tuple[float, float, float, float],
) -> tuple[list[dict], bool]:
"""Cached query - returns (features, truncated)."""
) -> list[dict]:
"""Cached query - returns features list."""
south, west, north, east = bounds_tuple
df = get_cached_df(resolution)
@ -86,12 +95,6 @@ def query_hexagons_cached(
(pl.col("avg_price") >= min_price) & (pl.col("avg_price") <= max_price)
)
# Limit results
MAX_HEXAGONS = 50000
truncated = len(df) >= MAX_HEXAGONS
if truncated:
df = df.limit(MAX_HEXAGONS)
# Build response efficiently using Polars
df = df.select(
[
@ -104,7 +107,7 @@ def query_hexagons_cached(
]
)
return df.to_dicts(), truncated
return df.to_dicts()
@router.get("/hexagons")
@ -136,15 +139,18 @@ async def get_hexagons(
)
# Round bounds to reduce cache misses (0.01 degree ≈ 1km precision)
# Always expand bounds (floor for min, ceil for max) to prevent hexagons
# popping in when crossing rounding boundaries
precision = 0.01
bounds_tuple = (
round(south, 2),
round(west, 2),
round(north, 2),
round(east, 2),
math.floor(south / precision) * precision,
math.floor(west / precision) * precision,
math.ceil(north / precision) * precision,
math.ceil(east / precision) * precision,
)
# Convert prices to int for cache key hashability
features, truncated = query_hexagons_cached(
features = query_hexagons_cached(
resolution,
min_year,
max_year,
@ -153,4 +159,4 @@ async def get_hexagons(
bounds_tuple,
)
return {"features": features, "truncated": truncated}
return {"features": features}