Rewrite server in rust

This commit is contained in:
Andras Schmelczer 2026-01-31 10:18:54 +00:00
parent 0cea9b873c
commit bf2d5de156
13 changed files with 3875 additions and 547 deletions

11
server-rs/src/consts.rs Normal file
View file

@ -0,0 +1,11 @@
/// Lower percentile (on a 0-100 scale) for feature range reporting
pub const FEATURE_PERCENTILE_LOW: f64 = 2.0;
/// Upper percentile (on a 0-100 scale) for feature range reporting
pub const FEATURE_PERCENTILE_HIGH: f64 = 98.0;
/// Number of equal-width bins in each per-feature histogram
pub const HISTOGRAM_BINS: usize = 100;
/// Lowest H3 resolution precomputed at startup (inclusive).
/// The precomputed range is meant to cover typical zoom levels.
pub const H3_PRECOMPUTE_MIN: u8 = 4;
/// Highest H3 resolution precomputed at startup (inclusive)
pub const H3_PRECOMPUTE_MAX: u8 = 12;

405
server-rs/src/data.rs Normal file
View file

@ -0,0 +1,405 @@
use polars::prelude::*;
use polars::lazy::frame::LazyFrame;
use rayon::prelude::*;
use serde::Serialize;
use std::path::Path;
use crate::consts::{FEATURE_PERCENTILE_LOW, FEATURE_PERCENTILE_HIGH, HISTOGRAM_BINS, H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX};
/// Columns to exclude from feature discovery: the coordinate columns are
/// loaded separately and are not treated as features.
const EXCLUDED_COLUMNS: &[&str] = &["lat", "lon"];
/// Lowest valid H3 resolution (the valid range is 0-15)
pub const MIN_RESOLUTION: u8 = 0;
/// Highest valid H3 resolution (inclusive)
pub const MAX_RESOLUTION: u8 = 15;
/// Resolution used when a request does not specify one
pub const DEFAULT_RESOLUTION: u8 = 8;
/// Reports whether `dtype` is one of the fixed-width integer or floating
/// point polars types that can be used as a numeric feature column.
fn is_numeric_dtype(dtype: &DataType) -> bool {
    match dtype {
        // Signed integers
        DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => true,
        // Unsigned integers
        DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64 => true,
        // Floating point
        DataType::Float32 | DataType::Float64 => true,
        // Everything else (strings, dates, nested types, ...) is not a feature
        _ => false,
    }
}
/// Histogram for a single feature column.
///
/// Serialized as part of the feature metadata response.
#[derive(Serialize, Clone)]
pub struct Histogram {
/// Left edge of first bin (smallest value that was binned)
pub min: f64,
/// Largest value that was binned. The bins themselves span
/// `counts.len() * bin_width` starting at `min`, which is marginally wider
/// than `max - min` so that the largest value falls inside the last bin.
pub max: f64,
/// Width of each bin
pub bin_width: f64,
/// Count of values in each bin
pub counts: Vec<u64>,
}
/// Precomputed statistics for a single feature
pub struct FeatureStats {
/// Approximate value at the `FEATURE_PERCENTILE_LOW` percentile
pub p_low: f64,
/// Approximate value at the `FEATURE_PERCENTILE_HIGH` percentile
pub p_high: f64,
/// Equal-width histogram the percentiles were derived from
pub histogram: Histogram,
}
/// Columnar storage for all property data.
/// Feature values use NaN as the null sentinel.
///
/// `lat`, `lon` and the rows of `feature_data` share the same row order.
pub struct PropertyData {
/// Latitude of each row
pub lat: Vec<f64>,
/// Longitude of each row
pub lon: Vec<f64>,
/// Dynamically discovered numeric feature column names
pub feature_names: Vec<String>,
/// Number of feature columns
pub num_features: usize,
/// Row-major flat array: feature_data[row * num_features + feat_idx].
/// NaN = null. Contiguous layout for cache-friendly per-row access.
pub feature_data: Vec<f64>,
/// Precomputed stats (percentiles + histogram) for each feature,
/// in the same order as `feature_names`
pub feature_stats: Vec<FeatureStats>,
}
/// Estimate the `p`-th percentile (`p` in [0, 100]) of the values summarised
/// by an equal-width histogram.
///
/// `counts[i]` is the population of the bin starting at `min + i * bin_width`
/// and `total` is the sum of all counts. The zero-based target rank
/// `p/100 * (total - 1)` is located by walking the cumulative counts, and the
/// position inside the containing bin is linearly interpolated.
///
/// If no bin's cumulative count exceeds the target rank, the right edge of
/// the last bin is returned.
fn percentile_from_histogram(counts: &[u64], min: f64, bin_width: f64, total: usize, p: f64) -> f64 {
    let rank = (p / 100.0) * (total as f64 - 1.0);
    let mut seen_before = 0u64;

    for (bin, &in_bin) in counts.iter().enumerate() {
        let seen_through = seen_before + in_bin;
        if seen_through as f64 > rank {
            // The target rank lies in this bin: place it proportionally
            // between the bin's left and right edges.
            let within = match in_bin {
                0 => 0.0,
                n => (rank - seen_before as f64) / n as f64,
            };
            return min + (bin as f64 + within) * bin_width;
        }
        seen_before = seen_through;
    }

    // Rank is beyond everything counted: report the histogram's right edge.
    min + counts.len() as f64 * bin_width
}
/// Build a histogram and compute approximate percentiles in O(n) — no sort needed.
///
/// Two passes are made over `vals`: the first finds the minimum, maximum and
/// number of usable values, the second assigns each usable value to one of
/// `HISTOGRAM_BINS` equal-width bins. The low/high percentiles are then
/// interpolated from the histogram.
///
/// Only finite values are used. NaN is the null sentinel, and ±infinity is
/// skipped as well because a single infinite value would make the range and
/// bin width infinite and turn the reported percentiles into inf/NaN.
///
/// When there are no usable values an all-zero histogram over `[0, 0]` with
/// `bin_width` 1 is returned.
fn compute_feature_stats(vals: &[f64]) -> FeatureStats {
    // First pass: min, max, count over the usable values.
    let mut min = f64::INFINITY;
    let mut max = f64::NEG_INFINITY;
    let mut count = 0usize;
    for v in vals.iter().copied().filter(|v| v.is_finite()) {
        if v < min {
            min = v;
        }
        if v > max {
            max = v;
        }
        count += 1;
    }

    if count == 0 {
        return FeatureStats {
            p_low: 0.0,
            p_high: 0.0,
            histogram: Histogram {
                min: 0.0,
                max: 0.0,
                bin_width: 1.0,
                counts: vec![0; HISTOGRAM_BINS],
            },
        };
    }

    // A constant column would give a zero-width range; use a unit range so
    // bin_width stays positive.
    let range = if max == min { 1.0 } else { max - min };
    // Stretch the top edge slightly so `max` itself lands inside the last bin.
    let bin_max = min + range * (1.0 + 1e-9);
    let bin_width = (bin_max - min) / HISTOGRAM_BINS as f64;

    // Second pass: bin the usable values.
    let mut counts = vec![0u64; HISTOGRAM_BINS];
    for v in vals.iter().copied().filter(|v| v.is_finite()) {
        let bin = ((v - min) / bin_width) as usize;
        // Clamp guards against floating-point rounding at the top edge.
        counts[bin.min(HISTOGRAM_BINS - 1)] += 1;
    }

    // Approximate percentiles from the histogram
    let p_low = percentile_from_histogram(&counts, min, bin_width, count, FEATURE_PERCENTILE_LOW);
    let p_high = percentile_from_histogram(&counts, min, bin_width, count, FEATURE_PERCENTILE_HIGH);

    FeatureStats {
        p_low,
        p_high,
        histogram: Histogram {
            min,
            max,
            bin_width,
            counts,
        },
    }
}
/// Materialise a polars column as a plain `Vec<f64>`, casting to Float64
/// first and substituting NaN for every null entry.
///
/// Panics if the column cannot be cast to Float64.
fn column_to_f64_vec(c: &Column) -> Vec<f64> {
    let floats = c.cast(&DataType::Float64).unwrap();
    floats
        .f64()
        .unwrap()
        .into_iter()
        .map(|cell| match cell {
            Some(value) => value,
            None => f64::NAN,
        })
        .collect()
}
/// Compute the H3 cell ID of every row for each resolution in
/// `H3_PRECOMPUTE_MIN..=H3_PRECOMPUTE_MAX`, one resolution per rayon task.
///
/// The result has 16 slots indexed by resolution; slots for resolutions
/// outside the precomputed range are left as empty Vecs. A row whose
/// coordinates are rejected by `h3o::LatLng::new` gets the cell ID 0.
pub fn precompute_h3(lat: &[f64], lon: &[f64]) -> Vec<Vec<u64>> {
    eprintln!(
        "Precomputing H3 cells for resolutions {}..{}...",
        H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX
    );

    let per_resolution: Vec<(u8, Vec<u64>)> = (H3_PRECOMPUTE_MIN..=H3_PRECOMPUTE_MAX)
        .collect::<Vec<u8>>()
        .into_par_iter()
        .map(|res| {
            let h3_res = h3o::Resolution::try_from(res).unwrap();
            let mut cells: Vec<u64> = Vec::with_capacity(lat.len().min(lon.len()));
            for (&la, &lo) in lat.iter().zip(lon) {
                let id = match h3o::LatLng::new(la, lo) {
                    Ok(coord) => u64::from(coord.to_cell(h3_res)),
                    Err(_) => 0,
                };
                cells.push(id);
            }
            eprintln!(" Resolution {} done ({} cells)", res, cells.len());
            (res, cells)
        })
        .collect();

    // Scatter the computed resolutions into a table indexed by resolution.
    let mut by_resolution: Vec<Vec<u64>> = vec![Vec::new(); 16];
    for (res, cells) in per_resolution {
        by_resolution[usize::from(res)] = cells;
    }

    eprintln!("H3 precomputation complete.");
    by_resolution
}
impl PropertyData {
/// Load the property parquet file into columnar, spatially sorted storage.
///
/// Steps, in order: discover numeric feature columns from the schema, read
/// `lat`, `lon` and those features as Float64, compute per-feature stats,
/// sort rows by 0.01° grid cell, then transpose the features to row-major.
///
/// Null `lat`/`lon` values become 0.0; null feature values become NaN.
///
/// # Panics
/// Panics if the file cannot be scanned or read, or if an expected column
/// is missing or cannot be cast to Float64.
pub fn load(parquet_path: &Path) -> Self {
eprintln!("Loading parquet from {:?}...", parquet_path);
// Scan schema to discover numeric feature columns
let mut lf = LazyFrame::scan_parquet(parquet_path, Default::default())
.expect("Failed to scan parquet");
let schema = lf.collect_schema().expect("Failed to read schema");
let feature_names: Vec<String> = schema
.iter()
.filter(|(name, dtype)| {
is_numeric_dtype(dtype) && !EXCLUDED_COLUMNS.contains(&name.as_str())
})
.map(|(name, _)| name.to_string())
.collect();
let num_features = feature_names.len();
eprintln!("Discovered {} numeric feature columns", num_features);
// Read only the columns we need (a second scan; `lf` above is used for
// the schema only). Every selected column is cast to Float64.
let mut cols_needed: Vec<String> = vec!["lat".into(), "lon".into()];
cols_needed.extend(feature_names.iter().cloned());
let df = LazyFrame::scan_parquet(parquet_path, Default::default())
.expect("Failed to scan parquet")
.select(
cols_needed
.iter()
.map(|c| col(c.as_str()).cast(DataType::Float64))
.collect::<Vec<_>>(),
)
.collect()
.expect("Failed to read parquet");
let row_count = df.height();
eprintln!("Loaded {} rows", row_count);
// Extract lat/lon using bulk iterator; null coordinates become 0.0
let lat_series = df.column("lat").unwrap().cast(&DataType::Float64).unwrap();
let lat: Vec<f64> = lat_series.f64().unwrap().into_iter().map(|v| v.unwrap_or(0.0)).collect();
let lon_series = df.column("lon").unwrap().cast(&DataType::Float64).unwrap();
let lon: Vec<f64> = lon_series.f64().unwrap().into_iter().map(|v| v.unwrap_or(0.0)).collect();
// Extract feature columns (column-major, for cache-friendly histogram computation)
eprintln!("Extracting feature columns...");
let col_major: Vec<Vec<f64>> = feature_names
.iter()
.map(|name| {
let s = df.column(name.as_str()).unwrap();
column_to_f64_vec(s)
})
.collect();
// Compute histograms in parallel (column-major is ideal for per-column iteration).
// This happens before the rows are reordered below.
eprintln!("Computing histograms...");
let feature_stats: Vec<FeatureStats> = col_major
.par_iter()
.enumerate()
.map(|(i, vals)| {
let stats = compute_feature_stats(vals);
eprintln!(
" {}: p{}={:.2}, p{}={:.2}, {} bins",
feature_names[i],
FEATURE_PERCENTILE_LOW, stats.p_low,
FEATURE_PERCENTILE_HIGH, stats.p_high,
stats.histogram.counts.len()
);
stats
})
.collect();
// Sort all rows by spatial locality so that grid queries access
// contiguous memory (sequential reads instead of random DRAM accesses).
// Uses the same 0.01° grid cell as the spatial index for the sort key.
eprintln!("Sorting rows by spatial locality...");
let grid_cell_size = 0.01_f64;
let min_lat_val = lat.iter().cloned().fold(f64::INFINITY, f64::min) - grid_cell_size;
let min_lon_val = lon.iter().cloned().fold(f64::INFINITY, f64::min) - grid_cell_size;
let max_lon_val = lon.iter().cloned().fold(f64::NEG_INFINITY, f64::max) + grid_cell_size;
let grid_cols = ((max_lon_val - min_lon_val) / grid_cell_size).ceil() as u64 + 1;
// perm[new_row] = old_row. Row indices are held as u32, so the cast below
// assumes the file has fewer than 2^32 rows.
let mut perm: Vec<u32> = (0..row_count as u32).collect();
perm.sort_unstable_by_key(|&i| {
// Key is the row-major index of the row's grid cell.
let r = ((lat[i as usize] - min_lat_val) / grid_cell_size) as u64;
let c = ((lon[i as usize] - min_lon_val) / grid_cell_size) as u64;
r * grid_cols + c
});
// Apply permutation to lat/lon
let lat: Vec<f64> = perm.iter().map(|&i| lat[i as usize]).collect();
let lon: Vec<f64> = perm.iter().map(|&i| lon[i as usize]).collect();
// Transpose to row-major AND apply spatial permutation in one pass.
// Result: all features for one row are contiguous, and spatially
// nearby rows are adjacent in memory.
eprintln!("Transposing to row-major layout (spatially sorted)...");
let mut feature_data = vec![f64::NAN; row_count * num_features];
for (new_row, &old_row) in perm.iter().enumerate() {
let old = old_row as usize;
let dst_base = new_row * num_features;
for (feat_idx, col_vec) in col_major.iter().enumerate() {
feature_data[dst_base + feat_idx] = col_vec[old];
}
}
eprintln!("Data loading complete.");
PropertyData {
lat,
lon,
feature_names,
num_features,
feature_data,
feature_stats,
}
}
}
/// A single point of interest as serialized in the POI response
#[derive(Serialize)]
pub struct POI {
/// Identifier copied from the `id` column
pub id: String,
/// Name copied from the `name` column
pub name: String,
/// Category copied from the `category` column
pub category: String,
/// Latitude
pub lat: f64,
/// Longitude (note: `lng`, whereas the property data uses `lon`)
pub lng: f64,
/// Value copied from the `emoji` column
pub emoji: String,
}
/// Columnar storage for POI data.
///
/// Every Vec is indexed by the same row index, so entry `i` of each field
/// describes the same POI.
pub struct POIData {
/// `id` column; nulls are stored as empty strings
pub id: Vec<String>,
/// `name` column; nulls are stored as empty strings
pub name: Vec<String>,
/// `category` column; nulls are stored as empty strings
pub category: Vec<String>,
/// `lat` column; nulls are stored as 0.0
pub lat: Vec<f64>,
/// `lng` column; nulls are stored as 0.0
pub lng: Vec<f64>,
/// `emoji` column; nulls are stored as empty strings
pub emoji: Vec<String>,
}
impl POIData {
    /// Read the POI parquet file into columnar storage.
    ///
    /// The file must provide string columns `id`, `name`, `category` and
    /// `emoji` and Float64 columns `lat` and `lng`. Null strings become `""`
    /// and null coordinates become `0.0`.
    ///
    /// # Panics
    /// Panics if the file cannot be scanned or read, or if a required column
    /// is missing or has a different type.
    pub fn load(parquet_path: &Path) -> Self {
        eprintln!("Loading POI data from {:?}...", parquet_path);
        let df = LazyFrame::scan_parquet(parquet_path, Default::default())
            .expect("Failed to scan POI parquet")
            .collect()
            .expect("Failed to read POI parquet");
        eprintln!("Loaded {} POIs", df.height());

        // Pull a string column out of the frame, mapping nulls to "".
        let text_column = |column: &str| -> Vec<String> {
            df.column(column)
                .unwrap()
                .str()
                .unwrap()
                .into_iter()
                .map(|cell| cell.unwrap_or("").to_string())
                .collect()
        };
        // Pull a Float64 column out of the frame, mapping nulls to 0.0.
        let coord_column = |column: &str| -> Vec<f64> {
            df.column(column)
                .unwrap()
                .f64()
                .unwrap()
                .into_iter()
                .map(|cell| cell.unwrap_or(0.0))
                .collect()
        };

        // Same extraction order as the struct fields.
        let id = text_column("id");
        let name = text_column("name");
        let category = text_column("category");
        let lat = coord_column("lat");
        let lng = coord_column("lng");
        let emoji = text_column("emoji");

        eprintln!("POI data loading complete.");
        POIData {
            id,
            name,
            category,
            lat,
            lng,
            emoji,
        }
    }
}

130
server-rs/src/index.rs Normal file
View file

@ -0,0 +1,130 @@
/// Uniform lat/lon grid used to answer rectangle queries over point rows.
///
/// The bounding box of the indexed points (plus a one-cell margin on every
/// side) is cut into square cells of `cell_size` degrees. Each cell stores
/// the indices of the rows whose coordinates fall inside it.
pub struct GridIndex {
    /// Latitude of the grid's southern edge (including margin)
    min_lat: f64,
    /// Longitude of the grid's western edge (including margin)
    min_lon: f64,
    /// Side length of one cell, in degrees
    cell_size: f64,
    /// Number of cells along the longitude axis
    cols: usize,
    /// Number of cells along the latitude axis
    rows: usize,
    /// cells[row * cols + col] = indices of the rows located in that cell
    cells: Vec<Vec<u32>>,
}

impl GridIndex {
    /// Build the index for the points `(lat[i], lon[i])`.
    ///
    /// Panics if `lon` is shorter than `lat`.
    pub fn build(lat: &[f64], lon: &[f64], cell_size: f64) -> Self {
        // Only the first lat.len() longitudes are ever read.
        let lon = &lon[..lat.len()];

        // Bounding box of the data.
        let (mut south, mut north) = (f64::INFINITY, f64::NEG_INFINITY);
        let (mut west, mut east) = (f64::INFINITY, f64::NEG_INFINITY);
        for (&la, &lo) in lat.iter().zip(lon) {
            if la < south {
                south = la;
            }
            if la > north {
                north = la;
            }
            if lo < west {
                west = lo;
            }
            if lo > east {
                east = lo;
            }
        }

        // One cell of margin on every side.
        let min_lat = south - cell_size;
        let min_lon = west - cell_size;
        let max_lat = north + cell_size;
        let max_lon = east + cell_size;

        let rows = ((max_lat - min_lat) / cell_size).ceil() as usize + 1;
        let cols = ((max_lon - min_lon) / cell_size).ceil() as usize + 1;
        eprintln!(
            "Building grid index: {}x{} cells ({} total), cell_size={}",
            rows,
            cols,
            rows * cols,
            cell_size
        );

        // Drop every point into its cell.
        let mut cells: Vec<Vec<u32>> = vec![Vec::new(); rows * cols];
        for (i, (&la, &lo)) in lat.iter().zip(lon).enumerate() {
            let r = ((la - min_lat) / cell_size) as usize;
            let c = ((lo - min_lon) / cell_size) as usize;
            cells[r * cols + c].push(i as u32);
        }
        eprintln!("Grid index built.");

        GridIndex {
            min_lat,
            min_lon,
            cell_size,
            cols,
            rows,
            cells,
        }
    }

    /// Collect the row indices of every cell that overlaps the bounding box.
    ///
    /// Results are whole-cell granular: a returned row may lie slightly
    /// outside the requested rectangle.
    pub fn query(&self, south: f64, west: f64, north: f64, east: f64) -> Vec<u32> {
        let (row_lo, row_hi, col_lo, col_hi) = self.clamp_bounds(south, west, north, east);
        let mut hits = Vec::new();
        for row in row_lo..=row_hi {
            let base = row * self.cols;
            for bucket in (col_lo..=col_hi).map(|col| &self.cells[base + col]) {
                hits.extend_from_slice(bucket);
            }
        }
        hits
    }

    /// Visit the same rows as [`GridIndex::query`], in the same order, by
    /// calling `f` on each index instead of allocating a Vec.
    #[inline]
    pub fn for_each_in_bounds(
        &self,
        south: f64,
        west: f64,
        north: f64,
        east: f64,
        mut f: impl FnMut(u32),
    ) {
        let (row_lo, row_hi, col_lo, col_hi) = self.clamp_bounds(south, west, north, east);
        for row in row_lo..=row_hi {
            let base = row * self.cols;
            for col in col_lo..=col_hi {
                self.cells[base + col].iter().copied().for_each(&mut f);
            }
        }
    }

    /// Convert a bounding box into inclusive `(r_min, r_max, c_min, c_max)`
    /// cell ranges clamped to the grid. A box entirely past the far edge
    /// yields `min > max`, i.e. an empty range.
    fn clamp_bounds(&self, south: f64, west: f64, north: f64, east: f64) -> (usize, usize, usize, usize) {
        let to_cell = |coord: f64, origin: f64| ((coord - origin) / self.cell_size) as isize;
        let last_row = self.rows as isize - 1;
        let last_col = self.cols as isize - 1;

        let r_min = to_cell(south, self.min_lat).max(0) as usize;
        let r_max = to_cell(north, self.min_lat).min(last_row).max(0) as usize;
        let c_min = to_cell(west, self.min_lon).max(0) as usize;
        let c_max = to_cell(east, self.min_lon).min(last_col).max(0) as usize;
        (r_min, r_max, c_min, c_max)
    }
}

109
server-rs/src/main.rs Normal file
View file

@ -0,0 +1,109 @@
mod consts;
mod data;
mod index;
mod routes;
use std::path::PathBuf;
use std::sync::Arc;
use axum::routing::get;
use axum::Router;
use tower_http::compression::CompressionLayer;
use tower_http::cors::{Any, CorsLayer};
use tower_http::services::ServeDir;
use routes::AppState;
/// Server entry point.
///
/// Loads the property parquet (path from the first CLI argument, falling back
/// to a default), builds the spatial and H3 indices, optionally loads POI
/// data, then serves the JSON API — and the frontend bundle if it exists —
/// on port 8001. Exits with status 1 if the property parquet is missing and
/// panics if the listener cannot be bound or the server fails.
#[tokio::main]
async fn main() {
let parquet_path = PathBuf::from(
std::env::args()
.nth(1)
.unwrap_or_else(|| "data_sources/processed/wide.parquet".to_string()),
);
if !parquet_path.exists() {
eprintln!("Error: {} not found.", parquet_path.display());
std::process::exit(1);
}
// Load property data and build indices.
// The 0.01 cell size matches the grid used for the spatial sort in
// PropertyData::load.
let property_data = data::PropertyData::load(&parquet_path);
let grid = index::GridIndex::build(&property_data.lat, &property_data.lon, 0.01);
let h3_cells = data::precompute_h3(&property_data.lat, &property_data.lon);
// Load POI data and build spatial index
// Derive the POI path from the data parquet path: the file is expected in
// the directory one level ABOVE the parquet's own directory (with the
// default path that is data_sources/filtered_uk_pois.parquet).
let poi_path = parquet_path
.parent()
.and_then(|p| p.parent())
.map(|p| p.join("filtered_uk_pois.parquet"))
.unwrap_or_else(|| PathBuf::from("data_sources/filtered_uk_pois.parquet"));
let poi_data = if poi_path.exists() {
data::POIData::load(&poi_path)
} else {
// Missing POI file is not fatal: serve with an empty POI set instead.
eprintln!("Warning: {} not found. POI endpoints will be unavailable.", poi_path.display());
data::POIData {
id: Vec::new(),
name: Vec::new(),
category: Vec::new(),
lat: Vec::new(),
lng: Vec::new(),
emoji: Vec::new(),
}
};
let poi_grid = index::GridIndex::build(&poi_data.lat, &poi_data.lng, 0.01);
// Everything is read-only after startup, so it is shared via a plain Arc.
let state = Arc::new(AppState {
data: property_data,
grid,
h3_cells,
poi_data,
poi_grid,
});
// CORS is fully permissive: any origin, method and header.
let cors = CorsLayer::new()
.allow_origin(Any)
.allow_methods(Any)
.allow_headers(Any);
// API routes. Each handler closure owns its own Arc clone and hands a
// fresh clone to the route function on every request.
let state_features = state.clone();
let state_hexagons = state.clone();
let state_pois = state.clone();
let state_poi_categories = state.clone();
let api = Router::new()
.route(
"/api/features",
get(move || routes::get_features(state_features.clone())),
)
.route(
"/api/hexagons",
get(move |query| routes::get_hexagons(state_hexagons.clone(), query)),
)
.route(
"/api/pois",
get(move |query| routes::get_pois(state_pois.clone(), query)),
)
.route(
"/api/poi-categories",
get(move || routes::get_poi_categories(state_poi_categories.clone())),
);
// Static file serving for frontend (only when the build output exists,
// resolved relative to the current working directory)
let frontend_dist = PathBuf::from("frontend/dist");
let app = if frontend_dist.exists() {
api.fallback_service(ServeDir::new(frontend_dist))
} else {
api
};
let app = app.layer(cors).layer(CompressionLayer::new().gzip(true));
let addr = "0.0.0.0:8001";
eprintln!("Server listening on {}", addr);
let listener = tokio::net::TcpListener::bind(addr).await.unwrap();
axum::serve(listener, app).await.unwrap();
}

461
server-rs/src/routes.rs Normal file
View file

@ -0,0 +1,461 @@
use std::fmt::Write;
use std::sync::Arc;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::{IntoResponse, Json};
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use crate::data::{Histogram, PropertyData, POIData, POI, DEFAULT_RESOLUTION, MAX_RESOLUTION, MIN_RESOLUTION};
use crate::index::GridIndex;
/// Shared application state, built once at startup and handed to every
/// request handler behind an `Arc`.
pub struct AppState {
/// Property rows and their feature columns
pub data: PropertyData,
/// Spatial index over the rows of `data`
pub grid: GridIndex,
/// h3_cells[resolution][row_idx] = precomputed H3 cell ID.
/// Empty Vec for resolutions not precomputed.
pub h3_cells: Vec<Vec<u64>>,
/// Points of interest (all Vecs empty when the POI file was not found)
pub poi_data: POIData,
/// Spatial index over the rows of `poi_data`
pub poi_grid: GridIndex,
}
/// Extra margin added on every side of the requested hexagon bounds,
/// expressed as a fraction of the requested span (0.2 = 20%), despite the
/// `PERCENT` in the name.
const BOUNDS_BUFFER_PERCENT: f64 = 0.2;
// ── /api/features ──
/// Metadata for one feature as returned by `/api/features`
#[derive(Serialize)]
pub struct FeatureInfo {
/// Column name of the feature
name: String,
/// Low-percentile value of the feature (not the absolute minimum)
min: f64,
/// High-percentile value of the feature (not the absolute maximum)
max: f64,
/// Human-readable label derived from `name`
label: String,
/// Distribution of the feature's values
histogram: Histogram,
}
/// Response body of `/api/features`
#[derive(Serialize)]
pub struct FeaturesResponse {
/// One entry per discovered feature column
features: Vec<FeatureInfo>,
}
/// Turn a `snake_case` identifier into a display label: underscores become
/// spaces and the first character of every word is upper-cased
/// (`"house_price"` -> `"House Price"`). Empty words produced by leading,
/// trailing or doubled underscores are kept as empty segments.
fn snake_to_label(name: &str) -> String {
    let mut label = String::with_capacity(name.len());
    for (idx, word) in name.split('_').enumerate() {
        if idx > 0 {
            label.push(' ');
        }
        let mut rest = word.chars();
        if let Some(initial) = rest.next() {
            // Upper-casing a char may expand to several chars (e.g. 'ß').
            label.extend(initial.to_uppercase());
            label.push_str(rest.as_str());
        }
    }
    label
}
/// Handler for `/api/features`: lists every feature with its reporting
/// range (low/high percentile), display label and histogram.
pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
    let data = &state.data;
    let mut features = Vec::with_capacity(data.feature_names.len());
    for (idx, name) in data.feature_names.iter().enumerate() {
        // feature_stats is stored in the same order as feature_names.
        let stats = &data.feature_stats[idx];
        features.push(FeatureInfo {
            name: name.clone(),
            min: stats.p_low,
            max: stats.p_high,
            label: snake_to_label(name),
            histogram: stats.histogram.clone(),
        });
    }
    Json(FeaturesResponse { features })
}
// ── /api/hexagons ──
/// Query-string parameters of `/api/hexagons`
#[derive(Deserialize)]
pub struct HexagonParams {
/// H3 resolution to aggregate at; `DEFAULT_RESOLUTION` when omitted
resolution: Option<u8>,
/// Required viewport as `south,west,north,east`
bounds: Option<String>,
/// Comma-separated filters: `name:min:max,...`
/// Rows must have non-NaN values within [min,max] for each filter.
filters: Option<String>,
}
/// One `name:min:max` filter after the feature name has been resolved
struct ParsedFilter {
/// Index of the feature within `PropertyData::feature_names`
feat_idx: usize,
/// Inclusive lower bound
min: f64,
/// Inclusive upper bound
max: f64,
}
/// Running aggregate for the rows that fall into one H3 cell: how many rows
/// were added, and the smallest/largest finite value seen for each feature.
struct CellAgg {
    /// Number of rows added so far
    count: u32,
    /// Per-feature minimum; stays at +infinity until a finite value is seen
    mins: Vec<f64>,
    /// Per-feature maximum; stays at -infinity until a finite value is seen
    maxs: Vec<f64>,
}

impl CellAgg {
    /// Create an empty accumulator for `num_features` features.
    fn new(num_features: usize) -> Self {
        Self {
            count: 0,
            mins: vec![f64::INFINITY; num_features],
            maxs: vec![f64::NEG_INFINITY; num_features],
        }
    }

    /// Fold row `row` of the row-major `feature_data` array into the
    /// aggregate. The row's features occupy
    /// `feature_data[row * num_features..][..num_features]`. Non-finite
    /// values (NaN nulls, infinities) are ignored for min/max, but the row
    /// still counts.
    #[inline]
    fn add_row(&mut self, feature_data: &[f64], row: usize, num_features: usize) {
        self.count += 1;
        let start = row * num_features;
        let values = &feature_data[start..start + num_features];
        for (feat, value) in values.iter().copied().enumerate() {
            if !value.is_finite() {
                continue;
            }
            let lowest = &mut self.mins[feat];
            if value < *lowest {
                *lowest = value;
            }
            let highest = &mut self.maxs[feat];
            if value > *highest {
                *highest = value;
            }
        }
    }
}
/// Write the hexagons JSON response directly to a String buffer,
/// avoiding serde_json::Value allocations entirely.
///
/// Output shape: `{"features":[{"h3":"<cell>","count":N,"min_x":..,"max_x":..},...]}`.
/// `min_keys[i]` / `max_keys[i]` are the JSON keys for feature `i`; a feature
/// is omitted from a cell's object when no finite value was seen for it.
///
/// Entries whose key is not a valid H3 cell index are skipped. Rows with
/// unusable coordinates are aggregated under the sentinel key 0, which used
/// to make this function panic and take the whole request down.
fn write_hexagons_json(
    buf: &mut String,
    groups: &FxHashMap<u64, CellAgg>,
    min_keys: &[String],
    max_keys: &[String],
    num_features: usize,
) {
    buf.push_str("{\"features\":[");
    let mut first = true;
    for (&cell_id, agg) in groups {
        // Validate before emitting the separator so a skipped entry cannot
        // leave a dangling comma behind.
        let cell = match h3o::CellIndex::try_from(cell_id) {
            Ok(cell) => cell,
            Err(_) => continue,
        };
        if !first {
            buf.push(',');
        }
        first = false;

        // Writing into a String cannot fail.
        write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, agg.count).unwrap();
        for i in 0..num_features {
            // mins[i] is still +infinity when the cell had no finite value
            // for this feature; such features are left out entirely.
            if agg.mins[i] != f64::INFINITY {
                write!(
                    buf,
                    ",\"{}\":{},\"{}\":{}",
                    min_keys[i], agg.mins[i], max_keys[i], agg.maxs[i]
                )
                .unwrap();
            }
        }
        buf.push('}');
    }
    buf.push_str("]}");
}
/// Handler for `/api/hexagons`: aggregates the property rows inside the
/// requested viewport into H3 cells and returns per-cell row counts and
/// per-feature min/max values as JSON.
///
/// Query parameters:
/// - `resolution`: H3 resolution (defaults to `DEFAULT_RESOLUTION`).
/// - `bounds` (required): `south,west,north,east`; all four must be finite.
/// - `filters`: `name:min:max,...`; malformed entries and unknown feature
///   names are ignored.
///
/// The viewport is widened by `BOUNDS_BUFFER_PERCENT` on each side and
/// snapped outwards to 0.01°. Rows whose coordinates do not map to a valid
/// H3 cell are left out of the aggregation.
///
/// # Errors
/// `400 Bad Request` for an out-of-range resolution or missing/invalid
/// bounds; `500 Internal Server Error` if the aggregation task fails.
pub async fn get_hexagons(
    state: Arc<AppState>,
    Query(params): Query<HexagonParams>,
) -> Result<impl IntoResponse, (StatusCode, String)> {
    let bad_bounds = || {
        (
            StatusCode::BAD_REQUEST,
            "Invalid bounds format. Use: south,west,north,east".to_string(),
        )
    };

    let resolution = params.resolution.unwrap_or(DEFAULT_RESOLUTION);
    if resolution > MAX_RESOLUTION {
        return Err((
            StatusCode::BAD_REQUEST,
            format!(
                "resolution must be between {} and {}",
                MIN_RESOLUTION, MAX_RESOLUTION
            ),
        ));
    }

    let bounds_str = params.bounds.ok_or_else(|| {
        (
            StatusCode::BAD_REQUEST,
            "bounds parameter is required".to_string(),
        )
    })?;
    let parts: Vec<f64> = bounds_str
        .split(',')
        .map(|s| s.trim().parse::<f64>())
        .collect::<Result<Vec<_>, _>>()
        .map_err(|_| bad_bounds())?;
    // `f64::from_str` accepts "nan" and "inf", so finiteness is checked
    // explicitly alongside the element count.
    let (mut south, mut west, mut north, mut east) = match parts.as_slice() {
        &[s, w, n, e] if parts.iter().all(|v| v.is_finite()) => (s, w, n, e),
        _ => return Err(bad_bounds()),
    };

    // Apply bounds buffer (20%)
    let lat_range = north - south;
    let lng_range = east - west;
    south -= lat_range * BOUNDS_BUFFER_PERCENT;
    north += lat_range * BOUNDS_BUFFER_PERCENT;
    west -= lng_range * BOUNDS_BUFFER_PERCENT;
    east += lng_range * BOUNDS_BUFFER_PERCENT;

    // Quantize to 0.01 degree precision (always outwards)
    let precision = 0.01;
    south = (south / precision).floor() * precision;
    west = (west / precision).floor() * precision;
    north = (north / precision).ceil() * precision;
    east = (east / precision).ceil() * precision;

    // Parse filters: `name:min:max,...`
    let parsed_filters: Vec<ParsedFilter> = params
        .filters
        .as_deref()
        .filter(|s| !s.is_empty())
        .map(|s| {
            s.split(',')
                .filter_map(|entry| {
                    let parts: Vec<&str> = entry.splitn(3, ':').collect();
                    if parts.len() != 3 {
                        return None;
                    }
                    let name = parts[0].trim();
                    let min = parts[1].trim().parse::<f64>().ok()?;
                    let max = parts[2].trim().parse::<f64>().ok()?;
                    let feat_idx = state.data.feature_names.iter().position(|n| n == name)?;
                    Some(ParsedFilter { feat_idx, min, max })
                })
                .collect()
        })
        .unwrap_or_default();

    // Move CPU-heavy work off the async executor
    let json_body = tokio::task::spawn_blocking(move || {
        let t0 = std::time::Instant::now();
        let num_features = state.data.num_features;
        let feature_data = &state.data.feature_data;

        // Pre-compute JSON key strings once
        let min_keys: Vec<String> = state
            .data
            .feature_names
            .iter()
            .map(|n| format!("min_{}", n))
            .collect();
        let max_keys: Vec<String> = state
            .data
            .feature_names
            .iter()
            .map(|n| format!("max_{}", n))
            .collect();

        // Use precomputed H3 cells if available
        let h3_cells_for_res: Option<&[u64]> = state
            .h3_cells
            .get(resolution as usize)
            .filter(|v| !v.is_empty())
            .map(|v| v.as_slice());

        // Aggregate using FxHashMap (fast non-crypto hash for integer keys)
        // and grid visitor (no intermediate Vec<u32> allocation)
        let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();

        // Row-level filter check: value must be finite and within [min, max]
        let row_passes = |row: usize| -> bool {
            parsed_filters.iter().all(|f| {
                let v = feature_data[row * num_features + f.feat_idx];
                v.is_finite() && v >= f.min && v <= f.max
            })
        };

        if let Some(precomputed) = h3_cells_for_res {
            // Fast path: precomputed H3 + visitor pattern
            state.grid.for_each_in_bounds(south, west, north, east, |row_idx| {
                let row = row_idx as usize;
                if !row_passes(row) {
                    return;
                }
                let cell_id = precomputed[row];
                // 0 is the sentinel stored for rows whose coordinates could
                // not be converted to an H3 cell.
                if cell_id == 0 {
                    return;
                }
                groups
                    .entry(cell_id)
                    .or_insert_with(|| CellAgg::new(num_features))
                    .add_row(feature_data, row, num_features);
            });
        } else {
            // Fallback: compute H3 on-the-fly
            let h3_res = h3o::Resolution::try_from(resolution)
                .expect("resolution was validated against MAX_RESOLUTION");
            state.grid.for_each_in_bounds(south, west, north, east, |row_idx| {
                let row = row_idx as usize;
                if !row_passes(row) {
                    return;
                }
                // Rows with unusable coordinates have no cell; skip them.
                let cell_id = match h3o::LatLng::new(state.data.lat[row], state.data.lon[row]) {
                    Ok(coord) => u64::from(coord.to_cell(h3_res)),
                    Err(_) => return,
                };
                groups
                    .entry(cell_id)
                    .or_insert_with(|| CellAgg::new(num_features))
                    .add_row(feature_data, row, num_features);
            });
        }
        let t_agg = t0.elapsed();

        // Write JSON directly (no serde_json::Value allocation overhead)
        let mut json_buf = String::with_capacity(groups.len() * 128);
        write_hexagons_json(
            &mut json_buf,
            &groups,
            &min_keys,
            &max_keys,
            num_features,
        );
        let t_total = t0.elapsed();
        eprintln!(
            "hexagons: res={} cells={} agg={:?} json={:?} total={:?} bytes={}",
            resolution,
            groups.len(),
            t_agg,
            t_total - t_agg,
            t_total,
            json_buf.len()
        );
        json_buf
    })
    .await
    .map_err(|err| {
        // The worker panicked or was cancelled: report it and answer with a
        // 500 instead of panicking inside the handler.
        eprintln!("hexagons: aggregation task failed: {}", err);
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            "hexagon aggregation failed".to_string(),
        )
    })?;

    Ok(([("content-type", "application/json")], json_body))
}
// ── /api/pois ──
/// Query-string parameters of `/api/pois`
#[derive(Deserialize)]
pub struct POIParams {
/// Required viewport as `south,west,north,east`
bounds: Option<String>,
/// Comma-separated list of categories to filter by
categories: Option<String>,
}
/// Response body of `/api/pois`
#[derive(Serialize)]
pub struct POIsResponse {
/// Matching points of interest
pois: Vec<POI>,
}
/// Handler for `/api/pois`: returns up to 5000 points of interest from the
/// grid cells overlapping the requested viewport, optionally restricted to
/// a set of categories.
///
/// Query parameters:
/// - `bounds` (required): `south,west,north,east`; all four must be finite.
/// - `categories`: comma-separated category names; omitted or empty means
///   no category filtering.
///
/// The spatial match is whole-cell granular, so POIs slightly outside the
/// viewport can be included.
///
/// # Errors
/// `400 Bad Request` for missing/invalid bounds; `500 Internal Server
/// Error` if the query task fails.
pub async fn get_pois(
    state: Arc<AppState>,
    Query(params): Query<POIParams>,
) -> Result<Json<POIsResponse>, (StatusCode, String)> {
    let bad_bounds = || {
        (
            StatusCode::BAD_REQUEST,
            "Invalid bounds format. Use: south,west,north,east".to_string(),
        )
    };

    let bounds_str = params.bounds.ok_or_else(|| {
        (
            StatusCode::BAD_REQUEST,
            "bounds parameter is required".to_string(),
        )
    })?;
    let parts: Vec<f64> = bounds_str
        .split(',')
        .map(|s| s.trim().parse::<f64>())
        .collect::<Result<Vec<_>, _>>()
        .map_err(|_| bad_bounds())?;
    // `f64::from_str` accepts "nan" and "inf", so finiteness is checked
    // explicitly alongside the element count.
    let (south, west, north, east) = match parts.as_slice() {
        &[s, w, n, e] if parts.iter().all(|v| v.is_finite()) => (s, w, n, e),
        _ => return Err(bad_bounds()),
    };

    // Parse category filter if provided
    let category_filter: Option<Vec<String>> = params
        .categories
        .as_deref()
        .filter(|s| !s.is_empty())
        .map(|s| s.split(',').map(|c| c.trim().to_string()).collect());

    // Move CPU-heavy work off the async executor
    let result = tokio::task::spawn_blocking(move || {
        // Spatial query using grid index
        let row_indices = state.poi_grid.query(south, west, north, east);
        let pois: Vec<POI> = row_indices
            .iter()
            .filter_map(|&row_idx| {
                let row = row_idx as usize;
                // Apply category filter if specified
                if let Some(ref categories) = category_filter {
                    if !categories.contains(&state.poi_data.category[row]) {
                        return None;
                    }
                }
                Some(POI {
                    id: state.poi_data.id[row].clone(),
                    name: state.poi_data.name[row].clone(),
                    category: state.poi_data.category[row].clone(),
                    lat: state.poi_data.lat[row],
                    lng: state.poi_data.lng[row],
                    emoji: state.poi_data.emoji[row].clone(),
                })
            })
            // Cap the response size; applied after category filtering.
            .take(5000)
            .collect();
        POIsResponse { pois }
    })
    .await
    .map_err(|err| {
        // The worker panicked or was cancelled: report it and answer with a
        // 500 instead of panicking inside the handler.
        eprintln!("pois: query task failed: {}", err);
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            "POI query failed".to_string(),
        )
    })?;

    Ok(Json(result))
}
// ── /api/poi-categories ──
/// Response body of `/api/poi-categories`
#[derive(Serialize)]
pub struct POICategoriesResponse {
/// Distinct POI categories, sorted ascending
categories: Vec<String>,
}
/// Handler for `/api/poi-categories`: returns the distinct POI categories
/// in ascending order. The scan runs on the blocking pool because it touches
/// every POI row.
pub async fn get_poi_categories(state: Arc<AppState>) -> Json<POICategoriesResponse> {
    let response = tokio::task::spawn_blocking(move || {
        // An ordered set of references de-duplicates and sorts in one step;
        // only the distinct names are cloned into the response.
        let unique: std::collections::BTreeSet<&String> =
            state.poi_data.category.iter().collect();
        POICategoriesResponse {
            categories: unique.into_iter().cloned().collect(),
        }
    })
    .await
    .unwrap();
    Json(response)
}