636 lines
19 KiB
Rust
636 lines
19 KiB
Rust
use std::fmt::Write;
|
|
use std::str::FromStr;
|
|
use std::sync::Arc;
|
|
|
|
use axum::extract::Query;
|
|
use axum::http::StatusCode;
|
|
use axum::response::{IntoResponse, Json};
|
|
use rustc_hash::FxHashMap;
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
use crate::consts::{H3_PRECOMPUTE_MAX, H3_PRECOMPUTE_MIN};
|
|
use crate::data::{Histogram, POIData, PropertyData, POI};
|
|
use crate::index::GridIndex;
|
|
|
|
/// Shared application state
|
|
pub struct AppState {
|
|
pub data: PropertyData,
|
|
pub grid: GridIndex,
|
|
/// h3_cells[resolution][row_idx] = precomputed H3 cell ID.
|
|
/// Empty Vec for resolutions not precomputed.
|
|
pub h3_cells: Vec<Vec<u64>>,
|
|
pub poi_data: POIData,
|
|
pub poi_grid: GridIndex,
|
|
}
|
|
|
|
/// Fraction of the requested latitude/longitude extent that `get_hexagons`
/// adds to each side of the bounding box (0.2 = 20%). Despite the name this
/// is a ratio, not a percentage.
const BOUNDS_BUFFER_PERCENT: f64 = 0.2;
|
|
|
|
// ── /api/features ──
|
|
|
|
#[derive(Serialize)]
|
|
pub struct FeatureInfo {
|
|
name: String,
|
|
min: f64,
|
|
max: f64,
|
|
label: String,
|
|
histogram: Histogram,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
pub struct FeaturesResponse {
|
|
features: Vec<FeatureInfo>,
|
|
}
|
|
|
|
/// Converts a `snake_case` identifier into a space-separated label with the
/// first character of every word upper-cased (`"floor_area"` -> `"Floor Area"`).
///
/// Empty words (from leading, trailing or doubled underscores) are kept as
/// empty segments, so each underscore always becomes exactly one space.
fn snake_to_label(name: &str) -> String {
    let mut label = String::with_capacity(name.len());
    for (position, word) in name.split('_').enumerate() {
        if position > 0 {
            label.push(' ');
        }
        let mut rest = word.chars();
        if let Some(initial) = rest.next() {
            // Upper-casing may expand to several characters (e.g. 'ß' -> "SS").
            label.extend(initial.to_uppercase());
            label.push_str(rest.as_str());
        }
    }
    label
}
|
|
|
|
pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
|
|
let features = state
|
|
.data
|
|
.feature_names
|
|
.iter()
|
|
.enumerate()
|
|
.map(|(i, name): (usize, &String)| {
|
|
let stats = &state.data.feature_stats[i];
|
|
FeatureInfo {
|
|
name: name.clone(),
|
|
min: stats.p_low,
|
|
max: stats.p_high,
|
|
label: snake_to_label(name),
|
|
histogram: stats.histogram.clone(),
|
|
}
|
|
})
|
|
.collect();
|
|
|
|
Json(FeaturesResponse { features })
|
|
}
|
|
|
|
// ── /api/hexagons ──
|
|
|
|
#[derive(Deserialize)]
|
|
pub struct HexagonParams {
|
|
resolution: u8,
|
|
bounds: Option<String>,
|
|
/// Comma-separated filters: `name:min:max,...`
|
|
/// Rows must have non-NaN values within [min,max] for each filter.
|
|
filters: Option<String>,
|
|
}
|
|
|
|
/// One `name:min:max` filter after the feature name has been resolved to a column.
struct ParsedFilter {
    /// Index of the feature within a row of the feature matrix.
    feat_idx: usize,
    /// Inclusive lower bound.
    min: f64,
    /// Inclusive upper bound.
    max: f64,
}
|
|
|
|
/// Per-cell accumulator: row count plus the running minimum and maximum of
/// every feature.
struct CellAgg {
    /// Number of rows added so far.
    count: u32,
    /// Smallest finite value seen per feature; `f64::INFINITY` until one is seen.
    mins: Vec<f64>,
    /// Largest finite value seen per feature; `f64::NEG_INFINITY` until one is seen.
    maxs: Vec<f64>,
}

impl CellAgg {
    /// Creates an empty accumulator sized for `num_features` features.
    fn new(num_features: usize) -> Self {
        Self {
            count: 0,
            mins: vec![f64::INFINITY; num_features],
            maxs: vec![f64::NEG_INFINITY; num_features],
        }
    }

    /// Folds one row into the accumulator.
    ///
    /// `feature_data` is row-major: the value of feature `i` for `row` lives at
    /// `feature_data[row * num_features + i]`, so a row's features are contiguous.
    /// Non-finite values (NaN, +/-infinity) count towards `count` but never
    /// affect the minimum or maximum.
    #[inline]
    fn add_row(&mut self, feature_data: &[f64], row: usize, num_features: usize) {
        self.count += 1;

        let start = row * num_features;
        let values = &feature_data[start..start + num_features];

        for (idx, value) in values.iter().copied().enumerate() {
            if !value.is_finite() {
                continue;
            }
            let lowest = &mut self.mins[idx];
            if value < *lowest {
                *lowest = value;
            }
            let highest = &mut self.maxs[idx];
            if value > *highest {
                *highest = value;
            }
        }
    }
}
|
|
|
|
/// Write the hexagons JSON response directly to a String buffer,
|
|
/// avoiding serde_json::Value allocations entirely.
|
|
fn write_hexagons_json(
|
|
buf: &mut String,
|
|
groups: &FxHashMap<u64, CellAgg>,
|
|
min_keys: &[String],
|
|
max_keys: &[String],
|
|
num_features: usize,
|
|
) {
|
|
buf.push_str("{\"features\":[");
|
|
let mut first = true;
|
|
for (&cell_id, agg) in groups {
|
|
if !first {
|
|
buf.push(',');
|
|
}
|
|
first = false;
|
|
|
|
let cell = h3o::CellIndex::try_from(cell_id).unwrap();
|
|
write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, agg.count).unwrap();
|
|
|
|
for i in 0..num_features {
|
|
if agg.mins[i] != f64::INFINITY {
|
|
write!(
|
|
buf,
|
|
",\"{}\":{},\"{}\":{}",
|
|
min_keys[i], agg.mins[i], max_keys[i], agg.maxs[i]
|
|
)
|
|
.unwrap();
|
|
}
|
|
}
|
|
buf.push('}');
|
|
}
|
|
buf.push_str("]}");
|
|
}
|
|
|
|
pub async fn get_hexagons(
|
|
state: Arc<AppState>,
|
|
Query(params): Query<HexagonParams>,
|
|
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
|
let resolution = params.resolution;
|
|
if resolution < H3_PRECOMPUTE_MIN || resolution > H3_PRECOMPUTE_MAX {
|
|
return Err((
|
|
StatusCode::BAD_REQUEST,
|
|
format!(
|
|
"resolution must be between {} and {}",
|
|
H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX
|
|
),
|
|
));
|
|
}
|
|
|
|
let bounds_str = params.bounds.ok_or((
|
|
StatusCode::BAD_REQUEST,
|
|
"bounds parameter is required".into(),
|
|
))?;
|
|
|
|
let parts: Vec<f64> = bounds_str
|
|
.split(',')
|
|
.map(|s| s.trim().parse::<f64>())
|
|
.collect::<Result<Vec<_>, _>>()
|
|
.map_err(|_| {
|
|
(
|
|
StatusCode::BAD_REQUEST,
|
|
"Invalid bounds format. Use: south,west,north,east".into(),
|
|
)
|
|
})?;
|
|
|
|
if parts.len() != 4 {
|
|
return Err((
|
|
StatusCode::BAD_REQUEST,
|
|
"Invalid bounds format. Use: south,west,north,east".into(),
|
|
));
|
|
}
|
|
|
|
let (mut south, mut west, mut north, mut east) = (parts[0], parts[1], parts[2], parts[3]);
|
|
|
|
// Apply bounds buffer (20%)
|
|
let lat_range = north - south;
|
|
let lng_range = east - west;
|
|
south -= lat_range * BOUNDS_BUFFER_PERCENT;
|
|
north += lat_range * BOUNDS_BUFFER_PERCENT;
|
|
west -= lng_range * BOUNDS_BUFFER_PERCENT;
|
|
east += lng_range * BOUNDS_BUFFER_PERCENT;
|
|
|
|
// Quantize to 0.01 degree precision
|
|
let precision = 0.01;
|
|
south = (south / precision).floor() * precision;
|
|
west = (west / precision).floor() * precision;
|
|
north = (north / precision).ceil() * precision;
|
|
east = (east / precision).ceil() * precision;
|
|
|
|
// Parse filters: `name:min:max,...`
|
|
let parsed_filters: Vec<ParsedFilter> = params
|
|
.filters
|
|
.as_deref()
|
|
.filter(|s| !s.is_empty())
|
|
.map(|s| {
|
|
s.split(',')
|
|
.filter_map(|entry| {
|
|
let parts: Vec<&str> = entry.splitn(3, ':').collect();
|
|
if parts.len() != 3 {
|
|
return None;
|
|
}
|
|
let name = parts[0].trim();
|
|
let min = parts[1].trim().parse::<f64>().ok()?;
|
|
let max = parts[2].trim().parse::<f64>().ok()?;
|
|
let feat_idx = state.data.feature_names.iter().position(|n| n == name)?;
|
|
Some(ParsedFilter { feat_idx, min, max })
|
|
})
|
|
.collect()
|
|
})
|
|
.unwrap_or_default();
|
|
|
|
// Move CPU-heavy work off the async executor
|
|
let json_body = tokio::task::spawn_blocking(move || {
|
|
let t0 = std::time::Instant::now();
|
|
|
|
let num_features = state.data.num_features;
|
|
let feature_data = &state.data.feature_data;
|
|
|
|
// Pre-compute JSON key strings once
|
|
let min_keys: Vec<String> = state
|
|
.data
|
|
.feature_names
|
|
.iter()
|
|
.map(|n| format!("min_{}", n))
|
|
.collect();
|
|
let max_keys: Vec<String> = state
|
|
.data
|
|
.feature_names
|
|
.iter()
|
|
.map(|n| format!("max_{}", n))
|
|
.collect();
|
|
|
|
// Use precomputed H3 cells if available
|
|
let h3_cells_for_res: Option<&[u64]> = state
|
|
.h3_cells
|
|
.get(resolution as usize)
|
|
.filter(|v| !v.is_empty())
|
|
.map(|v| v.as_slice());
|
|
|
|
// Aggregate using FxHashMap (fast non-crypto hash for integer keys)
|
|
// and grid visitor (no intermediate Vec<u32> allocation)
|
|
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
|
|
|
|
// Row-level filter check: value must be non-NaN and within [min, max]
|
|
let row_passes = |row: usize| -> bool {
|
|
parsed_filters.iter().all(|f| {
|
|
let v = feature_data[row * num_features + f.feat_idx];
|
|
v.is_finite() && v >= f.min && v <= f.max
|
|
})
|
|
};
|
|
|
|
if let Some(precomputed) = h3_cells_for_res {
|
|
// Fast path: precomputed H3 + visitor pattern
|
|
state
|
|
.grid
|
|
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
|
let row = row_idx as usize;
|
|
if !row_passes(row) {
|
|
return;
|
|
}
|
|
let cell_id = precomputed[row];
|
|
groups
|
|
.entry(cell_id)
|
|
.or_insert_with(|| CellAgg::new(num_features))
|
|
.add_row(feature_data, row, num_features);
|
|
});
|
|
} else {
|
|
// Fallback: compute H3 on-the-fly
|
|
let h3_res = h3o::Resolution::try_from(resolution).unwrap();
|
|
state
|
|
.grid
|
|
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
|
let row = row_idx as usize;
|
|
if !row_passes(row) {
|
|
return;
|
|
}
|
|
let cell_id = h3o::LatLng::new(state.data.lat[row], state.data.lon[row])
|
|
.map(|c| u64::from(c.to_cell(h3_res)))
|
|
.unwrap_or(0);
|
|
groups
|
|
.entry(cell_id)
|
|
.or_insert_with(|| CellAgg::new(num_features))
|
|
.add_row(feature_data, row, num_features);
|
|
});
|
|
}
|
|
|
|
let t_agg = t0.elapsed();
|
|
|
|
// Write JSON directly (no serde_json::Value allocation overhead)
|
|
let mut json_buf = String::with_capacity(groups.len() * 128);
|
|
write_hexagons_json(&mut json_buf, &groups, &min_keys, &max_keys, num_features);
|
|
|
|
let t_total = t0.elapsed();
|
|
eprintln!(
|
|
"hexagons: res={} cells={} agg={:?} json={:?} total={:?} bytes={}",
|
|
resolution,
|
|
groups.len(),
|
|
t_agg,
|
|
t_total - t_agg,
|
|
t_total,
|
|
json_buf.len()
|
|
);
|
|
|
|
json_buf
|
|
})
|
|
.await
|
|
.unwrap();
|
|
|
|
Ok(([("content-type", "application/json")], json_body))
|
|
}
|
|
|
|
// ── /api/pois ──
|
|
|
|
#[derive(Deserialize)]
|
|
pub struct POIParams {
|
|
bounds: Option<String>,
|
|
/// Comma-separated list of categories to filter by
|
|
categories: Option<String>,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
pub struct POIsResponse {
|
|
pois: Vec<POI>,
|
|
}
|
|
|
|
pub async fn get_pois(
|
|
state: Arc<AppState>,
|
|
Query(params): Query<POIParams>,
|
|
) -> Result<Json<POIsResponse>, (StatusCode, String)> {
|
|
let bounds_str = params.bounds.ok_or((
|
|
StatusCode::BAD_REQUEST,
|
|
"bounds parameter is required".into(),
|
|
))?;
|
|
|
|
let parts: Vec<f64> = bounds_str
|
|
.split(',')
|
|
.map(|s| s.trim().parse::<f64>())
|
|
.collect::<Result<Vec<_>, _>>()
|
|
.map_err(|_| {
|
|
(
|
|
StatusCode::BAD_REQUEST,
|
|
"Invalid bounds format. Use: south,west,north,east".into(),
|
|
)
|
|
})?;
|
|
|
|
if parts.len() != 4 {
|
|
return Err((
|
|
StatusCode::BAD_REQUEST,
|
|
"Invalid bounds format. Use: south,west,north,east".into(),
|
|
));
|
|
}
|
|
|
|
let (south, west, north, east) = (parts[0], parts[1], parts[2], parts[3]);
|
|
|
|
// Parse category filter if provided
|
|
let category_filter: Option<Vec<String>> = params
|
|
.categories
|
|
.as_deref()
|
|
.filter(|s| !s.is_empty())
|
|
.map(|s| s.split(',').map(|c| c.trim().to_string()).collect());
|
|
|
|
// Move CPU-heavy work off the async executor
|
|
let result = tokio::task::spawn_blocking(move || {
|
|
// Spatial query using grid index
|
|
let row_indices = state.poi_grid.query(south, west, north, east);
|
|
|
|
let pois: Vec<POI> = row_indices
|
|
.iter()
|
|
.filter_map(|&row_idx| {
|
|
let row = row_idx as usize;
|
|
|
|
// Apply category filter if specified
|
|
if let Some(ref categories) = category_filter {
|
|
if !categories.contains(&state.poi_data.category[row]) {
|
|
return None;
|
|
}
|
|
}
|
|
|
|
Some(POI {
|
|
id: state.poi_data.id[row].clone(),
|
|
name: state.poi_data.name[row].clone(),
|
|
category: state.poi_data.category[row].clone(),
|
|
lat: state.poi_data.lat[row],
|
|
lng: state.poi_data.lng[row],
|
|
emoji: state.poi_data.emoji[row].clone(),
|
|
})
|
|
})
|
|
.take(5000)
|
|
.collect();
|
|
|
|
POIsResponse { pois }
|
|
})
|
|
.await
|
|
.unwrap();
|
|
|
|
Ok(Json(result))
|
|
}
|
|
|
|
// ── /api/poi-categories ──
|
|
|
|
#[derive(Serialize)]
|
|
pub struct POICategoriesResponse {
|
|
categories: Vec<String>,
|
|
}
|
|
|
|
pub async fn get_poi_categories(state: Arc<AppState>) -> Json<POICategoriesResponse> {
|
|
// Compute unique categories
|
|
let result = tokio::task::spawn_blocking(move || {
|
|
let mut categories: Vec<String> = state
|
|
.poi_data
|
|
.category
|
|
.iter()
|
|
.cloned()
|
|
.collect::<std::collections::HashSet<_>>()
|
|
.into_iter()
|
|
.collect();
|
|
|
|
categories.sort();
|
|
|
|
POICategoriesResponse { categories }
|
|
})
|
|
.await
|
|
.unwrap();
|
|
|
|
Json(result)
|
|
}
|
|
|
|
// ── /api/hexagon-properties ──
|
|
|
|
#[derive(Deserialize)]
|
|
pub struct HexagonPropertiesParams {
|
|
pub h3: String,
|
|
pub resolution: u8,
|
|
pub filters: Option<String>,
|
|
pub limit: Option<usize>,
|
|
pub offset: Option<usize>,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
pub struct Property {
|
|
// String fields
|
|
pub address: Option<String>,
|
|
pub postcode: Option<String>,
|
|
pub property_type: Option<String>,
|
|
pub built_form: Option<String>,
|
|
pub current_energy_rating: Option<String>,
|
|
pub potential_energy_rating: Option<String>,
|
|
|
|
// Numeric fields
|
|
pub lat: f64,
|
|
pub lon: f64,
|
|
|
|
// All other numeric features stored as dynamic map
|
|
#[serde(flatten)]
|
|
pub features: FxHashMap<String, f64>,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
pub struct HexagonPropertiesResponse {
|
|
pub properties: Vec<Property>,
|
|
pub total: usize,
|
|
pub limit: usize,
|
|
pub offset: usize,
|
|
pub truncated: bool,
|
|
}
|
|
|
|
/// Helper function to check if a row passes all filters
|
|
fn row_passes_filters(
|
|
row: usize,
|
|
filters: &[ParsedFilter],
|
|
feature_data: &[f64],
|
|
num_features: usize,
|
|
) -> bool {
|
|
filters.iter().all(|f| {
|
|
let v = feature_data[row * num_features + f.feat_idx];
|
|
v.is_finite() && v >= f.min && v <= f.max
|
|
})
|
|
}
|
|
|
|
pub async fn get_hexagon_properties(
|
|
state: Arc<AppState>,
|
|
Query(params): Query<HexagonPropertiesParams>,
|
|
) -> Result<Json<HexagonPropertiesResponse>, (StatusCode, String)> {
|
|
// 1. Parse H3 cell ID
|
|
let cell = h3o::CellIndex::from_str(¶ms.h3)
|
|
.map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", e)))?;
|
|
let cell_u64: u64 = cell.into();
|
|
|
|
// 2. Validate resolution
|
|
let resolution = params.resolution as usize;
|
|
if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
|
|
return Err((
|
|
StatusCode::BAD_REQUEST,
|
|
"Invalid or non-precomputed resolution".to_string(),
|
|
));
|
|
}
|
|
|
|
// 3. Parse filters (reuse existing filter parsing logic from get_hexagons)
|
|
let parsed_filters: Vec<ParsedFilter> = params
|
|
.filters
|
|
.as_deref()
|
|
.filter(|s| !s.is_empty())
|
|
.map(|s| {
|
|
s.split(',')
|
|
.filter_map(|entry| {
|
|
let parts: Vec<&str> = entry.splitn(3, ':').collect();
|
|
if parts.len() != 3 {
|
|
return None;
|
|
}
|
|
let name = parts[0].trim();
|
|
let min = parts[1].trim().parse::<f64>().ok()?;
|
|
let max = parts[2].trim().parse::<f64>().ok()?;
|
|
let feat_idx = state.data.feature_names.iter().position(|n| n == name)?;
|
|
Some(ParsedFilter { feat_idx, min, max })
|
|
})
|
|
.collect()
|
|
})
|
|
.unwrap_or_default();
|
|
|
|
// Move CPU-heavy work off the async executor
|
|
let result = tokio::task::spawn_blocking(move || {
|
|
let h3_data = &state.h3_cells[resolution];
|
|
let num_features = state.data.num_features;
|
|
let feature_data = &state.data.feature_data;
|
|
|
|
// 4. Find all rows with matching H3 cell
|
|
let matching_rows: Vec<usize> = h3_data
|
|
.iter()
|
|
.enumerate()
|
|
.filter_map(|(idx, &h3_cell)| {
|
|
if h3_cell == cell_u64 {
|
|
// Apply feature filters
|
|
if row_passes_filters(idx, &parsed_filters, feature_data, num_features) {
|
|
Some(idx)
|
|
} else {
|
|
None
|
|
}
|
|
} else {
|
|
None
|
|
}
|
|
})
|
|
.collect();
|
|
|
|
let total = matching_rows.len();
|
|
let limit = params.limit.unwrap_or(100).min(500);
|
|
let offset = params.offset.unwrap_or(0);
|
|
let truncated = total > offset + limit;
|
|
|
|
// 5. Extract properties for paginated subset
|
|
let properties: Vec<Property> = matching_rows
|
|
.iter()
|
|
.skip(offset)
|
|
.take(limit)
|
|
.map(|&row| {
|
|
// Build dynamic features map
|
|
let mut features = FxHashMap::default();
|
|
let base = row * num_features;
|
|
for (feat_idx, feat_name) in state.data.feature_names.iter().enumerate() {
|
|
let v = feature_data[base + feat_idx];
|
|
if v.is_finite() {
|
|
features.insert(feat_name.clone(), v);
|
|
}
|
|
}
|
|
|
|
// Helper to get non-empty string
|
|
let get_string = |s: &str| -> Option<String> {
|
|
if s.is_empty() {
|
|
None
|
|
} else {
|
|
Some(s.to_string())
|
|
}
|
|
};
|
|
|
|
Property {
|
|
address: get_string(&state.data.address[row]),
|
|
postcode: get_string(&state.data.postcode[row]),
|
|
property_type: get_string(&state.data.property_type[row]),
|
|
built_form: get_string(&state.data.built_form[row]),
|
|
current_energy_rating: get_string(&state.data.current_energy_rating[row]),
|
|
potential_energy_rating: get_string(&state.data.potential_energy_rating[row]),
|
|
lat: state.data.lat[row],
|
|
lon: state.data.lon[row],
|
|
features,
|
|
}
|
|
})
|
|
.collect();
|
|
|
|
HexagonPropertiesResponse {
|
|
properties,
|
|
total,
|
|
limit,
|
|
offset,
|
|
truncated,
|
|
}
|
|
})
|
|
.await
|
|
.unwrap();
|
|
|
|
Ok(Json(result))
|
|
}
|