Refactor the server

This commit is contained in:
Andras Schmelczer 2026-01-31 20:25:54 +00:00
parent 3b9ad11d71
commit 01ec17ff04
15 changed files with 939 additions and 1226 deletions

View file

@ -0,0 +1,87 @@
use std::sync::Arc;
use axum::response::Json;
use serde::Serialize;
use tracing::info;
use crate::data::Histogram;
use crate::state::AppState;
/// Description of a single feature as returned by the features endpoint.
///
/// Serialized as an internally tagged object: the variant is written to a
/// `"type"` field as `"numeric"` or `"enum"`, next to the variant's own fields.
#[derive(Serialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum FeatureInfo {
    /// A feature with `f64` values.
    Numeric {
        /// Raw feature name.
        name: String,
        /// Display label for the feature.
        label: String,
        /// Lower end of the reported value range.
        min: f64,
        /// Upper end of the reported value range.
        max: f64,
        /// Histogram of the feature's values.
        histogram: Histogram,
    },
    /// A feature whose values come from a fixed list of strings.
    Enum {
        /// Raw feature name.
        name: String,
        /// Display label for the feature.
        label: String,
        /// The possible values of the feature.
        values: Vec<String>,
    },
}
/// JSON body returned by `get_features`.
#[derive(Serialize)]
pub struct FeaturesResponse {
/// All known features; `get_features` lists numeric features first, then enum features.
features: Vec<FeatureInfo>,
}
/// Turn a `snake_case` identifier into a space-separated, capitalised label.
///
/// Names containing `/` or any uppercase character are treated as already
/// human-readable and returned unchanged. Otherwise every `_` becomes a space
/// and the first character of each segment is uppercased; empty segments
/// (from leading, trailing or doubled underscores) contribute nothing but
/// their separating space.
fn snake_to_label(name: &str) -> String {
    let already_readable = name.contains('/') || name.chars().any(char::is_uppercase);
    if already_readable {
        return name.to_owned();
    }

    let mut label = String::with_capacity(name.len());
    for (position, segment) in name.split('_').enumerate() {
        if position > 0 {
            label.push(' ');
        }
        let mut rest = segment.chars();
        if let Some(initial) = rest.next() {
            // `to_uppercase` may yield more than one char (e.g. 'ß' -> "SS").
            label.extend(initial.to_uppercase());
            label.push_str(rest.as_str());
        }
    }
    label
}
/// Handler for `GET /api/features`: lists every numeric and enum feature.
///
/// Numeric features come first, in `feature_names` order, each paired with the
/// stats entry at the same index; enum features follow in `enum_features`
/// order. Labels are derived from the raw names with `snake_to_label`.
///
/// # Panics
/// Panics if `feature_stats` has fewer entries than `feature_names`.
pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
    let data = &state.data;
    let numeric_count = data.feature_names.len();
    let enum_count = data.enum_features.len();

    let mut features: Vec<FeatureInfo> = Vec::with_capacity(numeric_count + enum_count);

    for (idx, name) in data.feature_names.iter().enumerate() {
        let stats = &data.feature_stats[idx];
        features.push(FeatureInfo::Numeric {
            name: name.clone(),
            label: snake_to_label(name),
            min: stats.p_low,
            max: stats.p_high,
            histogram: stats.histogram.clone(),
        });
    }

    features.extend(data.enum_features.iter().map(|ef| FeatureInfo::Enum {
        name: ef.name.clone(),
        label: snake_to_label(&ef.name),
        values: ef.values.clone(),
    }));

    // One entry is produced per source item, so the source lengths are the
    // per-variant counts.
    info!(
        numeric = numeric_count,
        enums = enum_count,
        "GET /api/features"
    );

    Json(FeaturesResponse { features })
}

View file

@ -0,0 +1,257 @@
use std::fmt::Write;
use std::sync::Arc;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::IntoResponse;
use rustc_hash::FxHashMap;
use serde::Deserialize;
use tracing::{info, warn};
use crate::consts::{H3_PRECOMPUTE_MAX, H3_PRECOMPUTE_MIN};
use crate::filter::parse_filters;
use crate::state::AppState;
/// Padding added to each side of the requested bounds in `get_hexagons`,
/// expressed as a fraction of the lat/lng range (0.2 = 20%), despite the name.
const BOUNDS_BUFFER_PERCENT: f64 = 0.2;
/// Query-string parameters accepted by `get_hexagons`.
#[derive(Deserialize)]
pub struct HexagonParams {
/// H3 resolution to aggregate at; `get_hexagons` rejects values outside
/// `[H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX]`.
resolution: u8,
/// Viewport as `south,west,north,east`. Optional at the deserialization
/// level, but `get_hexagons` answers 400 when it is missing.
bounds: Option<String>,
/// Comma-separated filters: `name:min:max,...`
/// Rows must have non-NaN values within [min,max] for each filter.
filters: Option<String>,
}
/// Running aggregate for one hexagon cell: how many rows landed in it and the
/// smallest / largest finite value seen for each feature.
struct CellAgg {
    /// Number of rows added so far.
    count: u32,
    /// Per-feature minimum; stays `f64::INFINITY` until a finite value is seen.
    mins: Vec<f64>,
    /// Per-feature maximum; stays `f64::NEG_INFINITY` until a finite value is seen.
    maxs: Vec<f64>,
}

impl CellAgg {
    /// Create an empty aggregate tracking `num_features` features.
    fn new(num_features: usize) -> Self {
        Self {
            count: 0,
            mins: vec![f64::INFINITY; num_features],
            maxs: vec![f64::NEG_INFINITY; num_features],
        }
    }

    /// Fold row `row` of the row-major `feature_data` matrix into the aggregate.
    ///
    /// The row occupies `feature_data[row * num_features..][..num_features]`,
    /// so all of its features are contiguous in memory. The row always counts
    /// towards `count`; NaN and infinite values are ignored for min/max.
    ///
    /// # Panics
    /// Panics if the row lies outside `feature_data`.
    #[inline]
    fn add_row(&mut self, feature_data: &[f64], row: usize, num_features: usize) {
        self.count += 1;

        let start = row * num_features;
        let values = &feature_data[start..start + num_features];
        for (feat, value) in values.iter().copied().enumerate() {
            if !value.is_finite() {
                continue;
            }
            // `value` is finite and the stored extremes are never NaN, so
            // `min`/`max` match an explicit compare-and-assign.
            self.mins[feat] = self.mins[feat].min(value);
            self.maxs[feat] = self.maxs[feat].max(value);
        }
    }
}
/// Write the hexagons JSON response directly to a String buffer,
/// avoiding serde_json::Value allocations entirely.
fn write_hexagons_json(
buf: &mut String,
groups: &FxHashMap<u64, CellAgg>,
min_keys: &[String],
max_keys: &[String],
num_features: usize,
) {
buf.push_str("{\"features\":[");
let mut first = true;
for (&cell_id, agg) in groups {
if !first {
buf.push(',');
}
first = false;
let cell = h3o::CellIndex::try_from(cell_id).unwrap();
write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, agg.count).unwrap();
for i in 0..num_features {
if agg.mins[i] != f64::INFINITY {
write!(
buf,
",\"{}\":{},\"{}\":{}",
min_keys[i], agg.mins[i], max_keys[i], agg.maxs[i]
)
.unwrap();
}
}
buf.push('}');
}
buf.push_str("]}");
}
pub async fn get_hexagons(
state: Arc<AppState>,
Query(params): Query<HexagonParams>,
) -> Result<impl IntoResponse, (StatusCode, String)> {
let resolution = params.resolution;
if resolution < H3_PRECOMPUTE_MIN || resolution > H3_PRECOMPUTE_MAX {
warn!(resolution, "Resolution out of range [{}, {}]", H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX);
return Err((
StatusCode::BAD_REQUEST,
format!(
"resolution must be between {} and {}",
H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX
),
));
}
let bounds_str = params.bounds.ok_or((
StatusCode::BAD_REQUEST,
"bounds parameter is required".into(),
))?;
let parts: Vec<f64> = bounds_str
.split(',')
.map(|s| s.trim().parse::<f64>())
.collect::<Result<Vec<_>, _>>()
.map_err(|_| {
(
StatusCode::BAD_REQUEST,
"Invalid bounds format. Use: south,west,north,east".into(),
)
})?;
if parts.len() != 4 {
return Err((
StatusCode::BAD_REQUEST,
"Invalid bounds format. Use: south,west,north,east".into(),
));
}
let (mut south, mut west, mut north, mut east) = (parts[0], parts[1], parts[2], parts[3]);
let lat_range = north - south;
let lng_range = east - west;
south -= lat_range * BOUNDS_BUFFER_PERCENT;
north += lat_range * BOUNDS_BUFFER_PERCENT;
west -= lng_range * BOUNDS_BUFFER_PERCENT;
east += lng_range * BOUNDS_BUFFER_PERCENT;
let precision = 0.01;
south = (south / precision).floor() * precision;
west = (west / precision).floor() * precision;
north = (north / precision).ceil() * precision;
east = (east / precision).ceil() * precision;
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.data.enum_features,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let json_body = tokio::task::spawn_blocking(move || {
let t0 = std::time::Instant::now();
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
let min_keys: Vec<String> = state
.data
.feature_names
.iter()
.map(|n| format!("min_{}", n))
.collect();
let max_keys: Vec<String> = state
.data
.feature_names
.iter()
.map(|n| format!("max_{}", n))
.collect();
let h3_cells_for_res: Option<&[u64]> = state
.h3_cells
.get(resolution as usize)
.filter(|v| !v.is_empty())
.map(|v| v.as_slice());
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
let enum_features = &state.data.enum_features;
// Row-level filter check: numeric must be non-NaN and within [min, max],
// enum must have value index in the allowed set
let row_passes = |row: usize| -> bool {
parsed_filters.iter().all(|f| {
let v = feature_data[row * num_features + f.feat_idx];
v.is_finite() && v >= f.min && v <= f.max
}) && parsed_enum_filters.iter().all(|ef| {
let v = enum_features[ef.enum_idx].data[row];
v != 255 && ef.allowed.contains(&v)
})
};
if let Some(precomputed) = h3_cells_for_res {
state
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
if !row_passes(row) {
return;
}
let cell_id = precomputed[row];
groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features))
.add_row(feature_data, row, num_features);
});
} else {
let h3_res = h3o::Resolution::try_from(resolution).unwrap();
state
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
if !row_passes(row) {
return;
}
let cell_id = h3o::LatLng::new(state.data.lat[row], state.data.lon[row])
.map(|c| u64::from(c.to_cell(h3_res)))
.unwrap_or(0);
groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features))
.add_row(feature_data, row, num_features);
});
}
let t_agg = t0.elapsed();
let mut json_buf = String::with_capacity(groups.len() * 128);
write_hexagons_json(&mut json_buf, &groups, &min_keys, &max_keys, num_features);
let t_total = t0.elapsed();
info!(
resolution,
cells = groups.len(),
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),
agg_ms = format_args!("{:.1}", t_agg.as_secs_f64() * 1000.0),
total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0),
bytes = json_buf.len(),
"GET /api/hexagons"
);
json_buf
})
.await
.unwrap();
Ok(([("content-type", "application/json")], json_body))
}

View file

@ -0,0 +1,9 @@
// One private submodule per group of endpoints.
mod features;
mod hexagons;
mod pois;
mod properties;
// Flat re-exports of the endpoint functions, so callers can import them from
// this module without naming the submodule.
pub use features::get_features;
pub use hexagons::get_hexagons;
pub use pois::{get_poi_categories, get_pois};
pub use properties::get_hexagon_properties;

View file

@ -0,0 +1,133 @@
use std::sync::Arc;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::Json;
use serde::{Deserialize, Serialize};
use tracing::info;
use crate::data::POI;
use crate::state::AppState;
/// Query-string parameters accepted by `get_pois`.
#[derive(Deserialize)]
pub struct POIParams {
/// Viewport as `south,west,north,east`. Optional at the deserialization
/// level, but `get_pois` answers 400 when it is missing.
bounds: Option<String>,
/// Comma-separated list of categories to filter by
/// (absent or empty means no category filtering).
categories: Option<String>,
}
/// JSON body returned by `get_pois`.
#[derive(Serialize)]
pub struct POIsResponse {
/// Matching points of interest; `get_pois` caps this list at 5000 entries.
pois: Vec<POI>,
}
pub async fn get_pois(
state: Arc<AppState>,
Query(params): Query<POIParams>,
) -> Result<Json<POIsResponse>, (StatusCode, String)> {
let bounds_str = params.bounds.ok_or((
StatusCode::BAD_REQUEST,
"bounds parameter is required".into(),
))?;
let parts: Vec<f64> = bounds_str
.split(',')
.map(|s| s.trim().parse::<f64>())
.collect::<Result<Vec<_>, _>>()
.map_err(|_| {
(
StatusCode::BAD_REQUEST,
"Invalid bounds format. Use: south,west,north,east".into(),
)
})?;
if parts.len() != 4 {
return Err((
StatusCode::BAD_REQUEST,
"Invalid bounds format. Use: south,west,north,east".into(),
));
}
let (south, west, north, east) = (parts[0], parts[1], parts[2], parts[3]);
let categories_str = params.categories.clone();
let category_filter: Option<Vec<String>> = params
.categories
.as_deref()
.filter(|s| !s.is_empty())
.map(|s| s.split(',').map(|c| c.trim().to_string()).collect());
let num_categories = category_filter.as_ref().map(|c| c.len()).unwrap_or(0);
let result = tokio::task::spawn_blocking(move || {
let t0 = std::time::Instant::now();
let row_indices = state.poi_grid.query(south, west, north, east);
let pois: Vec<POI> = row_indices
.iter()
.filter_map(|&row_idx| {
let row = row_idx as usize;
if let Some(ref categories) = category_filter {
if !categories.contains(&state.poi_data.category[row]) {
return None;
}
}
Some(POI {
id: state.poi_data.id[row].clone(),
name: state.poi_data.name[row].clone(),
category: state.poi_data.category[row].clone(),
lat: state.poi_data.lat[row],
lng: state.poi_data.lng[row],
emoji: state.poi_data.emoji[row].clone(),
})
})
.take(5000)
.collect();
let elapsed = t0.elapsed();
info!(
results = pois.len(),
candidates = row_indices.len(),
categories = num_categories,
categories_raw = categories_str.as_deref().unwrap_or("-"),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/pois"
);
POIsResponse { pois }
})
.await
.unwrap();
Ok(Json(result))
}
/// JSON body returned by `get_poi_categories`.
#[derive(Serialize)]
pub struct POICategoriesResponse {
/// Distinct POI categories, sorted ascending.
categories: Vec<String>,
}
/// Handler for `GET /api/poi-categories`: the distinct POI categories in
/// ascending order.
///
/// The scan over all POIs runs on a blocking worker thread.
///
/// # Panics
/// Panics if the worker task panics or is cancelled.
pub async fn get_poi_categories(state: Arc<AppState>) -> Json<POICategoriesResponse> {
    let worker = tokio::task::spawn_blocking(move || {
        // An ordered set of borrowed names deduplicates and sorts in one
        // pass; only the distinct names are cloned afterwards.
        let distinct: std::collections::BTreeSet<&String> =
            state.poi_data.category.iter().collect();
        let categories: Vec<String> = distinct.into_iter().cloned().collect();

        info!(count = categories.len(), "GET /api/poi-categories");
        POICategoriesResponse { categories }
    });

    let response = worker.await.unwrap();
    Json(response)
}

View file

@ -0,0 +1,198 @@
use std::str::FromStr;
use std::sync::Arc;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::Json;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::filter::{parse_filters, row_passes_filters};
use crate::state::AppState;
/// Query-string parameters accepted by `get_hexagon_properties`.
#[derive(Deserialize)]
pub struct HexagonPropertiesParams {
/// H3 cell index in string form; parsed with `h3o::CellIndex::from_str`.
pub h3: String,
/// Resolution whose precomputed cell ids are searched for `h3`.
pub resolution: u8,
/// Optional filter string, handed to `parse_filters`.
pub filters: Option<String>,
/// Page size; the handler defaults it to 100 and caps it at 500.
pub limit: Option<usize>,
/// Number of matching rows to skip; the handler defaults it to 0.
pub offset: Option<usize>,
}
/// One row of the dataset as returned by `get_hexagon_properties`.
#[derive(Serialize)]
pub struct Property {
// String fields
// (`None` when the handler finds the value empty or missing for the row)
pub address: Option<String>,
pub postcode: Option<String>,
pub property_type: Option<String>,
pub built_form: Option<String>,
pub duration: Option<String>,
pub current_energy_rating: Option<String>,
pub potential_energy_rating: Option<String>,
// Numeric fields
pub lat: f64,
pub lon: f64,
/// Finite numeric feature values keyed by feature name; flattened so each
/// entry is serialized as a field of the property object itself.
#[serde(flatten)]
pub features: FxHashMap<String, f64>,
}
/// JSON body returned by `get_hexagon_properties`: one page of matching rows.
#[derive(Serialize)]
pub struct HexagonPropertiesResponse {
/// The rows of the requested page.
pub properties: Vec<Property>,
/// Number of rows in the cell that pass the filters, before paging.
pub total: usize,
/// Page size actually applied.
pub limit: usize,
/// Offset actually applied.
pub offset: usize,
/// `true` when more matching rows exist beyond `offset + limit`.
pub truncated: bool,
}
pub async fn get_hexagon_properties(
state: Arc<AppState>,
Query(params): Query<HexagonPropertiesParams>,
) -> Result<Json<HexagonPropertiesResponse>, (StatusCode, String)> {
let cell = h3o::CellIndex::from_str(&params.h3)
.map_err(|e| {
warn!(h3 = %params.h3, error = %e, "Invalid H3 cell index");
(StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", e))
})?;
let cell_u64: u64 = cell.into();
let resolution = params.resolution as usize;
if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
warn!(resolution, "Invalid or non-precomputed resolution for hexagon-properties");
return Err((
StatusCode::BAD_REQUEST,
"Invalid or non-precomputed resolution".to_string(),
));
}
let h3_str = params.h3.clone();
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.data.enum_features,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let result = tokio::task::spawn_blocking(move || {
let t0 = std::time::Instant::now();
let h3_data = &state.h3_cells[resolution];
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
let enum_features = &state.data.enum_features;
let matching_rows: Vec<usize> = h3_data
.iter()
.enumerate()
.filter_map(|(idx, &h3_cell)| {
if h3_cell == cell_u64 {
if row_passes_filters(
idx,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
enum_features,
) {
Some(idx)
} else {
None
}
} else {
None
}
})
.collect();
let total = matching_rows.len();
let limit = params.limit.unwrap_or(100).min(500);
let offset = params.offset.unwrap_or(0);
let truncated = total > offset + limit;
let properties: Vec<Property> = matching_rows
.iter()
.skip(offset)
.take(limit)
.map(|&row| {
let mut features = FxHashMap::default();
let base = row * num_features;
for (feat_idx, feat_name) in state.data.feature_names.iter().enumerate() {
let v = feature_data[base + feat_idx];
if v.is_finite() {
features.insert(feat_name.clone(), v);
}
}
let get_string = |s: &str| -> Option<String> {
let trimmed = s.trim();
if trimmed.is_empty() {
None
} else {
Some(trimmed.to_string())
}
};
let get_enum_value = |names: &[&str]| -> Option<String> {
for name in names {
if let Some(val) = enum_features.iter().find_map(|ef| {
if ef.name == *name {
let idx = ef.data[row];
if idx == 255 {
None
} else {
ef.values.get(idx as usize).cloned()
}
} else {
None
}
}) {
return Some(val);
}
}
None
};
Property {
address: get_string(&state.data.address[row]),
postcode: get_string(&state.data.postcode[row]),
property_type: get_enum_value(&["Property type", "epc_property_type", "pp_property_type"]),
built_form: get_enum_value(&["Property type/built form", "built_form"]),
duration: get_enum_value(&["Leashold/Freehold", "duration"]),
current_energy_rating: get_enum_value(&["Current energy rating", "current_energy_rating"]),
potential_energy_rating: get_enum_value(&["Potential energy rating", "potential_energy_rating"]),
lat: state.data.lat[row],
lon: state.data.lon[row],
features,
}
})
.collect();
let elapsed = t0.elapsed();
info!(
h3 = %h3_str,
resolution,
total,
returned = properties.len(),
offset,
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/hexagon-properties"
);
HexagonPropertiesResponse {
properties,
total,
limit,
offset,
truncated,
}
})
.await
.unwrap();
Ok(Json(result))
}