Optimisations

This commit is contained in:
Andras Schmelczer 2026-02-01 21:00:59 +00:00
parent 66c2a25457
commit 9179acd4cd
21 changed files with 653 additions and 139 deletions

View file

@ -14,9 +14,9 @@ pub enum FeatureInfo {
#[serde(rename = "numeric")]
Numeric {
name: String,
min: f64,
max: f64,
step: f64,
min: f32,
max: f32,
step: f32,
histogram: Histogram,
description: &'static str,
detail: &'static str,

View file

@ -8,7 +8,7 @@ use axum::response::IntoResponse;
use serde::Deserialize;
use tracing::{info, warn};
use crate::consts::{ENUM_NULL, HISTOGRAM_BINS};
use crate::consts::{ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
use crate::filter::{parse_filters, row_passes_filters};
use crate::state::AppState;
@ -31,17 +31,21 @@ pub async fn get_hexagon_stats(
})?;
let cell_u64: u64 = cell.into();
let resolution = params.resolution as usize;
if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
let resolution = params.resolution;
if !(H3_REQUEST_MIN..=H3_REQUEST_MAX).contains(&resolution) {
warn!(
resolution,
"Invalid or non-precomputed resolution for hexagon-stats"
"Resolution out of range [{}, {}]", H3_REQUEST_MIN, H3_REQUEST_MAX
);
return Err((
StatusCode::BAD_REQUEST,
"Invalid or non-precomputed resolution".to_string(),
format!(
"resolution must be between {} and {}",
H3_REQUEST_MIN, H3_REQUEST_MAX
),
));
}
let resolution_idx = resolution as usize;
let h3_str = params.h3.clone();
let filters_str = params.filters.clone();
@ -54,7 +58,13 @@ pub async fn get_hexagon_stats(
let result = tokio::task::spawn_blocking(move || {
let start_time = std::time::Instant::now();
let h3_data = &state.h3_cells[resolution];
let precomputed: Option<&[u64]> = state
.h3_cells
.get(resolution_idx)
.filter(|cells| !cells.is_empty())
.map(|cells| cells.as_slice());
let h3_res = h3o::Resolution::try_from(resolution)
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
let enum_features = &state.data.enum_features;
@ -67,7 +77,14 @@ pub async fn get_hexagon_stats(
.grid
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
let row = row_idx as usize;
if h3_data[row] == cell_u64
let row_cell = if let Some(h3_data) = precomputed {
h3_data[row]
} else {
h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64)
.map(|coord| u64::from(coord.to_cell(h3_res)))
.unwrap_or(0)
};
if row_cell == cell_u64
&& row_passes_filters(
row,
&parsed_filters,
@ -98,9 +115,9 @@ pub async fn get_hexagon_stats(
let bin_width = global_stats.histogram.bin_width;
let mut count = 0usize;
let mut min_value = f64::INFINITY;
let mut max_value = f64::NEG_INFINITY;
let mut sum = 0.0f64;
let mut min_value = f32::INFINITY;
let mut max_value = f32::NEG_INFINITY;
let mut sum = 0.0f64; // keep f64 for mean precision
let mut bins = vec![0u64; HISTOGRAM_BINS];
for &row in &matching_rows {
@ -113,12 +130,12 @@ pub async fn get_hexagon_stats(
if value > max_value {
max_value = value;
}
sum += value;
sum += value as f64;
// Bin into histogram using global edges
// Bin into histogram using global edges (cast to f64 for bin index math)
if bin_width > 0.0 {
let bin_index =
((value - histogram_min) / bin_width).floor() as isize;
((value as f64 - histogram_min as f64) / bin_width as f64).floor() as isize;
let clamped_index = bin_index.max(0).min((HISTOGRAM_BINS - 1) as isize) as usize;
bins[clamped_index] += 1;
}
@ -138,15 +155,15 @@ pub async fn get_hexagon_stats(
output.push_str("{\"name\":");
write_json_string(&mut output, feature_name);
write!(output, ",\"count\":{}", count).unwrap();
write!(output, ",\"min\":{}", format_f64(min_value)).unwrap();
write!(output, ",\"max\":{}", format_f64(max_value)).unwrap();
write!(output, ",\"min\":{}", format_num(min_value)).unwrap();
write!(output, ",\"max\":{}", format_num(max_value)).unwrap();
write!(output, ",\"mean\":{}", format_f64(mean)).unwrap();
output.push_str(",\"histogram\":{\"min\":");
write!(output, "{}", format_f64(histogram_min)).unwrap();
write!(output, "{}", format_num(histogram_min)).unwrap();
output.push_str(",\"max\":");
write!(output, "{}", format_f64(histogram_max)).unwrap();
write!(output, "{}", format_num(histogram_max)).unwrap();
output.push_str(",\"bin_width\":");
write!(output, "{}", format_f64(bin_width)).unwrap();
write!(output, "{}", format_num(bin_width)).unwrap();
output.push_str(",\"counts\":[");
for (bin_index, &bin_count) in bins.iter().enumerate() {
if bin_index > 0 {
@ -216,10 +233,11 @@ pub async fn get_hexagon_stats(
"GET /api/hexagon-stats"
);
output
Ok(output)
})
.await
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
Ok((
[(axum::http::header::CONTENT_TYPE, "application/json")],
@ -242,6 +260,15 @@ fn write_json_string(output: &mut String, value: &str) {
output.push('"');
}
/// Formats an `f32` as a JSON number token.
///
/// * Whole values with magnitude below 1e15 keep exactly one decimal place
///   (`3.0`, `-2.0`), so they are always written with a fractional part.
/// * All other finite values use `f32`'s own shortest round-trip `Display`
///   form. Formatting the `f32` directly (instead of widening to `f64` first)
///   avoids emitting widening noise such as `0.10000000149011612` for `0.1`.
/// * Non-finite values (`NaN`, `±inf`) have no JSON number representation and
///   are written as `null` so the surrounding document stays parseable.
fn format_num(value: f32) -> String {
    if !value.is_finite() {
        return "null".to_string();
    }
    // The magnitude check is done in f64 so the 1e15 threshold is not itself
    // rounded to the nearest f32.
    let is_whole = value.fract() == 0.0;
    if is_whole && f64::from(value).abs() < 1e15 {
        format!("{:.1}", value)
    } else {
        value.to_string()
    }
}
fn format_f64(value: f64) -> String {
if value.fract() == 0.0 && value.abs() < 1e15 {
format!("{:.1}", value)

View file

@ -9,7 +9,7 @@ use serde::Deserialize;
use tracing::{info, warn};
use crate::consts::{
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_PRECOMPUTE_MIN,
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN,
POSTCODE_MIN_RESOLUTION,
};
use crate::filter::parse_filters;
@ -44,8 +44,8 @@ pub struct HexagonParams {
/// Per-cell accumulator for aggregating features
struct CellAgg {
count: u32,
mins: Vec<f64>,
maxs: Vec<f64>,
mins: Vec<f32>,
maxs: Vec<f32>,
/// Min/max ordinal indices for enum features (255 = no data yet)
enum_mins: Vec<u8>,
enum_maxs: Vec<u8>,
@ -60,8 +60,8 @@ impl CellAgg {
fn new(num_features: usize, num_enums: usize) -> Self {
CellAgg {
count: 0,
mins: vec![f64::INFINITY; num_features],
maxs: vec![f64::NEG_INFINITY; num_features],
mins: vec![f32::INFINITY; num_features],
maxs: vec![f32::NEG_INFINITY; num_features],
enum_mins: vec![ENUM_NULL; num_enums],
enum_maxs: vec![0; num_enums],
postcode: None,
@ -75,7 +75,7 @@ impl CellAgg {
/// feature_data[row * num_features + feat_idx] — all features for one row
/// are contiguous, so this reads a single cache line per ~16 `f32` features (assuming 64-byte cache lines).
#[inline]
fn add_row(&mut self, feature_data: &[f64], row: usize, num_features: usize) {
fn add_row(&mut self, feature_data: &[f32], row: usize, num_features: usize) {
self.count += 1;
let base = row * num_features;
let row_slice = &feature_data[base..base + num_features];
@ -110,9 +110,9 @@ impl CellAgg {
/// Track postcode and centroid for high-resolution cells.
/// Uses simple "first seen" approach — at res 11/12, most rows in a cell share a postcode.
#[inline]
fn add_postcode(&mut self, postcode: &str, lat: f64, lon: f64) {
self.lat_sum += lat;
self.lon_sum += lon;
fn add_postcode(&mut self, postcode: &str, lat: f32, lon: f32) {
self.lat_sum += lat as f64;
self.lon_sum += lon as f64;
if postcode.is_empty() {
return;
}
@ -212,16 +212,16 @@ pub async fn get_hexagons(
Query(params): Query<HexagonParams>,
) -> Result<impl IntoResponse, (StatusCode, String)> {
let resolution = params.resolution;
if resolution < H3_PRECOMPUTE_MIN || resolution > H3_PRECOMPUTE_MAX {
if !(H3_REQUEST_MIN..=H3_REQUEST_MAX).contains(&resolution) {
warn!(
resolution,
"Resolution out of range [{}, {}]", H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX
"Resolution out of range [{}, {}]", H3_REQUEST_MIN, H3_REQUEST_MAX
);
return Err((
StatusCode::BAD_REQUEST,
format!(
"resolution must be between {} and {}",
H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX
H3_REQUEST_MIN, H3_REQUEST_MAX
),
));
}
@ -304,7 +304,7 @@ pub async fn get_hexagons(
aggregation.add_enums(enum_features, row);
if include_postcode {
aggregation.add_postcode(
&state.data.postcode[row],
state.data.postcode(row),
state.data.lat[row],
state.data.lon[row],
);
@ -320,7 +320,7 @@ pub async fn get_hexagons(
if !row_passes(row) {
return;
}
let cell_id = h3o::LatLng::new(state.data.lat[row], state.data.lon[row])
let cell_id = h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64)
.map(|coord| u64::from(coord.to_cell(h3_res)))
.unwrap_or(0);
let aggregation = groups
@ -330,7 +330,7 @@ pub async fn get_hexagons(
aggregation.add_enums(enum_features, row);
if include_postcode {
aggregation.add_postcode(
&state.data.postcode[row],
state.data.postcode(row),
state.data.lat[row],
state.data.lon[row],
);

View file

@ -55,7 +55,7 @@ pub async fn get_pois(
.filter_map(|&row_idx| {
let row = row_idx as usize;
if let Some(ref categories) = category_filter {
if !categories.contains(&state.poi_data.category[row]) {
if !categories.contains(state.poi_data.category.get(row)) {
return None;
}
}
@ -83,11 +83,11 @@ pub async fn get_pois(
.map(|&row| POI {
id: state.poi_data.id[row].clone(),
name: state.poi_data.name[row].clone(),
category: state.poi_data.category[row].clone(),
group: state.poi_data.group[row].clone(),
category: state.poi_data.category.get(row).to_string(),
group: state.poi_data.group.get(row).to_string(),
lat: state.poi_data.lat[row],
lng: state.poi_data.lng[row],
emoji: state.poi_data.emoji[row].clone(),
emoji: state.poi_data.emoji.get(row).to_string(),
})
.collect();

View file

@ -8,7 +8,7 @@ use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, MAX_PROPERTIES_LIMIT};
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN, MAX_PROPERTIES_LIMIT};
use crate::data::EnumFeatureData;
use crate::filter::{parse_filters, row_passes_filters};
use crate::state::AppState;
@ -36,13 +36,13 @@ pub struct Property {
pub potential_energy_rating: Option<String>,
// Numeric fields
pub lat: f64,
pub lon: f64,
pub lat: f32,
pub lon: f32,
pub is_construction_date_approximate: Option<bool>,
#[serde(flatten)]
pub features: FxHashMap<String, f64>,
pub features: FxHashMap<String, f32>,
}
#[derive(Serialize)]
@ -93,17 +93,21 @@ pub async fn get_hexagon_properties(
})?;
let cell_u64: u64 = cell.into();
let resolution = params.resolution as usize;
if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
let resolution = params.resolution;
if !(H3_REQUEST_MIN..=H3_REQUEST_MAX).contains(&resolution) {
warn!(
resolution,
"Invalid or non-precomputed resolution for hexagon-properties"
"Resolution out of range [{}, {}]", H3_REQUEST_MIN, H3_REQUEST_MAX
);
return Err((
StatusCode::BAD_REQUEST,
"Invalid or non-precomputed resolution".to_string(),
format!(
"resolution must be between {} and {}",
H3_REQUEST_MIN, H3_REQUEST_MAX
),
));
}
let resolution_idx = resolution as usize;
let h3_str = params.h3.clone();
let filters_str = params.filters.clone();
@ -116,7 +120,13 @@ pub async fn get_hexagon_properties(
let result = tokio::task::spawn_blocking(move || {
let t0 = std::time::Instant::now();
let h3_data = &state.h3_cells[resolution];
let precomputed: Option<&[u64]> = state
.h3_cells
.get(resolution_idx)
.filter(|cells| !cells.is_empty())
.map(|cells| cells.as_slice());
let h3_res = h3o::Resolution::try_from(resolution)
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
let enum_features = &state.data.enum_features;
@ -128,7 +138,14 @@ pub async fn get_hexagon_properties(
.grid
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
let row = row_idx as usize;
if h3_data[row] == cell_u64
let row_cell = if let Some(h3_data) = precomputed {
h3_data[row]
} else {
h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64)
.map(|coord| u64::from(coord.to_cell(h3_res)))
.unwrap_or(0)
};
if row_cell == cell_u64
&& row_passes_filters(
row,
&parsed_filters,
@ -162,8 +179,8 @@ pub async fn get_hexagon_properties(
}
Property {
address: non_empty_string(&state.data.address[row]),
postcode: non_empty_string(&state.data.postcode[row]),
address: non_empty_string(state.data.address(row)),
postcode: non_empty_string(state.data.postcode(row)),
is_construction_date_approximate: Some(state.data.is_approx_build_date[row]),
property_type: lookup_enum_value(
enum_features,
@ -215,16 +232,17 @@ pub async fn get_hexagon_properties(
"GET /api/hexagon-properties"
);
HexagonPropertiesResponse {
Ok(HexagonPropertiesResponse {
properties,
total,
limit,
offset,
truncated,
}
})
})
.await
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
.map_err(|error: String| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
Ok(Json(result))
}