From 2c613dc0d14edff046bb2e33c0e162da49f02b66 Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Mon, 2 Feb 2026 20:10:32 +0000 Subject: [PATCH] Optimise --- README.md | 7 +- frontend/src/App.tsx | 19 +- frontend/src/usePlausible.ts | 3 + frontend/webpack.config.js | 6 - server-rs/src/consts.rs | 1 - server-rs/src/filter.rs | 5 +- server-rs/src/grid_index.rs | 58 ++++-- server-rs/src/main.rs | 3 +- server-rs/src/routes/hexagon_stats.rs | 71 ++++--- server-rs/src/routes/hexagons.rs | 262 +++++++++++++++++--------- server-rs/src/routes/pois.rs | 61 +++--- server-rs/src/routes/properties.rs | 53 ++++-- server-rs/src/state.rs | 6 +- server-rs/src/tests.rs | 9 +- 14 files changed, 376 insertions(+), 188 deletions(-) diff --git a/README.md b/README.md index e63ab17..a1b785a 100644 --- a/README.md +++ b/README.md @@ -105,4 +105,9 @@ righmove lins - how to handle too many pois \ No newline at end of file + how to handle too many pois + + + fix zoopla links + + \ No newline at end of file diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 84af9b7..971be4a 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -600,6 +600,12 @@ export default function App() { bounds: boundsStr, }); if (filtersStr) params.set('filters', filtersStr); + // Only request data for the actively viewed feature (reduces bandwidth) + if (viewFeature) { + params.set('fields', viewFeature); + } else { + params.set('fields', ''); + } const res = await fetch(`${getApiBaseUrl()}/api/hexagons?${params}`, { signal: abortControllerRef.current.signal, }); @@ -619,7 +625,7 @@ export default function App() { clearTimeout(debounceRef.current); } }; - }, [resolution, bounds, filters, buildFilterParam]); + }, [resolution, bounds, filters, buildFilterParam, viewFeature]); // During slider drag, use the expanded dataset (without active feature filter) // so both narrowing and expanding are visible. Otherwise use server-filtered data. @@ -745,6 +751,8 @@ export default function App() { const boundsStr = `${bounds.south},${bounds.west},${bounds.north},${bounds.east}`; const params = new URLSearchParams({ resolution: resolution.toString(), bounds: boundsStr }); if (filtersStr) params.set('filters', filtersStr); + // Only request the dragged feature's data + params.set('fields', name); fetch(`${getApiBaseUrl()}/api/hexagons?${params}`, { signal: dragAbortRef.current.signal, @@ -786,7 +794,7 @@ export default function App() { }, []); const fetchHexagonStats = useCallback( - async (h3: string, res: number, signal?: AbortSignal) => { + async (h3: string, res: number, signal?: AbortSignal, fields?: string[]) => { const params = new URLSearchParams({ h3, resolution: res.toString(), @@ -805,6 +813,9 @@ export default function App() { .join(','); params.append('filters', filterStr); } + if (fields) { + params.set('fields', fields.join(',')); + } const response = await fetch(`${getApiBaseUrl()}/api/hexagon-stats?${params}`, { signal }); return (await response.json()) as HexagonStatsResponse; }, @@ -903,7 +914,9 @@ export default function App() { try { if (rightPaneTab === 'area') { setLoadingHoveredAreaStats(true); - const stats = await fetchHexagonStats(h3, resolution, signal); + // On hover, only fetch stats for features that have active filters + const hoverFields = Object.keys(filters); + const stats = await fetchHexagonStats(h3, resolution, signal, hoverFields.length > 0 ? hoverFields : undefined); if (!signal.aborted) setHoveredAreaStats(stats); } else if (rightPaneTab === 'properties') { const params = new URLSearchParams({ diff --git a/frontend/src/usePlausible.ts b/frontend/src/usePlausible.ts index c56107d..27a4d72 100644 --- a/frontend/src/usePlausible.ts +++ b/frontend/src/usePlausible.ts @@ -1,5 +1,6 @@ const DOMAIN = 'narrowit.schmelczer.dev'; const ENDPOINT = '/status'; +const IS_DEV = process.env.NODE_ENV !== 'production'; type EventOptions = { props?: Record; @@ -7,6 +8,8 @@ type EventOptions = { }; function sendEvent(name: string, options?: EventOptions) { + if (IS_DEV) return; + const payload: Record = { n: name, u: window.location.href, diff --git a/frontend/webpack.config.js b/frontend/webpack.config.js index f0c4082..15b9889 100644 --- a/frontend/webpack.config.js +++ b/frontend/webpack.config.js @@ -51,12 +51,6 @@ module.exports = (env, argv) => { context: ['/api'], target: 'http://localhost:8001', }, - { - context: ['/status'], - target: 'https://stats.schmelczer.dev', - changeOrigin: true, - pathRewrite: { '^/status': '/api/event' }, - }, ], }, }; diff --git a/server-rs/src/consts.rs b/server-rs/src/consts.rs index f376e1a..da867b9 100644 --- a/server-rs/src/consts.rs +++ b/server-rs/src/consts.rs @@ -1,6 +1,5 @@ pub const HISTOGRAM_BINS: usize = 100; -pub const H3_PRECOMPUTE_MIN: u8 = 7; pub const H3_PRECOMPUTE_MAX: u8 = 12; pub const H3_REQUEST_MIN: u8 = 4; pub const H3_REQUEST_MAX: u8 = 12; diff --git a/server-rs/src/filter.rs b/server-rs/src/filter.rs index 69d6e63..c868220 100644 --- a/server-rs/src/filter.rs +++ b/server-rs/src/filter.rs @@ -74,13 +74,14 @@ pub fn row_passes_filters( enum_filters: &[ParsedEnumFilter], feature_data: &[f32], num_features: usize, - enum_features: &[EnumFeatureData], + enum_data: &[u8], + num_enums: usize, ) -> bool { filters.iter().all(|filter| { let value = feature_data[row * num_features + filter.feat_idx]; value.is_finite() && value >= filter.min && value <= filter.max }) && enum_filters.iter().all(|enum_filter| { - let value = enum_features[enum_filter.enum_idx].data[row]; + let value = enum_data[row * num_enums + enum_filter.enum_idx]; value != ENUM_NULL && enum_filter.allowed.contains(&value) }) } diff --git a/server-rs/src/grid_index.rs b/server-rs/src/grid_index.rs index 6849cbe..e8d2967 100644 --- a/server-rs/src/grid_index.rs +++ b/server-rs/src/grid_index.rs @@ -1,15 +1,20 @@ /// Grid-based spatial index for fast rectangle queries over property rows. /// -/// Divides the UK bounding box into cells of ~0.01 degrees (~1km), -/// each storing indices of rows whose lat/lon falls within that cell. +/// Divides the bounding box into cells of ~0.01 degrees (~1km). +/// Uses a Compressed Sparse Row (CSR) layout: a single flat `values` array +/// plus an `offsets` array so that cell `i` owns `values[offsets[i]..offsets[i+1]]`. +/// This eliminates per-cell Vec overhead (24 bytes each for ptr+len+cap). pub struct GridIndex { min_lat: f32, min_lon: f32, cell_size: f32, cols: usize, rows: usize, - /// cells[row * cols + col] = vec of row indices - cells: Vec>, + /// Flat array of row indices, grouped by cell. + values: Vec, + /// offsets[i] is the start index in `values` for cell i. + /// offsets[num_cells] is values.len() (sentinel). + offsets: Vec, } impl GridIndex { @@ -41,25 +46,47 @@ impl GridIndex { let rows = ((max_lat - min_lat) / cell_size).ceil() as usize + 1; let cols = ((max_lon - min_lon) / cell_size).ceil() as usize + 1; + let num_cells = rows * cols; tracing::debug!( rows_grid = rows, cols_grid = cols, - total_cells = rows * cols, + total_cells = num_cells, cell_size, - "Building grid index" + "Building grid index (CSR)" ); - let mut cells: Vec> = vec![Vec::new(); rows * cols]; + // First pass: count items per cell + let mut counts = vec![0u32; num_cells]; + for index in 0..lat.len() { + let grid_row = ((lat[index] - min_lat) / cell_size) as usize; + let grid_col = ((lon[index] - min_lon) / cell_size) as usize; + counts[grid_row * cols + grid_col] += 1; + } + // Build offsets from counts (prefix sum) + let mut offsets = Vec::with_capacity(num_cells + 1); + let mut running = 0u32; + for &count in &counts { + offsets.push(running); + running += count; + } + offsets.push(running); + let total = running as usize; + + // Second pass: fill values using write cursors + let mut cursors = offsets[..num_cells].to_vec(); + let mut values = vec![0u32; total]; for index in 0..lat.len() { let grid_row = ((lat[index] - min_lat) / cell_size) as usize; let grid_col = ((lon[index] - min_lon) / cell_size) as usize; let cell_index = grid_row * cols + grid_col; - cells[cell_index].push(index as u32); + let pos = cursors[cell_index] as usize; + values[pos] = index as u32; + cursors[cell_index] += 1; } - tracing::debug!("Grid index built"); + tracing::debug!("Grid index built (CSR)"); GridIndex { min_lat, @@ -67,7 +94,8 @@ impl GridIndex { cell_size, cols, rows, - cells, + values, + offsets, } } @@ -83,7 +111,10 @@ impl GridIndex { for row in row_min..=row_max { let row_start = row * self.cols; for col in col_min..=col_max { - result.extend_from_slice(&self.cells[row_start + col]); + let cell_idx = row_start + col; + let start = self.offsets[cell_idx] as usize; + let end = self.offsets[cell_idx + 1] as usize; + result.extend_from_slice(&self.values[start..end]); } } @@ -108,7 +139,10 @@ impl GridIndex { for row in row_min..=row_max { let row_start = row * self.cols; for col in col_min..=col_max { - for &row_idx in &self.cells[row_start + col] { + let cell_idx = row_start + col; + let start = self.offsets[cell_idx] as usize; + let end = self.offsets[cell_idx + 1] as usize; + for &row_idx in &self.values[start..end] { callback(row_idx); } } diff --git a/server-rs/src/main.rs b/server-rs/src/main.rs index eb5d569..4d8b9cf 100644 --- a/server-rs/src/main.rs +++ b/server-rs/src/main.rs @@ -72,8 +72,7 @@ async fn main() -> anyhow::Result<()> { let grid = grid_index::GridIndex::build(&property_data.lat, &property_data.lon, consts::GRID_CELL_SIZE); info!( - "Precomputing H3 cells for resolutions {}-{}", - consts::H3_PRECOMPUTE_MIN, + "Precomputing H3 cells at resolution {}", consts::H3_PRECOMPUTE_MAX ); let h3_cells = data::precompute_h3(&property_data.lat, &property_data.lon)?; diff --git a/server-rs/src/routes/hexagon_stats.rs b/server-rs/src/routes/hexagon_stats.rs index 0f5182c..f05d652 100644 --- a/server-rs/src/routes/hexagon_stats.rs +++ b/server-rs/src/routes/hexagon_stats.rs @@ -8,7 +8,7 @@ use axum::response::IntoResponse; use serde::Deserialize; use tracing::{info, warn}; -use crate::consts::{ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS}; +use crate::consts::{ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS}; use crate::filter::{parse_filters, row_passes_filters}; use crate::state::AppState; @@ -19,6 +19,10 @@ pub struct HexagonStatsParams { pub h3: String, pub resolution: u8, pub filters: Option, + /// Comma-separated feature names to include in stats response. + /// When present (even if empty), only listed features are computed. + /// When absent, all features are returned (backward compatible). + pub fields: Option, } pub async fn get_hexagon_stats( @@ -45,8 +49,6 @@ pub async fn get_hexagon_stats( ), )); } - let resolution_idx = resolution as usize; - let h3_str = params.h3.clone(); let filters_str = params.filters.clone(); let (parsed_filters, parsed_enum_filters) = parse_filters( @@ -56,42 +58,58 @@ pub async fn get_hexagon_stats( ); let num_filters = parsed_filters.len() + parsed_enum_filters.len(); + // Parse optional `fields` param into sets of feature names. + // None = include all, Some = only include listed features. + let field_set: Option> = params.fields.as_ref().map(|fields_str| { + fields_str + .split(',') + .map(|field| field.trim().to_string()) + .filter(|field| !field.is_empty()) + .collect() + }); + let result = tokio::task::spawn_blocking(move || { let start_time = std::time::Instant::now(); - let precomputed: Option<&[u64]> = state - .h3_cells - .get(resolution_idx) - .filter(|cells| !cells.is_empty()) - .map(|cells| cells.as_slice()); + let precomputed = &state.h3_cells; let h3_res = h3o::Resolution::try_from(resolution) .map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?; + let need_parent = resolution < H3_PRECOMPUTE_MAX; let num_features = state.data.num_features; + let num_enums = state.data.num_enums; let feature_data = &state.data.feature_data; + let enum_data = &state.data.enum_data; let enum_features = &state.data.enum_features; let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001); + // Resolve cell at requested resolution from precomputed max-resolution cell + let cell_for_row = |row: usize| -> u64 { + let max_cell = precomputed[row]; + if !need_parent || max_cell == 0 { + return max_cell; + } + h3o::CellIndex::try_from(max_cell) + .ok() + .and_then(|ci| ci.parent(h3_res)) + .map(u64::from) + .unwrap_or(0) + }; + // Collect matching rows let mut matching_rows: Vec = Vec::new(); state .grid .for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| { let row = row_idx as usize; - let row_cell = if let Some(h3_data) = precomputed { - h3_data[row] - } else { - h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64) - .map(|coord| u64::from(coord.to_cell(h3_res))) - .unwrap_or(0) - }; - if row_cell == cell_u64 + if cell_for_row(row) == cell_u64 && row_passes_filters( row, &parsed_filters, &parsed_enum_filters, feature_data, num_features, - enum_features, + enum_data, + num_enums, ) { matching_rows.push(row); @@ -109,6 +127,12 @@ pub async fn get_hexagon_stats( output.push_str(",\"numeric_features\":["); let mut first_numeric = true; for (feature_index, feature_name) in state.data.feature_names.iter().enumerate() { + // Skip features not in the requested set (when fields param is present) + if let Some(ref set) = field_set { + if !set.contains(feature_name.as_str()) { + continue; + } + } let global_stats = &state.data.feature_stats[feature_index]; let histogram_min = global_stats.histogram.min; let histogram_max = global_stats.histogram.max; @@ -178,15 +202,20 @@ pub async fn get_hexagon_stats( output.push_str("],\"enum_features\":["); let mut first_enum = true; for enum_feature in enum_features { + // Skip enum features not in the requested set + if let Some(ref set) = field_set { + if !set.contains(enum_feature.name.as_str()) { + continue; + } + } let enum_index = match state.enum_name_to_idx.get(&enum_feature.name) { Some(&index) => index, None => continue, }; - let enum_data = &state.data.enum_features[enum_index]; - let mut value_counts = vec![0u64; enum_data.values.len()]; + let mut value_counts = vec![0u64; enum_feature.values.len()]; for &row in &matching_rows { - let value = enum_data.data[row]; + let value = enum_data[row * num_enums + enum_index]; if value != ENUM_NULL && (value as usize) < value_counts.len() { value_counts[value as usize] += 1; } @@ -215,7 +244,7 @@ pub async fn get_hexagon_stats( output.push(','); } first_value = false; - write_json_string(&mut output, &enum_data.values[value_index]); + write_json_string(&mut output, &enum_feature.values[value_index]); write!(output, ":{}", count).unwrap(); } output.push_str("}}"); diff --git a/server-rs/src/routes/hexagons.rs b/server-rs/src/routes/hexagons.rs index a515825..ac3c9da 100644 --- a/server-rs/src/routes/hexagons.rs +++ b/server-rs/src/routes/hexagons.rs @@ -9,8 +9,8 @@ use serde::Deserialize; use tracing::{info, warn}; use crate::consts::{ - BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN, - POSTCODE_MIN_RESOLUTION, + BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, + H3_REQUEST_MIN, POSTCODE_MIN_RESOLUTION, }; use crate::filter::parse_filters; use crate::state::AppState; @@ -39,16 +39,21 @@ pub struct HexagonParams { /// Comma-separated filters: `name:min:max,...` /// Rows must have non-NaN values within [min,max] for each filter. filters: Option, + /// Comma-separated feature names to include in min/max aggregation. + /// When present (even if empty), only listed features are aggregated and written. + /// When absent, all features are included (backward compatible). + fields: Option, } -/// Per-cell accumulator for aggregating features +/// Per-cell accumulator for aggregating features. +/// Uses Box<[T]> instead of Vec to avoid storing capacity (saves 8 bytes per field per cell). struct CellAgg { count: u32, - mins: Vec, - maxs: Vec, + mins: Box<[f32]>, + maxs: Box<[f32]>, /// Min/max ordinal indices for enum features (255 = no data yet) - enum_mins: Vec, - enum_maxs: Vec, + enum_mins: Box<[u8]>, + enum_maxs: Box<[u8]>, /// Most common postcode in this cell (only tracked at high resolutions) postcode: Option, postcode_count: u32, @@ -60,10 +65,10 @@ impl CellAgg { fn new(num_features: usize, num_enums: usize) -> Self { CellAgg { count: 0, - mins: vec![f32::INFINITY; num_features], - maxs: vec![f32::NEG_INFINITY; num_features], - enum_mins: vec![ENUM_NULL; num_enums], - enum_maxs: vec![0; num_enums], + mins: vec![f32::INFINITY; num_features].into_boxed_slice(), + maxs: vec![f32::NEG_INFINITY; num_features].into_boxed_slice(), + enum_mins: vec![ENUM_NULL; num_enums].into_boxed_slice(), + enum_maxs: vec![0; num_enums].into_boxed_slice(), postcode: None, postcode_count: 0, lat_sum: 0.0, @@ -93,9 +98,45 @@ impl CellAgg { /// Track min/max ordinal index for each enum feature in this cell. #[inline] - fn add_enums(&mut self, enum_features: &[crate::data::EnumFeatureData], row: usize) { - for (enum_index, enum_feature) in enum_features.iter().enumerate() { - let value = enum_feature.data[row]; + fn add_enums(&mut self, enum_data: &[u8], row: usize, num_enums: usize) { + let base = row * num_enums; + let row_slice = &enum_data[base..base + num_enums]; + for (enum_index, &value) in row_slice.iter().enumerate() { + if value != ENUM_NULL { + if self.enum_mins[enum_index] == ENUM_NULL || value < self.enum_mins[enum_index] { + self.enum_mins[enum_index] = value; + } + if value > self.enum_maxs[enum_index] { + self.enum_maxs[enum_index] = value; + } + } + } + } + + /// Add a row, only aggregating the features at the given indices. + #[inline] + fn add_row_selective(&mut self, feature_data: &[f32], row: usize, num_features: usize, indices: &[usize]) { + self.count += 1; + let base = row * num_features; + for &feat_index in indices { + let value = feature_data[base + feat_index]; + if value.is_finite() { + if value < self.mins[feat_index] { + self.mins[feat_index] = value; + } + if value > self.maxs[feat_index] { + self.maxs[feat_index] = value; + } + } + } + } + + /// Track min/max ordinal index for selected enum features only. + #[inline] + fn add_enums_selective(&mut self, enum_data: &[u8], row: usize, num_enums: usize, indices: &[usize]) { + let base = row * num_enums; + for &enum_index in indices { + let value = enum_data[base + enum_index]; if value != ENUM_NULL { if self.enum_mins[enum_index] == ENUM_NULL || value < self.enum_mins[enum_index] { self.enum_mins[enum_index] = value; @@ -142,6 +183,7 @@ pub(crate) fn write_json_escaped(buf: &mut String, text: &str) { /// Write the hexagons JSON response directly to a String buffer, /// avoiding serde_json::Value allocations entirely. +/// When `numeric_indices` / `enum_indices` are Some, only those features are written. #[allow(clippy::too_many_arguments)] fn write_hexagons_json( buf: &mut String, @@ -153,6 +195,8 @@ fn write_hexagons_json( enum_max_keys: &[String], num_enums: usize, include_postcode: bool, + numeric_indices: Option<&[usize]>, + enum_indices: Option<&[usize]>, ) { buf.push_str("{\"features\":["); let mut first = true; @@ -168,24 +212,49 @@ fn write_hexagons_json( let _ = write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, aggregation.count); - for feat_index in 0..num_features { - if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() { - let _ = write!( - buf, - ",\"{}\":{},\"{}\":{}", - min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index] - ); + if let Some(indices) = numeric_indices { + for &feat_index in indices { + if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() { + let _ = write!( + buf, + ",\"{}\":{},\"{}\":{}", + min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index] + ); + } + } + } else { + for feat_index in 0..num_features { + if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() { + let _ = write!( + buf, + ",\"{}\":{},\"{}\":{}", + min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index] + ); + } } } - for enum_index in 0..num_enums { - if aggregation.enum_mins[enum_index] != ENUM_NULL { - let _ = write!( - buf, - ",\"{}\":{},\"{}\":{}", - enum_min_keys[enum_index], aggregation.enum_mins[enum_index], - enum_max_keys[enum_index], aggregation.enum_maxs[enum_index] - ); + if let Some(indices) = enum_indices { + for &enum_index in indices { + if aggregation.enum_mins[enum_index] != ENUM_NULL { + let _ = write!( + buf, + ",\"{}\":{},\"{}\":{}", + enum_min_keys[enum_index], aggregation.enum_mins[enum_index], + enum_max_keys[enum_index], aggregation.enum_maxs[enum_index] + ); + } + } + } else { + for enum_index in 0..num_enums { + if aggregation.enum_mins[enum_index] != ENUM_NULL { + let _ = write!( + buf, + ",\"{}\":{},\"{}\":{}", + enum_min_keys[enum_index], aggregation.enum_mins[enum_index], + enum_max_keys[enum_index], aggregation.enum_maxs[enum_index] + ); + } } } @@ -253,27 +322,48 @@ pub async fn get_hexagons( ); let num_filters = parsed_filters.len() + parsed_enum_filters.len(); + // Parse optional `fields` param into numeric and enum index sets. + // If `fields` is absent (None), all features are included. + // If `fields` is present (even empty string), only listed features are included. + let field_indices: Option<(Vec, Vec)> = params.fields.as_ref().map(|fields_str| { + let mut numeric_indices = Vec::new(); + let mut enum_indices = Vec::new(); + if !fields_str.is_empty() { + for name in fields_str.split(',') { + let name = name.trim(); + if name.is_empty() { + continue; + } + if let Some(idx) = state.data.feature_names.iter().position(|feat| feat == name) { + numeric_indices.push(idx); + } else if let Some(&idx) = state.enum_name_to_idx.get(name) { + enum_indices.push(idx); + } + } + } + (numeric_indices, enum_indices) + }); + let json_body = tokio::task::spawn_blocking(move || -> Result { let t0 = std::time::Instant::now(); let num_features = state.data.num_features; - let num_enums = state.data.enum_features.len(); + let num_enums = state.data.num_enums; let feature_data = &state.data.feature_data; + let enum_data = &state.data.enum_data; let min_keys = &state.min_keys; let max_keys = &state.max_keys; let enum_min_keys = &state.enum_min_keys; let enum_max_keys = &state.enum_max_keys; - let h3_cells_for_res: Option<&[u64]> = state - .h3_cells - .get(resolution as usize) - .filter(|cells| !cells.is_empty()) - .map(|cells| cells.as_slice()); + let h3_res = h3o::Resolution::try_from(resolution) + .map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?; + let precomputed = &state.h3_cells; + let need_parent = resolution < H3_PRECOMPUTE_MAX; let mut groups: FxHashMap = FxHashMap::default(); - let enum_features = &state.data.enum_features; let include_postcode = resolution >= POSTCODE_MIN_RESOLUTION; // Row-level filter check: numeric must be non-NaN and within [min, max], @@ -283,60 +373,58 @@ pub async fn get_hexagons( let value = feature_data[row * num_features + filter.feat_idx]; value.is_finite() && value >= filter.min && value <= filter.max }) && parsed_enum_filters.iter().all(|enum_filter| { - let value = enum_features[enum_filter.enum_idx].data[row]; + let value = enum_data[row * num_enums + enum_filter.enum_idx]; value != ENUM_NULL && enum_filter.allowed.contains(&value) }) }; - if let Some(precomputed) = h3_cells_for_res { - state - .grid - .for_each_in_bounds(south, west, north, east, |row_idx| { - let row = row_idx as usize; - if !row_passes(row) { - return; - } - let cell_id = precomputed[row]; - let aggregation = groups - .entry(cell_id) - .or_insert_with(|| CellAgg::new(num_features, num_enums)); - aggregation.add_row(feature_data, row, num_features); - aggregation.add_enums(enum_features, row); - if include_postcode { - aggregation.add_postcode( - state.data.postcode(row), - state.data.lat[row], - state.data.lon[row], - ); - } - }); - } else { - let h3_res = h3o::Resolution::try_from(resolution) - .map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?; - state - .grid - .for_each_in_bounds(south, west, north, east, |row_idx| { - let row = row_idx as usize; - if !row_passes(row) { - return; - } - let cell_id = h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64) - .map(|coord| u64::from(coord.to_cell(h3_res))) - .unwrap_or(0); - let aggregation = groups - .entry(cell_id) - .or_insert_with(|| CellAgg::new(num_features, num_enums)); - aggregation.add_row(feature_data, row, num_features); - aggregation.add_enums(enum_features, row); - if include_postcode { - aggregation.add_postcode( - state.data.postcode(row), - state.data.lat[row], - state.data.lon[row], - ); - } - }); - } + // Choose aggregation strategy based on whether fields are specified + let has_selective = field_indices.is_some(); + let (sel_numeric, sel_enum) = field_indices.as_ref().map_or((&[][..], &[][..]), |(ni, ei)| (ni.as_slice(), ei.as_slice())); + + let aggregate_row = |groups: &mut FxHashMap, cell_id: u64, row: usize| { + let aggregation = groups + .entry(cell_id) + .or_insert_with(|| CellAgg::new(num_features, num_enums)); + if has_selective { + aggregation.add_row_selective(feature_data, row, num_features, sel_numeric); + aggregation.add_enums_selective(enum_data, row, num_enums, sel_enum); + } else { + aggregation.add_row(feature_data, row, num_features); + aggregation.add_enums(enum_data, row, num_enums); + } + if include_postcode { + aggregation.add_postcode( + state.data.postcode(row), + state.data.lat[row], + state.data.lon[row], + ); + } + }; + + // Resolve cell at requested resolution from precomputed max-resolution cell. + // For max resolution, use directly; for lower resolutions, derive parent. + let cell_for_row = |row: usize| -> u64 { + let max_cell = precomputed[row]; + if !need_parent || max_cell == 0 { + return max_cell; + } + h3o::CellIndex::try_from(max_cell) + .ok() + .and_then(|ci| ci.parent(h3_res)) + .map(u64::from) + .unwrap_or(0) + }; + + state + .grid + .for_each_in_bounds(south, west, north, east, |row_idx| { + let row = row_idx as usize; + if !row_passes(row) { + return; + } + aggregate_row(&mut groups, cell_for_row(row), row); + }); let t_agg = t0.elapsed(); @@ -351,6 +439,8 @@ pub async fn get_hexagons( enum_max_keys, num_enums, include_postcode, + field_indices.as_ref().map(|(ni, _)| ni.as_slice()), + field_indices.as_ref().map(|(_, ei)| ei.as_slice()), ); let t_total = t0.elapsed(); diff --git a/server-rs/src/routes/pois.rs b/server-rs/src/routes/pois.rs index f4569f3..8caee48 100644 --- a/server-rs/src/routes/pois.rs +++ b/server-rs/src/routes/pois.rs @@ -2,14 +2,14 @@ use std::sync::Arc; use axum::extract::Query; use axum::http::StatusCode; -use axum::response::Json; +use axum::response::{IntoResponse, Json}; use serde::{Deserialize, Serialize}; use tracing::info; use crate::consts::MAX_POIS_PER_REQUEST; -use crate::data::POI; use crate::state::{AppState, POICategoryGroup}; +use super::hexagons::write_json_escaped; use super::parse::parse_bounds; #[derive(Deserialize)] @@ -19,15 +19,10 @@ pub struct POIParams { categories: Option, } -#[derive(Serialize)] -pub struct POIsResponse { - pois: Vec, -} - pub async fn get_pois( state: Arc, Query(params): Query, -) -> Result, (StatusCode, String)> { +) -> Result { let bounds_str = params.bounds.ok_or(( StatusCode::BAD_REQUEST, "bounds parameter is required".into(), @@ -44,7 +39,7 @@ pub async fn get_pois( let num_categories = category_filter.as_ref().map(|cats| cats.len()).unwrap_or(0); - let result = tokio::task::spawn_blocking(move || { + let json_body = tokio::task::spawn_blocking(move || { let t0 = std::time::Instant::now(); let row_indices = state.poi_grid.query(south, west, north, east); @@ -64,36 +59,46 @@ pub async fn get_pois( .collect(); if matching_rows.len() > MAX_POIS_PER_REQUEST { - // Use a power-of-2 sampling step so each POI's inclusion depends - // only on its own priority hash, not on what other POIs are in - // the viewport. This prevents visible reshuffling when panning. let ratio = (matching_rows.len() / MAX_POIS_PER_REQUEST) as u32; let step = ratio.next_power_of_two(); let mask = step - 1; matching_rows.retain(|&row| state.poi_data.priority[row] & mask == 0); - // Statistical noise may leave us slightly over the limit if matching_rows.len() > MAX_POIS_PER_REQUEST { matching_rows.sort_unstable_by_key(|&row| state.poi_data.priority[row]); matching_rows.truncate(MAX_POIS_PER_REQUEST); } } - let pois: Vec = matching_rows - .iter() - .map(|&row| POI { - id: state.poi_data.id[row].clone(), - name: state.poi_data.name[row].clone(), - category: state.poi_data.category.get(row).to_string(), - group: state.poi_data.group.get(row).to_string(), - lat: state.poi_data.lat[row], - lng: state.poi_data.lng[row], - emoji: state.poi_data.emoji.get(row).to_string(), - }) - .collect(); + // Write JSON directly to string buffer, avoiding intermediate POI allocations + let mut buf = String::with_capacity(matching_rows.len() * 128); + buf.push_str("{\"pois\":["); + + for (i, &row) in matching_rows.iter().enumerate() { + if i > 0 { + buf.push(','); + } + buf.push_str("{\"id\":\""); + write_json_escaped(&mut buf, &state.poi_data.id[row]); + buf.push_str("\",\"name\":\""); + write_json_escaped(&mut buf, &state.poi_data.name[row]); + buf.push_str("\",\"category\":\""); + write_json_escaped(&mut buf, state.poi_data.category.get(row)); + buf.push_str("\",\"group\":\""); + write_json_escaped(&mut buf, state.poi_data.group.get(row)); + buf.push_str("\",\"lat\":"); + buf.push_str(&state.poi_data.lat[row].to_string()); + buf.push_str(",\"lng\":"); + buf.push_str(&state.poi_data.lng[row].to_string()); + buf.push_str(",\"emoji\":\""); + write_json_escaped(&mut buf, state.poi_data.emoji.get(row)); + buf.push_str("\"}"); + } + + buf.push_str("]}"); let elapsed = t0.elapsed(); info!( - results = pois.len(), + results = matching_rows.len(), candidates = row_indices.len(), categories = num_categories, categories_raw = categories_str.as_deref().unwrap_or("-"), @@ -101,12 +106,12 @@ pub async fn get_pois( "GET /api/pois" ); - POIsResponse { pois } + buf }) .await .map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?; - Ok(Json(result)) + Ok(([("content-type", "application/json")], json_body)) } #[derive(Serialize)] diff --git a/server-rs/src/routes/properties.rs b/server-rs/src/routes/properties.rs index 8131ffe..47592eb 100644 --- a/server-rs/src/routes/properties.rs +++ b/server-rs/src/routes/properties.rs @@ -8,7 +8,7 @@ use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; use tracing::{info, warn}; -use crate::consts::{DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN, MAX_PROPERTIES_LIMIT}; +use crate::consts::{DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, MAX_PROPERTIES_LIMIT}; use crate::data::EnumFeatureData; use crate::filter::{parse_filters, row_passes_filters}; use crate::state::AppState; @@ -65,6 +65,8 @@ fn non_empty_string(text: &str) -> Option { fn lookup_enum_value( enum_features: &[EnumFeatureData], + enum_data: &[u8], + num_enums: usize, enum_idx: &FxHashMap, row: usize, names: &[&str], @@ -72,7 +74,7 @@ fn lookup_enum_value( for name in names { if let Some(&feature_index) = enum_idx.get(*name) { let enum_feature = &enum_features[feature_index]; - let data_index = enum_feature.data[row]; + let data_index = enum_data[row * num_enums + feature_index]; if data_index != ENUM_NULL { if let Some(value) = enum_feature.values.get(data_index as usize) { return Some(value.clone()); @@ -107,8 +109,6 @@ pub async fn get_hexagon_properties( ), )); } - let resolution_idx = resolution as usize; - let h3_str = params.h3.clone(); let filters_str = params.filters.clone(); let (parsed_filters, parsed_enum_filters) = parse_filters( @@ -120,39 +120,44 @@ pub async fn get_hexagon_properties( let result = tokio::task::spawn_blocking(move || { let t0 = std::time::Instant::now(); - let precomputed: Option<&[u64]> = state - .h3_cells - .get(resolution_idx) - .filter(|cells| !cells.is_empty()) - .map(|cells| cells.as_slice()); + let precomputed = &state.h3_cells; let h3_res = h3o::Resolution::try_from(resolution) .map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?; + let need_parent = resolution < H3_PRECOMPUTE_MAX; let num_features = state.data.num_features; + let num_enums = state.data.num_enums; let feature_data = &state.data.feature_data; + let enum_data_flat = &state.data.enum_data; let enum_features = &state.data.enum_features; let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001); + let cell_for_row = |row: usize| -> u64 { + let max_cell = precomputed[row]; + if !need_parent || max_cell == 0 { + return max_cell; + } + h3o::CellIndex::try_from(max_cell) + .ok() + .and_then(|ci| ci.parent(h3_res)) + .map(u64::from) + .unwrap_or(0) + }; + let mut matching_rows: Vec = Vec::new(); state .grid .for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| { let row = row_idx as usize; - let row_cell = if let Some(h3_data) = precomputed { - h3_data[row] - } else { - h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64) - .map(|coord| u64::from(coord.to_cell(h3_res))) - .unwrap_or(0) - }; - if row_cell == cell_u64 + if cell_for_row(row) == cell_u64 && row_passes_filters( row, &parsed_filters, &parsed_enum_filters, feature_data, num_features, - enum_features, + enum_data_flat, + num_enums, ) { matching_rows.push(row); @@ -181,33 +186,43 @@ pub async fn get_hexagon_properties( Property { address: non_empty_string(state.data.address(row)), postcode: non_empty_string(state.data.postcode(row)), - is_construction_date_approximate: Some(state.data.is_approx_build_date[row]), + is_construction_date_approximate: Some(state.data.is_approx_build_date(row)), property_type: lookup_enum_value( enum_features, + enum_data_flat, + num_enums, &state.enum_name_to_idx, row, &["Property type", "epc_property_type", "pp_property_type"], ), built_form: lookup_enum_value( enum_features, + enum_data_flat, + num_enums, &state.enum_name_to_idx, row, &["Property type/built form", "built_form"], ), duration: lookup_enum_value( enum_features, + enum_data_flat, + num_enums, &state.enum_name_to_idx, row, &["Leashold/Freehold", "duration"], ), current_energy_rating: lookup_enum_value( enum_features, + enum_data_flat, + num_enums, &state.enum_name_to_idx, row, &["Current energy rating", "current_energy_rating"], ), potential_energy_rating: lookup_enum_value( enum_features, + enum_data_flat, + num_enums, &state.enum_name_to_idx, row, &["Potential energy rating", "potential_energy_rating"], diff --git a/server-rs/src/state.rs b/server-rs/src/state.rs index 0c146fd..7152a66 100644 --- a/server-rs/src/state.rs +++ b/server-rs/src/state.rs @@ -13,9 +13,9 @@ pub struct POICategoryGroup { pub struct AppState { pub data: PropertyData, pub grid: GridIndex, - /// h3_cells[resolution][row_idx] = precomputed H3 cell ID. - /// Empty Vec for resolutions not precomputed. - pub h3_cells: Vec>, + /// h3_cells[row_idx] = precomputed H3 cell ID at max resolution (12). + /// Parent cells for lower resolutions derived via CellIndex::parent(). + pub h3_cells: Vec, pub poi_data: POIData, pub poi_grid: GridIndex, /// Precomputed JSON key names: "min_{feature_name}" for each numeric feature diff --git a/server-rs/src/tests.rs b/server-rs/src/tests.rs index 4728843..467d265 100644 --- a/server-rs/src/tests.rs +++ b/server-rs/src/tests.rs @@ -102,12 +102,13 @@ mod filter_tests { let feature_names = vec!["price".to_string()]; let feature_data = vec![f32::NAN]; let enum_features: Vec = vec![]; + let enum_data: Vec = vec![]; let (numeric, enums) = parse_filters(Some("price:-inf:inf"), &feature_names, &enum_features); assert_eq!(numeric.len(), 1, "Should parse -inf:inf as valid filter"); - let passes = row_passes_filters(0, &numeric, &enums, &feature_data, 1, &enum_features); + let passes = row_passes_filters(0, &numeric, &enums, &feature_data, 1, &enum_data, 0); assert!(!passes, "NaN should fail filter even with infinite range"); } @@ -116,15 +117,16 @@ mod filter_tests { let enum_features = vec![EnumFeatureData { name: "rating".to_string(), values: vec!["A".to_string(), "B".to_string()], - data: vec![0], }]; let feature_names: Vec = vec![]; + // Row-major enum data: 1 row, 1 enum, value=0 (index into "A") + let enum_data: Vec = vec![0]; let (numeric, enums) = parse_filters(Some("rating:"), &feature_names, &enum_features); assert_eq!(enums.len(), 1); assert!(enums[0].allowed.is_empty()); - let passes = row_passes_filters(0, &numeric, &enums, &[], 0, &enum_features); + let passes = row_passes_filters(0, &numeric, &enums, &[], 0, &enum_data, 1); assert!(!passes, "Empty allowed set should reject all rows"); } @@ -133,7 +135,6 @@ mod filter_tests { let enum_features = vec![EnumFeatureData { name: "rating".to_string(), values: vec!["A".to_string(), "B".to_string()], - data: vec![0], }]; let feature_names: Vec = vec![];