Various fixes

This commit is contained in:
Andras Schmelczer 2026-02-04 22:29:42 +00:00
parent 34a4d0ba86
commit 55598aaaa0
14 changed files with 1250 additions and 130 deletions

View file

@ -8,7 +8,7 @@ use axum::response::Json;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN};
use crate::parsing::{h3_cell_bounds, parse_filters, row_passes_filters};
use crate::state::AppState;
@ -87,7 +87,7 @@ pub async fn get_hexagon_stats(
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.feature_name_to_index,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
@ -188,15 +188,17 @@ pub async fn get_hexagon_stats(
let global_hist = &state.data.feature_stats[feature_index].histogram;
let p1 = global_hist.p1;
let p99 = global_hist.p99;
// Use same bin count as global histogram for consistency
let num_bins = global_hist.counts.len();
let mut count = 0usize;
let mut min_value = f32::INFINITY;
let mut max_value = f32::NEG_INFINITY;
let mut sum = 0.0f64;
let mut bins = vec![0u64; HISTOGRAM_BINS];
let mut bins = vec![0u64; num_bins];
// Compute middle bin width (between p1 and p99)
let middle_bins = HISTOGRAM_BINS.saturating_sub(2);
let middle_bins = num_bins.saturating_sub(2);
let middle_width = if middle_bins > 0 && p99 > p1 {
(p99 - p1) / middle_bins as f32
} else {
@ -219,13 +221,13 @@ pub async fn get_hexagon_stats(
let bin = if value < p1 {
0 // Low outlier bin
} else if value >= p99 {
HISTOGRAM_BINS - 1 // High outlier bin
num_bins - 1 // High outlier bin
} else if middle_width > 0.0 {
// Middle bins (1 to n-2)
let middle_bin = ((value - p1) / middle_width) as usize;
(1 + middle_bin).min(HISTOGRAM_BINS - 2)
(1 + middle_bin).min(num_bins - 2)
} else {
HISTOGRAM_BINS / 2 // Fallback if p1 == p99
num_bins / 2 // Fallback if p1 == p99
};
bins[bin] += 1;
}

View file

@ -9,7 +9,7 @@ use serde_json::{Map, Value};
use tracing::{info, warn};
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN};
use crate::parsing::{parse_bounds, parse_filters, row_passes_filters};
use crate::parsing::{bounds_intersect, h3_cell_bounds, parse_bounds, parse_filters, row_passes_filters};
use crate::state::AppState;
#[derive(Serialize)]
@ -92,21 +92,29 @@ impl CellAgg {
}
}
/// Build feature maps from aggregated cell data.
/// Build feature maps from aggregated cell data, filtering to only cells that intersect the query bounds.
fn build_feature_maps(
groups: &FxHashMap<u64, CellAgg>,
min_keys: &[String],
max_keys: &[String],
num_features: usize,
indices: Option<&[usize]>,
query_bounds: (f64, f64, f64, f64), // (south, west, north, east)
) -> Vec<Map<String, Value>> {
let mut features = Vec::with_capacity(groups.len());
let (q_south, q_west, q_north, q_east) = query_bounds;
for (&cell_id, aggregation) in groups {
let Some(cell) = h3o::CellIndex::try_from(cell_id).ok() else {
continue;
};
// Filter out cells that don't intersect the query bounds
let (c_south, c_west, c_north, c_east) = h3_cell_bounds(cell, 0.0);
if !bounds_intersect(c_south, c_west, c_north, c_east, q_south, q_west, q_north, q_east) {
continue;
}
let mut map = Map::new();
map.insert("h3".into(), Value::String(cell.to_string()));
map.insert("count".into(), Value::Number(aggregation.count.into()));
@ -166,7 +174,7 @@ pub async fn get_hexagons(
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.feature_name_to_index,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
@ -185,11 +193,7 @@ pub async fn get_hexagons(
if name.is_empty() {
return None;
}
state
.data
.feature_names
.iter()
.position(|feat| feat == name)
state.feature_name_to_index.get(name).copied()
})
.collect()
});
@ -209,20 +213,6 @@ pub async fn get_hexagons(
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
let has_selective = field_indices.is_some();
let sel_indices = field_indices.as_deref().unwrap_or(&[]);
let aggregate_row = |groups: &mut FxHashMap<u64, CellAgg>, cell_id: u64, row: usize| {
let aggregation = groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features));
if has_selective {
aggregation.add_row_selective(feature_data, row, num_features, sel_indices);
} else {
aggregation.add_row(feature_data, row, num_features);
}
};
let cell_for_row = |row: usize| -> u64 {
let max_cell = precomputed[row];
if !need_parent || max_cell == 0 {
@ -235,21 +225,48 @@ pub async fn get_hexagons(
.unwrap_or(0)
};
state
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
if !row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
) {
return;
}
aggregate_row(&mut groups, cell_for_row(row), row);
});
// Hoist has_selective branch outside the hot loop to avoid per-row branching
if let Some(sel_indices) = field_indices.as_deref() {
state
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
if !row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
) {
return;
}
let cell_id = cell_for_row(row);
let aggregation = groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features));
aggregation.add_row_selective(feature_data, row, num_features, sel_indices);
});
} else {
state
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
if !row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
) {
return;
}
let cell_id = cell_for_row(row);
let aggregation = groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features));
aggregation.add_row(feature_data, row, num_features);
});
}
let t_agg = t0.elapsed();
@ -259,6 +276,7 @@ pub async fn get_hexagons(
max_keys,
num_features,
field_indices.as_deref(),
(south, west, north, east),
);
let t_total = t0.elapsed();

View file

@ -1,6 +1,6 @@
use std::sync::Arc;
use axum::extract::Query;
use axum::extract::{Path, Query};
use axum::http::StatusCode;
use axum::response::Json;
use rustc_hash::FxHashMap;
@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use tracing::info;
use crate::parsing::{parse_bounds, parse_filters, row_passes_filters};
use crate::parsing::{bounds_intersect, parse_bounds, parse_filters, row_passes_filters};
use crate::state::AppState;
#[derive(Serialize)]
@ -96,7 +96,7 @@ pub async fn get_postcodes(
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.feature_name_to_index,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
@ -113,11 +113,7 @@ pub async fn get_postcodes(
if name.is_empty() {
return None;
}
state
.data
.feature_names
.iter()
.position(|feat| feat == name)
state.feature_name_to_index.get(name).copied()
})
.collect()
});
@ -134,12 +130,6 @@ pub async fn get_postcodes(
let has_selective = field_indices.is_some();
let sel_indices = field_indices.as_deref().unwrap_or(&[]);
// Step 1: Find postcodes within bounds using spatial grid on centroids
let postcode_indices: Vec<u32> = postcode_data.grid.query(south, west, north, east);
// Step 2: For each postcode, aggregate properties
let mut postcode_aggs: FxHashMap<usize, PostcodeAgg> = FxHashMap::default();
// Build postcode -> rows mapping by iterating properties in bounds
// and grouping by their postcode
let mut postcode_rows: FxHashMap<usize, Vec<usize>> = FxHashMap::default();
@ -165,24 +155,23 @@ pub async fn get_postcodes(
}
});
// Now aggregate for each postcode that's in bounds and has properties
for &pc_idx in &postcode_indices {
let idx = pc_idx as usize;
if let Some(rows) = postcode_rows.get(&idx) {
let agg = postcode_aggs
.entry(idx)
.or_insert_with(|| PostcodeAgg::new(num_features));
for &row in rows {
if has_selective {
agg.add_row_selective(feature_data, row, num_features, sel_indices);
} else {
agg.add_row(feature_data, row, num_features);
}
// Aggregate for each postcode that has properties in bounds
// (polygon intersection check happens later when building response)
let mut postcode_aggs: FxHashMap<usize, PostcodeAgg> = FxHashMap::default();
for (&pc_idx, rows) in &postcode_rows {
let agg = postcode_aggs
.entry(pc_idx)
.or_insert_with(|| PostcodeAgg::new(num_features));
for &row in rows {
if has_selective {
agg.add_row_selective(feature_data, row, num_features, sel_indices);
} else {
agg.add_row(feature_data, row, num_features);
}
}
}
// Build response
// Build response, filtering postcodes to only those whose polygon intersects query bounds
let mut features = Vec::with_capacity(postcode_aggs.len());
for (pc_idx, aggregation) in postcode_aggs {
@ -190,6 +179,23 @@ pub async fn get_postcodes(
continue;
}
// Compute postcode polygon bounding box and check intersection with query bounds
let vertices = &postcode_data.vertices[pc_idx];
let (mut pc_south, mut pc_north) = (f64::INFINITY, f64::NEG_INFINITY);
let (mut pc_west, mut pc_east) = (f64::INFINITY, f64::NEG_INFINITY);
for &[lon, lat] in vertices {
let lon_f = lon as f64;
let lat_f = lat as f64;
if lat_f < pc_south { pc_south = lat_f; }
if lat_f > pc_north { pc_north = lat_f; }
if lon_f < pc_west { pc_west = lon_f; }
if lon_f > pc_east { pc_east = lon_f; }
}
if !bounds_intersect(pc_south, pc_west, pc_north, pc_east, south, west, north, east) {
continue;
}
let mut map = Map::new();
map.insert(
"postcode".into(),
@ -198,7 +204,7 @@ pub async fn get_postcodes(
map.insert("count".into(), Value::Number(aggregation.count.into()));
// Add vertices as array of [lon, lat] pairs
let vertices_array: Vec<Value> = postcode_data.vertices[pc_idx]
let vertices_array: Vec<Value> = vertices
.iter()
.map(|[lon, lat]| Value::Array(vec![Value::from(*lon as f64), Value::from(*lat as f64)]))
.collect();
@ -244,3 +250,44 @@ pub async fn get_postcodes(
Ok(Json(response))
}
/// Response body returned by `get_postcode_lookup` for a single postcode.
#[derive(Serialize)]
pub struct PostcodeLookupResponse {
/// The postcode in the normalized form that was used for the lookup.
pub postcode: String,
/// Latitude of the postcode centroid.
pub latitude: f64,
/// Longitude of the postcode centroid.
pub longitude: f64,
/// Polygon vertices as [[lon, lat], ...] for rendering highlight
pub vertices: Vec<[f64; 2]>,
}
/// Look up a single postcode and return its centroid coordinates and polygon.
///
/// The path segment is upper-cased and every run of whitespace is collapsed to
/// a single space before it is looked up in `postcode_data.postcode_to_idx`.
/// If that lookup misses and the input contains no separator at all (for
/// example `SW1A1AA`), a second lookup is attempted with a space inserted
/// before the final three characters.
///
/// NOTE(review): the fallback assumes index keys are stored as
/// `"<outward> <inward>"` with a three-character inward code (UK format) —
/// confirm against the code that builds `postcode_to_idx`.
///
/// # Errors
///
/// Returns [`StatusCode::NOT_FOUND`] when neither form is a known postcode.
pub async fn get_postcode_lookup(
    state: Arc<AppState>,
    Path(postcode): Path<String>,
) -> Result<Json<PostcodeLookupResponse>, StatusCode> {
    // Normalize the postcode: uppercase, trim, collapse whitespace runs to one space.
    let normalized = postcode
        .to_uppercase()
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ");

    let postcode_data = &state.postcode_data;

    // Try the normalized form first so every input that matched before still
    // matches exactly the same entry; only fall back when it is unknown.
    let direct_hit = postcode_data.postcode_to_idx.get(&normalized).copied();
    let (normalized, idx) = match direct_hit {
        Some(idx) => (normalized, idx),
        None => {
            let respaced =
                respace_compact_postcode(&normalized).ok_or(StatusCode::NOT_FOUND)?;
            let idx = postcode_data
                .postcode_to_idx
                .get(&respaced)
                .copied()
                .ok_or(StatusCode::NOT_FOUND)?;
            (respaced, idx)
        }
    };

    let (lat, lon) = postcode_data.centroids[idx];
    let vertices: Vec<[f64; 2]> = postcode_data.vertices[idx]
        .iter()
        .map(|[lo, la]| [*lo as f64, *la as f64])
        .collect();

    info!(postcode = %normalized, "GET /api/postcode/{postcode}");

    Ok(Json(PostcodeLookupResponse {
        postcode: normalized,
        latitude: lat as f64,
        longitude: lon as f64,
        vertices,
    }))
}

/// Re-insert the separator into a postcode that was typed without one.
///
/// Returns `None` unless `compact` consists solely of 5 to 7 ASCII
/// alphanumeric characters, which is the length range of a space-less UK
/// postcode (2–4 character outward code plus 3 character inward code).
fn respace_compact_postcode(compact: &str) -> Option<String> {
    let plausible = (5..=7).contains(&compact.len())
        && compact.bytes().all(|byte| byte.is_ascii_alphanumeric());
    if !plausible {
        return None;
    }

    // Every byte is ASCII, so any byte offset is a valid char boundary.
    let (outward, inward) = compact.split_at(compact.len() - 3);
    Some(format!("{outward} {inward}"))
}

View file

@ -66,7 +66,7 @@ fn non_empty_string(text: &str) -> Option<String> {
/// Look up an enum feature value by trying multiple possible column names.
/// Uses the unified feature model: enum values stored as f32 indices in feature_data.
fn lookup_enum_value(
feature_names: &[String],
feature_name_to_index: &FxHashMap<String, usize>,
feature_data: &[f32],
num_features: usize,
enum_values: &FxHashMap<usize, Vec<String>>,
@ -74,7 +74,7 @@ fn lookup_enum_value(
names: &[&str],
) -> Option<String> {
for name in names {
if let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == *name) {
if let Some(&feat_idx) = feature_name_to_index.get(*name) {
if let Some(values) = enum_values.get(&feat_idx) {
let value = feature_data[row * num_features + feat_idx];
if value.is_finite() {
@ -120,7 +120,7 @@ pub async fn get_hexagon_properties(
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.feature_name_to_index,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
@ -134,6 +134,7 @@ pub async fn get_hexagon_properties(
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
let feature_names = &state.data.feature_names;
let feature_name_to_index = &state.feature_name_to_index;
let enum_values = &state.data.enum_values;
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
@ -199,7 +200,7 @@ pub async fn get_hexagon_properties(
postcode: non_empty_string(state.data.postcode(row)),
is_construction_date_approximate: Some(state.data.is_approx_build_date(row)),
property_type: lookup_enum_value(
feature_names,
feature_name_to_index,
feature_data,
num_features,
enum_values,
@ -207,7 +208,7 @@ pub async fn get_hexagon_properties(
&["Property type", "epc_property_type", "pp_property_type"],
),
built_form: lookup_enum_value(
feature_names,
feature_name_to_index,
feature_data,
num_features,
enum_values,
@ -215,7 +216,7 @@ pub async fn get_hexagon_properties(
&["Property type/built form", "built_form"],
),
duration: lookup_enum_value(
feature_names,
feature_name_to_index,
feature_data,
num_features,
enum_values,
@ -223,7 +224,7 @@ pub async fn get_hexagon_properties(
&["Leashold/Freehold", "duration"],
),
current_energy_rating: lookup_enum_value(
feature_names,
feature_name_to_index,
feature_data,
num_features,
enum_values,
@ -231,7 +232,7 @@ pub async fn get_hexagon_properties(
&["Current energy rating", "current_energy_rating"],
),
potential_energy_rating: lookup_enum_value(
feature_names,
feature_name_to_index,
feature_data,
num_features,
enum_values,

View file

@ -106,18 +106,7 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s
"paint": { "background-color": bg_color }
})];
// Water layer
if layer_ids.contains(&"water") {
style_layers.push(serde_json::json!({
"id": "water",
"type": "fill",
"source": "protomaps",
"source-layer": "water",
"paint": { "fill-color": water_color }
}));
}
// Land/earth layer
// Land/earth layer (must come before water so rivers render on top)
if layer_ids.contains(&"earth") {
style_layers.push(serde_json::json!({
"id": "earth",
@ -128,9 +117,9 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s
}));
}
// Landuse
// Landuse (parks, forests) - render before water
if layer_ids.contains(&"landuse") {
let landuse_color = if is_dark { "#1f2d1f" } else { "#d8e8c8" };
let park_color = if is_dark { "#2d4a2d" } else { "#c8e6c8" };
style_layers.push(serde_json::json!({
"id": "landuse-park",
"type": "fill",
@ -141,7 +130,18 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s
["==", ["get", "pmap:kind"], "nature_reserve"],
["==", ["get", "pmap:kind"], "forest"]
],
"paint": { "fill-color": landuse_color, "fill-opacity": 0.5 }
"paint": { "fill-color": park_color, "fill-opacity": 0.7 }
}));
}
// Water layer (after earth so rivers show on top of land)
if layer_ids.contains(&"water") {
style_layers.push(serde_json::json!({
"id": "water",
"type": "fill",
"source": "protomaps",
"source-layer": "water",
"paint": { "fill-color": water_color }
}));
}