Refactor and improve

This commit is contained in:
Andras Schmelczer 2026-02-03 20:26:57 +00:00
parent 1f148b2185
commit 242acff987
22 changed files with 754 additions and 1053 deletions

View file

@ -4,11 +4,11 @@ use axum::response::Json;
use serde::Serialize;
use tracing::info;
use crate::data::Histogram;
use crate::data::{Histogram, PropertyData};
use crate::features::{ENUM_FEATURE_GROUPS, FEATURE_GROUPS};
use crate::state::AppState;
#[derive(Serialize)]
#[derive(Clone, Serialize)]
#[serde(tag = "type")]
pub enum FeatureInfo {
#[serde(rename = "numeric")]
@ -32,18 +32,19 @@ pub enum FeatureInfo {
},
}
#[derive(Serialize)]
#[derive(Clone, Serialize)]
pub struct FeatureGroupResponse {
name: String,
features: Vec<FeatureInfo>,
}
#[derive(Serialize)]
#[derive(Clone, Serialize)]
pub struct FeaturesResponse {
groups: Vec<FeatureGroupResponse>,
pub groups: Vec<FeatureGroupResponse>,
}
pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
/// Build the features response at startup. Called once and cached in AppState.
pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
// Collect all group names in order, merging numeric and enum groups with the same name
let mut group_names: Vec<&str> = Vec::new();
for feature_group in FEATURE_GROUPS {
@ -66,13 +67,12 @@ pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
for feature_group in FEATURE_GROUPS {
if feature_group.name == group_name {
for feature_config in feature_group.features {
if let Some(feat_idx) = state
.data
if let Some(feat_idx) = data
.feature_names
.iter()
.position(|feat_name| feat_name == feature_config.name)
{
let stats = &state.data.feature_stats[feat_idx];
let stats = &data.feature_stats[feat_idx];
features.push(FeatureInfo::Numeric {
name: feature_config.name.to_string(),
min: stats.slider_min,
@ -92,19 +92,22 @@ pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
for enum_group in ENUM_FEATURE_GROUPS {
if enum_group.name == group_name {
for enum_config in enum_group.features {
if let Some(enum_feature) = state
.data
.enum_features
// Find the feature index by name
if let Some(feat_idx) = data
.feature_names
.iter()
.find(|enum_feat| enum_feat.name == enum_config.name)
.position(|name| name == enum_config.name)
{
features.push(FeatureInfo::Enum {
name: enum_config.name.to_string(),
values: enum_feature.values.clone(),
description: enum_config.description,
detail: enum_config.detail,
source: enum_config.source,
});
// Check if this feature has enum values
if let Some(values) = data.enum_values.get(&feat_idx) {
features.push(FeatureInfo::Enum {
name: enum_config.name.to_string(),
values: values.clone(),
description: enum_config.description,
detail: enum_config.detail,
source: enum_config.source,
});
}
}
}
}
@ -118,22 +121,10 @@ pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
}
}
let num_numeric: usize = groups
.iter()
.flat_map(|group| &group.features)
.filter(|feature| matches!(feature, FeatureInfo::Numeric { .. }))
.count();
let num_enum: usize = groups
.iter()
.flat_map(|group| &group.features)
.filter(|feature| matches!(feature, FeatureInfo::Enum { .. }))
.count();
info!(
numeric = num_numeric,
enums = num_enum,
groups = groups.len(),
"GET /api/features"
);
Json(FeaturesResponse { groups })
FeaturesResponse { groups }
}
/// Handler for the features endpoint.
///
/// Logs the request and replies with a copy of the response stored in
/// `AppState::features_response`; nothing is recomputed per request.
pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
    info!("GET /api/features");
    let cached = state.features_response.clone();
    Json(cached)
}

View file

@ -1,18 +1,50 @@
use std::fmt::Write;
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::IntoResponse;
use serde::Deserialize;
use axum::response::Json;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::consts::{ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
use crate::filter::{parse_filters, row_passes_filters};
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
use crate::parsing::{h3_cell_bounds, parse_filters, row_passes_filters};
use crate::state::AppState;
use super::parse::h3_cell_bounds;
/// Histogram of one numeric feature for the rows of a single hexagon.
///
/// `get_hexagon_stats` copies the range fields (`min`, `max`, `p1`, `p99`)
/// from the feature's global histogram and counts the hexagon's rows into
/// `counts`.
#[derive(Serialize)]
pub struct HistogramStats {
/// Lower bound taken from the feature's global histogram.
min: f64,
/// Upper bound taken from the feature's global histogram.
max: f64,
/// 1st percentile (left edge of main distribution)
p1: f64,
/// 99th percentile (right edge of main distribution)
p99: f64,
/// Per-bin row counts (`HISTOGRAM_BINS` entries). The first bin holds
/// values below `p1`, the last bin holds values at or above `p99`, and the
/// bins in between split the `p1..p99` range into equal widths.
counts: Vec<u64>,
}
/// Summary statistics of one numeric feature within a hexagon.
///
/// `get_hexagon_stats` only emits an entry when at least one matching row has
/// a finite value for the feature.
#[derive(Serialize)]
pub struct NumericFeatureStats {
/// Feature name as listed in `feature_names`.
name: String,
/// Number of matching rows whose value for this feature is finite.
count: usize,
/// Smallest finite value among the matching rows.
min: f64,
/// Largest finite value among the matching rows.
max: f64,
/// Arithmetic mean of the finite values (sum accumulated in `f64`).
mean: f64,
/// Distribution of the finite values over the histogram bins.
histogram: HistogramStats,
}
/// Per-value occurrence counts of one enum feature within a hexagon.
///
/// `get_hexagon_stats` omits the entry entirely when no value occurs.
#[derive(Serialize)]
pub struct EnumFeatureStats {
/// Feature name as listed in `feature_names`.
name: String,
/// Enum value label -> number of matching rows carrying that value.
/// Values that never occur are left out.
counts: HashMap<String, u64>,
}
/// JSON body returned by `get_hexagon_stats`.
#[derive(Serialize)]
pub struct HexagonStatsResponse {
/// Total number of rows collected for the hexagon (length of the
/// matching-row list), independent of which features were requested.
count: usize,
/// Statistics for the requested numeric features.
numeric_features: Vec<NumericFeatureStats>,
/// Value counts for the requested enum features.
enum_features: Vec<EnumFeatureStats>,
}
#[derive(Deserialize)]
pub struct HexagonStatsParams {
@ -20,15 +52,14 @@ pub struct HexagonStatsParams {
pub resolution: u8,
pub filters: Option<String>,
/// Comma-separated feature names to include in stats response.
/// When present (even if empty), only listed features are computed.
/// When absent, all features are returned (backward compatible).
/// Only listed features are computed; if absent or empty, no features are returned.
pub fields: Option<String>,
}
pub async fn get_hexagon_stats(
state: Arc<AppState>,
Query(params): Query<HexagonStatsParams>,
) -> Result<impl IntoResponse, (StatusCode, String)> {
) -> Result<Json<HexagonStatsResponse>, (StatusCode, String)> {
let cell = h3o::CellIndex::from_str(&params.h3).map_err(|error| {
warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index");
(
@ -57,36 +88,34 @@ pub async fn get_hexagon_stats(
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.data.enum_features,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
// Parse optional `fields` param into sets of feature names.
// None = include all, Some = only include listed features.
let field_set: Option<std::collections::HashSet<String>> =
params.fields.as_ref().map(|fields_str| {
let fields_specified = params.fields.is_some();
let field_set: std::collections::HashSet<String> = params
.fields
.as_ref()
.map(|fields_str| {
fields_str
.split(',')
.map(|field| field.trim().to_string())
.filter(|field| !field.is_empty())
.collect()
});
})
.unwrap_or_default();
let result = tokio::task::spawn_blocking(move || {
let response = tokio::task::spawn_blocking(move || {
let start_time = std::time::Instant::now();
let precomputed = &state.h3_cells;
let h3_res = h3o::Resolution::try_from(resolution)
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
let need_parent = resolution < H3_PRECOMPUTE_MAX;
let num_features = state.data.num_features;
let num_enums = state.data.num_enums;
let feature_data = &state.data.feature_data;
let enum_data = &state.data.enum_data;
let enum_features = &state.data.enum_features;
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
// Resolve cell at requested resolution from precomputed max-resolution cell
let cell_for_row = |row: usize| -> u64 {
let max_cell = precomputed[row];
if !need_parent || max_cell == 0 {
@ -99,7 +128,6 @@ pub async fn get_hexagon_stats(
.unwrap_or(0)
};
// Collect matching rows
let mut matching_rows: Vec<usize> = Vec::new();
state
.grid
@ -112,8 +140,6 @@ pub async fn get_hexagon_stats(
&parsed_enum_filters,
feature_data,
num_features,
enum_data,
num_enums,
)
{
matching_rows.push(row);
@ -122,140 +148,108 @@ pub async fn get_hexagon_stats(
let total_count = matching_rows.len();
// Build JSON directly via string buffer
let mut output = String::with_capacity(4096);
output.push_str("{\"count\":");
write!(output, "{}", total_count).unwrap();
let mut numeric_features = Vec::new();
let mut enum_features_out = Vec::new();
// Numeric features: compute count, min, max, sum, histogram using global bin edges
output.push_str(",\"numeric_features\":[");
let mut first_numeric = true;
for (feature_index, feature_name) in state.data.feature_names.iter().enumerate() {
// Skip features not in the requested set (when fields param is present)
if let Some(ref set) = field_set {
if !set.contains(feature_name.as_str()) {
continue;
}
}
let global_stats = &state.data.feature_stats[feature_index];
let histogram_min = global_stats.histogram.min;
let histogram_max = global_stats.histogram.max;
let bin_width = global_stats.histogram.bin_width;
let mut count = 0usize;
let mut min_value = f32::INFINITY;
let mut max_value = f32::NEG_INFINITY;
let mut sum = 0.0f64; // keep f64 for mean precision
let mut bins = vec![0u64; HISTOGRAM_BINS];
for &row in &matching_rows {
let value = feature_data[row * num_features + feature_index];
if value.is_finite() {
count += 1;
if value < min_value {
min_value = value;
}
if value > max_value {
max_value = value;
}
sum += value as f64;
// Bin into histogram using global edges (cast to f64 for bin index math)
if bin_width > 0.0 {
let bin_index = ((value as f64 - histogram_min as f64) / bin_width as f64)
.floor() as isize;
let clamped_index =
bin_index.max(0).min((HISTOGRAM_BINS - 1) as isize) as usize;
bins[clamped_index] += 1;
}
}
}
if count == 0 {
if fields_specified && !field_set.contains(feature_name.as_str()) {
continue;
}
if !first_numeric {
output.push(',');
}
first_numeric = false;
let mean = sum / count as f64;
output.push_str("{\"name\":");
write_json_string(&mut output, feature_name);
write!(output, ",\"count\":{}", count).unwrap();
write!(output, ",\"min\":{}", format_num(min_value)).unwrap();
write!(output, ",\"max\":{}", format_num(max_value)).unwrap();
write!(output, ",\"mean\":{}", format_f64(mean)).unwrap();
output.push_str(",\"histogram\":{\"min\":");
write!(output, "{}", format_num(histogram_min)).unwrap();
output.push_str(",\"max\":");
write!(output, "{}", format_num(histogram_max)).unwrap();
output.push_str(",\"bin_width\":");
write!(output, "{}", format_num(bin_width)).unwrap();
output.push_str(",\"counts\":[");
for (bin_index, &bin_count) in bins.iter().enumerate() {
if bin_index > 0 {
output.push(',');
// Check if this is an enum feature
if let Some(enum_values) = state.data.enum_values.get(&feature_index) {
// Enum feature: count occurrences of each value
let mut value_counts = vec![0u64; enum_values.len()];
for &row in &matching_rows {
let value = feature_data[row * num_features + feature_index];
if value.is_finite() {
let idx = value as usize;
if idx < value_counts.len() {
value_counts[idx] += 1;
}
}
}
let counts: HashMap<String, u64> = value_counts
.iter()
.enumerate()
.filter(|(_, &count)| count > 0)
.map(|(idx, &count)| (enum_values[idx].clone(), count))
.collect();
if !counts.is_empty() {
enum_features_out.push(EnumFeatureStats {
name: feature_name.clone(),
counts,
});
}
} else {
// Numeric feature: compute stats and histogram
let global_hist = &state.data.feature_stats[feature_index].histogram;
let p1 = global_hist.p1;
let p99 = global_hist.p99;
let mut count = 0usize;
let mut min_value = f32::INFINITY;
let mut max_value = f32::NEG_INFINITY;
let mut sum = 0.0f64;
let mut bins = vec![0u64; HISTOGRAM_BINS];
// Compute middle bin width (between p1 and p99)
let middle_bins = HISTOGRAM_BINS.saturating_sub(2);
let middle_width = if middle_bins > 0 && p99 > p1 {
(p99 - p1) / middle_bins as f32
} else {
0.0
};
for &row in &matching_rows {
let value = feature_data[row * num_features + feature_index];
if value.is_finite() {
count += 1;
if value < min_value {
min_value = value;
}
if value > max_value {
max_value = value;
}
sum += value as f64;
// Bin using p1/p99 outlier structure
let bin = if value < p1 {
0 // Low outlier bin
} else if value >= p99 {
HISTOGRAM_BINS - 1 // High outlier bin
} else if middle_width > 0.0 {
// Middle bins (1 to n-2)
let middle_bin = ((value - p1) / middle_width) as usize;
(1 + middle_bin).min(HISTOGRAM_BINS - 2)
} else {
HISTOGRAM_BINS / 2 // Fallback if p1 == p99
};
bins[bin] += 1;
}
}
if count > 0 {
numeric_features.push(NumericFeatureStats {
name: feature_name.clone(),
count,
min: min_value as f64,
max: max_value as f64,
mean: sum / count as f64,
histogram: HistogramStats {
min: global_hist.min as f64,
max: global_hist.max as f64,
p1: p1 as f64,
p99: p99 as f64,
counts: bins,
},
});
}
write!(output, "{}", bin_count).unwrap();
}
output.push_str("]}}")
}
// Enum features: count per value
output.push_str("],\"enum_features\":[");
let mut first_enum = true;
for enum_feature in enum_features {
// Skip enum features not in the requested set
if let Some(ref set) = field_set {
if !set.contains(enum_feature.name.as_str()) {
continue;
}
}
let enum_index = match state.enum_name_to_idx.get(&enum_feature.name) {
Some(&index) => index,
None => continue,
};
let mut value_counts = vec![0u64; enum_feature.values.len()];
for &row in &matching_rows {
let value = enum_data[row * num_enums + enum_index];
if value != ENUM_NULL && (value as usize) < value_counts.len() {
value_counts[value as usize] += 1;
}
}
// Only include if there are any non-zero counts
let has_values = value_counts.iter().any(|&count| count > 0);
if !has_values {
continue;
}
if !first_enum {
output.push(',');
}
first_enum = false;
output.push_str("{\"name\":");
write_json_string(&mut output, &enum_feature.name);
output.push_str(",\"counts\":{");
let mut first_value = true;
for (value_index, &count) in value_counts.iter().enumerate() {
if count == 0 {
continue;
}
if !first_value {
output.push(',');
}
first_value = false;
write_json_string(&mut output, &enum_feature.values[value_index]);
write!(output, ":{}", count).unwrap();
}
output.push_str("}}");
}
output.push_str("]}");
let elapsed = start_time.elapsed();
info!(
h3 = %h3_str,
@ -267,46 +261,15 @@ pub async fn get_hexagon_stats(
"GET /api/hexagon-stats"
);
Ok(output)
Ok(HexagonStatsResponse {
count: total_count,
numeric_features,
enum_features: enum_features_out,
})
})
.await
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
.map_err(|error: String| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
Ok((
[(axum::http::header::CONTENT_TYPE, "application/json")],
result,
))
}
/// Append `value` to `output` as a quoted JSON string literal.
///
/// Escapes `"` and `\`, uses the short escapes for newline, carriage return
/// and tab, and writes every other control character below U+0020 as a
/// `\u00XX` escape. JSON forbids raw control characters inside strings, so
/// emitting them unescaped would make the whole document unparseable.
fn write_json_string(output: &mut String, value: &str) {
    // At least the payload plus the two surrounding quotes will be appended.
    output.reserve(value.len() + 2);
    output.push('"');
    for character in value.chars() {
        match character {
            '"' => output.push_str("\\\""),
            '\\' => output.push_str("\\\\"),
            '\n' => output.push_str("\\n"),
            '\r' => output.push_str("\\r"),
            '\t' => output.push_str("\\t"),
            // Remaining C0 control characters have no short form.
            control if control < '\x20' => {
                output.push_str(&format!("\\u{:04x}", control as u32));
            }
            other => output.push(other),
        }
    }
    output.push('"');
}
/// Render an `f32` as text for JSON output.
///
/// The value is widened to `f64` first. Whole numbers with magnitude below
/// 1e15 are printed with exactly one decimal place (`3` becomes `"3.0"`);
/// everything else uses the default `Display` formatting of the widened value.
fn format_num(value: f32) -> String {
    let widened = f64::from(value);
    let prints_with_decimal = widened.fract() == 0.0 && widened.abs() < 1e15;
    if !prints_with_decimal {
        return widened.to_string();
    }
    format!("{:.1}", widened)
}
fn format_f64(value: f64) -> String {
if value.fract() == 0.0 && value.abs() < 1e15 {
format!("{:.1}", value)
} else {
format!("{}", value)
}
Ok(Json(response))
}

View file

@ -1,35 +1,22 @@
use std::fmt::{self, Write};
use std::sync::Arc;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::IntoResponse;
use axum::response::Json;
use rustc_hash::FxHashMap;
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use tracing::{info, warn};
use crate::consts::{
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX,
H3_REQUEST_MIN, POSTCODE_MIN_RESOLUTION,
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN,
};
use crate::filter::parse_filters;
use crate::parsing::{parse_bounds, parse_filters, row_passes_filters};
use crate::state::AppState;
use super::parse::parse_bounds;
struct HumanBytes(usize);
impl fmt::Display for HumanBytes {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
let bytes = self.0;
if bytes >= 1_000_000 {
write!(formatter, "{:.1} MB", bytes as f64 / 1_000_000.0)
} else if bytes >= 1_000 {
write!(formatter, "{:.1} KB", bytes as f64 / 1_000.0)
} else {
write!(formatter, "{} B", bytes)
}
}
#[derive(Serialize)]
pub struct HexagonsResponse {
features: Vec<Map<String, Value>>,
}
#[derive(Deserialize)]
@ -51,28 +38,14 @@ struct CellAgg {
count: u32,
mins: Box<[f32]>,
maxs: Box<[f32]>,
/// Min/max ordinal indices for enum features (255 = no data yet)
enum_mins: Box<[u8]>,
enum_maxs: Box<[u8]>,
/// Most common postcode in this cell (only tracked at high resolutions)
postcode: Option<String>,
postcode_count: u32,
lat_sum: f64,
lon_sum: f64,
}
impl CellAgg {
fn new(num_features: usize, num_enums: usize) -> Self {
fn new(num_features: usize) -> Self {
CellAgg {
count: 0,
mins: vec![f32::INFINITY; num_features].into_boxed_slice(),
maxs: vec![f32::NEG_INFINITY; num_features].into_boxed_slice(),
enum_mins: vec![ENUM_NULL; num_enums].into_boxed_slice(),
enum_maxs: vec![0; num_enums].into_boxed_slice(),
postcode: None,
postcode_count: 0,
lat_sum: 0.0,
lon_sum: 0.0,
}
}
@ -96,23 +69,6 @@ impl CellAgg {
}
}
/// Fold every enum column of `row` into this cell's min/max ordinal trackers.
///
/// `enum_data` is read as a row-major table with `num_enums` columns. Entries
/// equal to `ENUM_NULL` are skipped; a stored minimum of `ENUM_NULL` means no
/// value has been recorded for that column yet.
#[inline]
fn add_enums(&mut self, enum_data: &[u8], row: usize, num_enums: usize) {
    let start = row * num_enums;
    let observed = enum_data[start..start + num_enums].iter().copied();
    for (slot, ordinal) in observed.enumerate() {
        if ordinal == ENUM_NULL {
            continue;
        }
        let current_min = self.enum_mins[slot];
        if current_min == ENUM_NULL || ordinal < current_min {
            self.enum_mins[slot] = ordinal;
        }
        if ordinal > self.enum_maxs[slot] {
            self.enum_maxs[slot] = ordinal;
        }
    }
}
/// Add a row, only aggregating the features at the given indices.
#[inline]
fn add_row_selective(
@ -136,178 +92,57 @@ impl CellAgg {
}
}
}
/// Same as the full enum aggregation, but restricted to the columns listed in
/// `indices`.
///
/// `enum_data` is read as a row-major table with `num_enums` columns. Entries
/// equal to `ENUM_NULL` are skipped.
#[inline]
fn add_enums_selective(
    &mut self,
    enum_data: &[u8],
    row: usize,
    num_enums: usize,
    indices: &[usize],
) {
    let start = row * num_enums;
    for slot in indices.iter().copied() {
        let ordinal = enum_data[start + slot];
        if ordinal == ENUM_NULL {
            continue;
        }
        let current_min = self.enum_mins[slot];
        if current_min == ENUM_NULL || ordinal < current_min {
            self.enum_mins[slot] = ordinal;
        }
        if ordinal > self.enum_maxs[slot] {
            self.enum_maxs[slot] = ordinal;
        }
    }
}
/// Accumulate the centroid sums and remember a postcode for this cell.
///
/// The coordinates are always added to `lat_sum` / `lon_sum`, even when
/// `postcode` is empty. The first non-empty postcode seen is kept; later rows
/// only bump `postcode_count` when they carry that same postcode, and rows with
/// a different postcode are ignored.
#[inline]
fn add_postcode(&mut self, postcode: &str, lat: f32, lon: f32) {
    self.lat_sum += f64::from(lat);
    self.lon_sum += f64::from(lon);

    if postcode.is_empty() {
        return;
    }

    if let Some(existing) = &self.postcode {
        if existing.as_str() == postcode {
            self.postcode_count += 1;
        }
        return;
    }

    // First non-empty postcode for this cell.
    self.postcode = Some(postcode.to_string());
    self.postcode_count = 1;
}
}
/// Append `text` to `buf`, escaped so it can sit inside a JSON string literal.
///
/// The surrounding quotes are NOT written. `"` and `\` are backslash-escaped,
/// newline / carriage return / tab use their short escapes, and any other
/// character below U+0020 becomes a `\u00XX` escape.
pub(crate) fn write_json_escaped(buf: &mut String, text: &str) {
    for ch in text.chars() {
        let short_escape = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };

        if let Some(escape) = short_escape {
            buf.push_str(escape);
        } else if ch < '\x20' {
            // Writing into a String cannot fail, so the result is ignored.
            let _ = write!(buf, "\\u{:04x}", ch as u32);
        } else {
            buf.push(ch);
        }
    }
}
/// Write the hexagons JSON response directly to a String buffer,
/// avoiding serde_json::Value allocations entirely.
/// When `numeric_indices` / `enum_indices` are Some, only those features are written.
#[allow(clippy::too_many_arguments)]
fn write_hexagons_json(
buf: &mut String,
/// Build feature maps from aggregated cell data.
fn build_feature_maps(
groups: &FxHashMap<u64, CellAgg>,
min_keys: &[String],
max_keys: &[String],
num_features: usize,
enum_min_keys: &[String],
enum_max_keys: &[String],
num_enums: usize,
include_postcode: bool,
numeric_indices: Option<&[usize]>,
enum_indices: Option<&[usize]>,
) {
buf.push_str("{\"features\":[");
let mut first = true;
indices: Option<&[usize]>,
) -> Vec<Map<String, Value>> {
let mut features = Vec::with_capacity(groups.len());
for (&cell_id, aggregation) in groups {
let Some(cell) = h3o::CellIndex::try_from(cell_id).ok() else {
continue;
};
if !first {
buf.push(',');
}
first = false;
let mut map = Map::new();
map.insert("h3".into(), Value::String(cell.to_string()));
map.insert("count".into(), Value::Number(aggregation.count.into()));
let _ = write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, aggregation.count);
if let Some(indices) = numeric_indices {
for &feat_index in indices {
if aggregation.mins[feat_index].is_finite()
&& aggregation.maxs[feat_index].is_finite()
{
let _ = write!(
buf,
",\"{}\":{},\"{}\":{}",
min_keys[feat_index],
aggregation.mins[feat_index],
max_keys[feat_index],
aggregation.maxs[feat_index]
);
}
}
let iter: Box<dyn Iterator<Item = usize>> = if let Some(idx) = indices {
Box::new(idx.iter().copied())
} else {
for feat_index in 0..num_features {
if aggregation.mins[feat_index].is_finite()
&& aggregation.maxs[feat_index].is_finite()
{
let _ = write!(
buf,
",\"{}\":{},\"{}\":{}",
min_keys[feat_index],
aggregation.mins[feat_index],
max_keys[feat_index],
aggregation.maxs[feat_index]
);
Box::new(0..num_features)
};
for feat_index in iter {
if aggregation.mins[feat_index].is_finite()
&& aggregation.maxs[feat_index].is_finite()
{
if let (Some(min_num), Some(max_num)) = (
serde_json::Number::from_f64(aggregation.mins[feat_index] as f64),
serde_json::Number::from_f64(aggregation.maxs[feat_index] as f64),
) {
map.insert(min_keys[feat_index].clone(), Value::Number(min_num));
map.insert(max_keys[feat_index].clone(), Value::Number(max_num));
}
}
}
if let Some(indices) = enum_indices {
for &enum_index in indices {
if aggregation.enum_mins[enum_index] != ENUM_NULL {
let _ = write!(
buf,
",\"{}\":{},\"{}\":{}",
enum_min_keys[enum_index],
aggregation.enum_mins[enum_index],
enum_max_keys[enum_index],
aggregation.enum_maxs[enum_index]
);
}
}
} else {
for enum_index in 0..num_enums {
if aggregation.enum_mins[enum_index] != ENUM_NULL {
let _ = write!(
buf,
",\"{}\":{},\"{}\":{}",
enum_min_keys[enum_index],
aggregation.enum_mins[enum_index],
enum_max_keys[enum_index],
aggregation.enum_maxs[enum_index]
);
}
}
}
if include_postcode {
if let Some(ref postcode) = aggregation.postcode {
let total = aggregation.count as f64;
let centroid_lat = aggregation.lat_sum / total;
let centroid_lon = aggregation.lon_sum / total;
if centroid_lat.is_finite() && centroid_lon.is_finite() {
buf.push_str(",\"postcode\":\"");
write_json_escaped(buf, postcode);
let _ = write!(buf, "\",\"lat\":{},\"lon\":{}", centroid_lat, centroid_lon);
}
}
}
buf.push('}');
features.push(map);
}
buf.push_str("]}");
features
}
pub async fn get_hexagons(
state: Arc<AppState>,
Query(params): Query<HexagonParams>,
) -> Result<impl IntoResponse, (StatusCode, String)> {
) -> Result<Json<HexagonsResponse>, (StatusCode, String)> {
let resolution = params.resolution;
if !(H3_REQUEST_MIN..=H3_REQUEST_MAX).contains(&resolution) {
warn!(
@ -346,50 +181,40 @@ pub async fn get_hexagons(
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.data.enum_features,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
// Parse optional `fields` param into numeric and enum index sets.
// Parse optional `fields` param into feature indices.
// If `fields` is absent (None), all features are included.
// If `fields` is present (even empty string), only listed features are included.
let field_indices: Option<(Vec<usize>, Vec<usize>)> =
params.fields.as_ref().map(|fields_str| {
let mut numeric_indices = Vec::new();
let mut enum_indices = Vec::new();
if !fields_str.is_empty() {
for name in fields_str.split(',') {
let name = name.trim();
if name.is_empty() {
continue;
}
if let Some(idx) = state
.data
.feature_names
.iter()
.position(|feat| feat == name)
{
numeric_indices.push(idx);
} else if let Some(&idx) = state.enum_name_to_idx.get(name) {
enum_indices.push(idx);
}
let field_indices: Option<Vec<usize>> = params.fields.as_ref().map(|fields_str| {
if fields_str.is_empty() {
return Vec::new();
}
fields_str
.split(',')
.filter_map(|name| {
let name = name.trim();
if name.is_empty() {
return None;
}
}
(numeric_indices, enum_indices)
});
state
.data
.feature_names
.iter()
.position(|feat| feat == name)
})
.collect()
});
let json_body = tokio::task::spawn_blocking(move || -> Result<String, String> {
let response = tokio::task::spawn_blocking(move || -> Result<HexagonsResponse, String> {
let t0 = std::time::Instant::now();
let num_features = state.data.num_features;
let num_enums = state.data.num_enums;
let feature_data = &state.data.feature_data;
let enum_data = &state.data.enum_data;
let min_keys = &state.min_keys;
let max_keys = &state.max_keys;
let enum_min_keys = &state.enum_min_keys;
let enum_max_keys = &state.enum_max_keys;
let h3_res = h3o::Resolution::try_from(resolution)
.map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?;
@ -398,50 +223,20 @@ pub async fn get_hexagons(
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
let include_postcode = resolution >= POSTCODE_MIN_RESOLUTION;
// Row-level filter check: numeric must be non-NaN and within [min, max],
// enum must have value index in the allowed set
let row_passes = |row: usize| -> bool {
parsed_filters.iter().all(|filter| {
let value = feature_data[row * num_features + filter.feat_idx];
value.is_finite() && value >= filter.min && value <= filter.max
}) && parsed_enum_filters.iter().all(|enum_filter| {
let value = enum_data[row * num_enums + enum_filter.enum_idx];
value != ENUM_NULL && enum_filter.allowed.contains(&value)
})
};
// Choose aggregation strategy based on whether fields are specified
let has_selective = field_indices.is_some();
let (sel_numeric, sel_enum) = field_indices
.as_ref()
.map_or((&[][..], &[][..]), |(ni, ei)| {
(ni.as_slice(), ei.as_slice())
});
let sel_indices = field_indices.as_deref().unwrap_or(&[]);
let aggregate_row = |groups: &mut FxHashMap<u64, CellAgg>, cell_id: u64, row: usize| {
let aggregation = groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features, num_enums));
.or_insert_with(|| CellAgg::new(num_features));
if has_selective {
aggregation.add_row_selective(feature_data, row, num_features, sel_numeric);
aggregation.add_enums_selective(enum_data, row, num_enums, sel_enum);
aggregation.add_row_selective(feature_data, row, num_features, sel_indices);
} else {
aggregation.add_row(feature_data, row, num_features);
aggregation.add_enums(enum_data, row, num_enums);
}
if include_postcode {
aggregation.add_postcode(
state.data.postcode(row),
state.data.lat[row],
state.data.lon[row],
);
}
};
// Resolve cell at requested resolution from precomputed max-resolution cell.
// For max resolution, use directly; for lower resolutions, derive parent.
let cell_for_row = |row: usize| -> u64 {
let max_cell = precomputed[row];
if !need_parent || max_cell == 0 {
@ -458,7 +253,13 @@ pub async fn get_hexagons(
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
if !row_passes(row) {
if !row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
) {
return;
}
aggregate_row(&mut groups, cell_for_row(row), row);
@ -466,19 +267,12 @@ pub async fn get_hexagons(
let t_agg = t0.elapsed();
let mut json_buf = String::with_capacity(groups.len() * 128);
write_hexagons_json(
&mut json_buf,
let features = build_feature_maps(
&groups,
min_keys,
max_keys,
num_features,
enum_min_keys,
enum_max_keys,
num_enums,
include_postcode,
field_indices.as_ref().map(|(ni, _)| ni.as_slice()),
field_indices.as_ref().map(|(_, ei)| ei.as_slice()),
field_indices.as_deref(),
);
let t_total = t0.elapsed();
@ -489,15 +283,14 @@ pub async fn get_hexagons(
filters_raw = filters_str.as_deref().unwrap_or("-"),
agg_ms = format_args!("{:.1}", t_agg.as_secs_f64() * 1000.0),
total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0),
size = format_args!("{}", HumanBytes(json_buf.len())),
"GET /api/hexagons"
);
Ok(json_buf)
Ok(HexagonsResponse { features })
})
.await
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
Ok(([("content-type", "application/json")], json_body))
Ok(Json(response))
}

View file

@ -1,14 +0,0 @@
mod features;
mod hexagon_stats;
pub(crate) mod hexagons;
mod og_image;
pub(crate) mod parse;
mod pois;
pub(crate) mod properties;
pub use features::get_features;
pub use hexagon_stats::get_hexagon_stats;
pub use hexagons::get_hexagons;
pub use og_image::get_og_image;
pub use pois::{get_poi_categories, get_pois};
pub use properties::get_hexagon_properties;

View file

@ -3,6 +3,7 @@ use std::sync::Arc;
use axum::extract::Query;
use axum::http::{header, StatusCode};
use axum::response::IntoResponse;
use tracing::{info, warn};
use crate::state::AppState;
@ -48,7 +49,7 @@ pub async fn get_og_image(
};
let url = format!("{}/screenshot{}", sidecar_url, qs);
tracing::info!("Proxying OG screenshot request to: {}", url);
info!("Proxying OG screenshot request to: {}", url);
match state.http_client.get(&url).send().await {
Ok(resp) if resp.status().is_success() => match resp.bytes().await {
@ -62,18 +63,18 @@ pub async fn get_og_image(
)
.into_response(),
Err(err) => {
tracing::warn!("Failed to read sidecar response: {}", err);
warn!("Failed to read sidecar response: {}", err);
(StatusCode::BAD_GATEWAY, "Failed to read screenshot").into_response()
}
},
Ok(resp) => {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
tracing::warn!("Sidecar returned status {}: {}", status, body);
warn!("Sidecar returned status {}: {}", status, body);
(StatusCode::BAD_GATEWAY, "Screenshot sidecar error").into_response()
}
Err(err) => {
tracing::warn!("Failed to reach sidecar: {}", err);
warn!("Failed to reach sidecar: {}", err);
(StatusCode::BAD_GATEWAY, "Screenshot sidecar unavailable").into_response()
}
}

View file

@ -1,52 +0,0 @@
use axum::http::StatusCode;
/// Axis-aligned lat/lon bounding box of an H3 cell, widened by `buffer`.
///
/// Scans the cell's boundary vertices for the extreme latitudes and
/// longitudes, then pushes each edge outwards by `buffer` (same unit as the
/// vertex coordinates). Returns `(min_lat, min_lon, max_lat, max_lon)`.
pub fn h3_cell_bounds(cell: h3o::CellIndex, buffer: f64) -> (f64, f64, f64, f64) {
    let mut south = f64::INFINITY;
    let mut west = f64::INFINITY;
    let mut north = f64::NEG_INFINITY;
    let mut east = f64::NEG_INFINITY;

    let outline = cell.boundary();
    for (lat, lon) in outline.iter().map(|corner| (corner.lat(), corner.lng())) {
        if lat < south {
            south = lat;
        }
        if lat > north {
            north = lat;
        }
        if lon < west {
            west = lon;
        }
        if lon > east {
            east = lon;
        }
    }

    (south - buffer, west - buffer, north + buffer, east + buffer)
}
/// Parse a `south,west,north,east` string into four `f64` values.
///
/// Each comma-separated part is trimmed and parsed as `f64`.
///
/// # Errors
///
/// Returns `400 Bad Request` with a fixed message when any part fails to
/// parse or when the number of parts is not exactly four.
pub fn parse_bounds(bounds_str: &str) -> Result<(f64, f64, f64, f64), (StatusCode, String)> {
    let malformed = || -> (StatusCode, String) {
        (
            StatusCode::BAD_REQUEST,
            String::from("Invalid bounds format. Use: south,west,north,east"),
        )
    };

    let mut values: Vec<f64> = Vec::new();
    for piece in bounds_str.split(',') {
        let number = piece.trim().parse::<f64>().map_err(|_| malformed())?;
        values.push(number);
    }

    match values.as_slice() {
        &[south, west, north, east] => Ok((south, west, north, east)),
        _ => Err(malformed()),
    }
}

View file

@ -2,15 +2,31 @@ use std::sync::Arc;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::{IntoResponse, Json};
use axum::response::Json;
use serde::{Deserialize, Serialize};
use tracing::info;
use crate::consts::MAX_POIS_PER_REQUEST;
use crate::state::{AppState, POICategoryGroup};
use crate::data::POICategoryGroup;
use crate::parsing::parse_bounds;
use crate::state::AppState;
use super::hexagons::write_json_escaped;
use super::parse::parse_bounds;
/// One point of interest as serialized in the `get_pois` response.
///
/// `get_pois` fills every field from the matching row of `state.poi_data`.
#[derive(Serialize)]
// Silences clippy's complaint about the all-caps type name `POI`.
#[allow(clippy::upper_case_acronyms)]
pub struct POI {
/// Identifier copied from `poi_data.id`.
id: String,
/// Display name copied from `poi_data.name`.
name: String,
/// Category label looked up via `poi_data.category`.
category: String,
/// Group label looked up via `poi_data.group`.
group: String,
/// Latitude copied from `poi_data.lat`.
lat: f32,
/// Longitude copied from `poi_data.lng`.
lng: f32,
/// Emoji looked up via `poi_data.emoji`.
emoji: String,
}
/// JSON body returned by `get_pois`: an object with a single `pois` array.
#[derive(Serialize)]
pub struct POIsResponse {
/// The points of interest selected for the request.
pois: Vec<POI>,
}
#[derive(Deserialize)]
pub struct POIParams {
@ -22,7 +38,7 @@ pub struct POIParams {
pub async fn get_pois(
state: Arc<AppState>,
Query(params): Query<POIParams>,
) -> Result<impl IntoResponse, (StatusCode, String)> {
) -> Result<Json<POIsResponse>, (StatusCode, String)> {
let bounds_str = params.bounds.ok_or((
StatusCode::BAD_REQUEST,
"bounds parameter is required".into(),
@ -43,12 +59,10 @@ pub async fn get_pois(
let num_categories = category_filter.as_ref().map(|cats| cats.len()).unwrap_or(0);
let json_body = tokio::task::spawn_blocking(move || {
let pois = tokio::task::spawn_blocking(move || {
let t0 = std::time::Instant::now();
let row_indices = state.poi_grid.query(south, west, north, east);
// Collect matching row indices first, then sample randomly so the
// subset covers the viewport uniformly instead of clustering in one area.
let mut matching_rows: Vec<usize> = row_indices
.iter()
.filter_map(|&row_idx| {
@ -73,36 +87,22 @@ pub async fn get_pois(
}
}
// Write JSON directly to string buffer, avoiding intermediate POI allocations
let mut buf = String::with_capacity(matching_rows.len() * 128);
buf.push_str("{\"pois\":[");
for (i, &row) in matching_rows.iter().enumerate() {
if i > 0 {
buf.push(',');
}
buf.push_str("{\"id\":\"");
write_json_escaped(&mut buf, &state.poi_data.id[row]);
buf.push_str("\",\"name\":\"");
write_json_escaped(&mut buf, &state.poi_data.name[row]);
buf.push_str("\",\"category\":\"");
write_json_escaped(&mut buf, state.poi_data.category.get(row));
buf.push_str("\",\"group\":\"");
write_json_escaped(&mut buf, state.poi_data.group.get(row));
buf.push_str("\",\"lat\":");
buf.push_str(&state.poi_data.lat[row].to_string());
buf.push_str(",\"lng\":");
buf.push_str(&state.poi_data.lng[row].to_string());
buf.push_str(",\"emoji\":\"");
write_json_escaped(&mut buf, state.poi_data.emoji.get(row));
buf.push_str("\"}");
}
buf.push_str("]}");
let pois: Vec<POI> = matching_rows
.iter()
.map(|&row| POI {
id: state.poi_data.id[row].clone(),
name: state.poi_data.name[row].clone(),
category: state.poi_data.category.get(row).to_string(),
group: state.poi_data.group.get(row).to_string(),
lat: state.poi_data.lat[row],
lng: state.poi_data.lng[row],
emoji: state.poi_data.emoji.get(row).to_string(),
})
.collect();
let elapsed = t0.elapsed();
info!(
results = matching_rows.len(),
results = pois.len(),
candidates = row_indices.len(),
categories = num_categories,
categories_raw = categories_str.as_deref().unwrap_or("-"),
@ -110,12 +110,12 @@ pub async fn get_pois(
"GET /api/pois"
);
buf
pois
})
.await
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
Ok(([("content-type", "application/json")], json_body))
Ok(Json(POIsResponse { pois }))
}
#[derive(Serialize)]

View file

@ -9,15 +9,12 @@ use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::consts::{
DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN,
DEFAULT_PROPERTIES_LIMIT, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN,
MAX_PROPERTIES_LIMIT,
};
use crate::data::EnumFeatureData;
use crate::filter::{parse_filters, row_passes_filters};
use crate::parsing::{h3_cell_bounds, parse_filters, row_passes_filters};
use crate::state::AppState;
use super::parse::h3_cell_bounds;
#[derive(Deserialize)]
pub struct HexagonPropertiesParams {
pub h3: String,
@ -66,21 +63,25 @@ fn non_empty_string(text: &str) -> Option<String> {
}
}
/// Look up an enum feature value by trying multiple possible column names.
/// Uses the unified feature model: enum values stored as f32 indices in feature_data.
fn lookup_enum_value(
enum_features: &[EnumFeatureData],
enum_data: &[u8],
num_enums: usize,
enum_idx: &FxHashMap<String, usize>,
feature_names: &[String],
feature_data: &[f32],
num_features: usize,
enum_values: &FxHashMap<usize, Vec<String>>,
row: usize,
names: &[&str],
) -> Option<String> {
for name in names {
if let Some(&feature_index) = enum_idx.get(*name) {
let enum_feature = &enum_features[feature_index];
let data_index = enum_data[row * num_enums + feature_index];
if data_index != ENUM_NULL {
if let Some(value) = enum_feature.values.get(data_index as usize) {
return Some(value.clone());
if let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == *name) {
if let Some(values) = enum_values.get(&feat_idx) {
let value = feature_data[row * num_features + feat_idx];
if value.is_finite() {
let idx = value as usize;
if let Some(str_value) = values.get(idx) {
return Some(str_value.clone());
}
}
}
}
@ -120,7 +121,7 @@ pub async fn get_hexagon_properties(
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.data.enum_features,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
@ -131,10 +132,9 @@ pub async fn get_hexagon_properties(
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
let need_parent = resolution < H3_PRECOMPUTE_MAX;
let num_features = state.data.num_features;
let num_enums = state.data.num_enums;
let feature_data = &state.data.feature_data;
let enum_data_flat = &state.data.enum_data;
let enum_features = &state.data.enum_features;
let feature_names = &state.data.feature_names;
let enum_values = &state.data.enum_values;
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
@ -162,8 +162,6 @@ pub async fn get_hexagon_properties(
&parsed_enum_filters,
feature_data,
num_features,
enum_data_flat,
num_enums,
)
{
matching_rows.push(row);
@ -185,7 +183,11 @@ pub async fn get_hexagon_properties(
.map(|&row| {
let mut features = FxHashMap::default();
let base = row * num_features;
for (feat_idx, feat_name) in state.data.feature_names.iter().enumerate() {
for (feat_idx, feat_name) in feature_names.iter().enumerate() {
// Skip enum features in the generic features map
if enum_values.contains_key(&feat_idx) {
continue;
}
let value = feature_data[base + feat_idx];
if value.is_finite() {
features.insert(feat_name.clone(), value);
@ -197,42 +199,42 @@ pub async fn get_hexagon_properties(
postcode: non_empty_string(state.data.postcode(row)),
is_construction_date_approximate: Some(state.data.is_approx_build_date(row)),
property_type: lookup_enum_value(
enum_features,
enum_data_flat,
num_enums,
&state.enum_name_to_idx,
feature_names,
feature_data,
num_features,
enum_values,
row,
&["Property type", "epc_property_type", "pp_property_type"],
),
built_form: lookup_enum_value(
enum_features,
enum_data_flat,
num_enums,
&state.enum_name_to_idx,
feature_names,
feature_data,
num_features,
enum_values,
row,
&["Property type/built form", "built_form"],
),
duration: lookup_enum_value(
enum_features,
enum_data_flat,
num_enums,
&state.enum_name_to_idx,
feature_names,
feature_data,
num_features,
enum_values,
row,
&["Leashold/Freehold", "duration"],
),
current_energy_rating: lookup_enum_value(
enum_features,
enum_data_flat,
num_enums,
&state.enum_name_to_idx,
feature_names,
feature_data,
num_features,
enum_values,
row,
&["Current energy rating", "current_energy_rating"],
),
potential_energy_rating: lookup_enum_value(
enum_features,
enum_data_flat,
num_enums,
&state.enum_name_to_idx,
feature_names,
feature_data,
num_features,
enum_values,
row,
&["Potential energy rating", "potential_energy_rating"],
),