//! Hexagon aggregation endpoint: groups rows into H3 cells and serialises
//! per-cell min/max summaries as a JSON response body.
use std::fmt::{self, Write};
|
|
use std::sync::Arc;
|
|
|
|
use axum::extract::Query;
|
|
use axum::http::StatusCode;
|
|
use axum::response::IntoResponse;
|
|
use rustc_hash::FxHashMap;
|
|
use serde::Deserialize;
|
|
use tracing::{info, warn};
|
|
|
|
use crate::consts::{
|
|
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN,
|
|
POSTCODE_MIN_RESOLUTION,
|
|
};
|
|
use crate::filter::parse_filters;
|
|
use crate::state::AppState;
|
|
|
|
use super::parse::parse_bounds;
|
|
|
|
/// Byte count that renders itself in decimal (SI) units for log output.
struct HumanBytes(usize);

impl fmt::Display for HumanBytes {
    /// Formats as `"<n> B"` below 1 000 bytes, `"<x.x> KB"` below 1 000 000
    /// bytes, and `"<x.x> MB"` from there upwards.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.0 {
            size if size >= 1_000_000 => {
                write!(formatter, "{:.1} MB", size as f64 / 1_000_000.0)
            }
            size if size >= 1_000 => {
                write!(formatter, "{:.1} KB", size as f64 / 1_000.0)
            }
            size => write!(formatter, "{} B", size),
        }
    }
}
|
|
|
|
#[derive(Deserialize)]
|
|
pub struct HexagonParams {
|
|
resolution: u8,
|
|
bounds: Option<String>,
|
|
/// Comma-separated filters: `name:min:max,...`
|
|
/// Rows must have non-NaN values within [min,max] for each filter.
|
|
filters: Option<String>,
|
|
}
|
|
|
|
/// Per-cell accumulator for aggregating features
|
|
struct CellAgg {
|
|
count: u32,
|
|
mins: Vec<f32>,
|
|
maxs: Vec<f32>,
|
|
/// Min/max ordinal indices for enum features (255 = no data yet)
|
|
enum_mins: Vec<u8>,
|
|
enum_maxs: Vec<u8>,
|
|
/// Most common postcode in this cell (only tracked at high resolutions)
|
|
postcode: Option<String>,
|
|
postcode_count: u32,
|
|
lat_sum: f64,
|
|
lon_sum: f64,
|
|
}
|
|
|
|
impl CellAgg {
|
|
fn new(num_features: usize, num_enums: usize) -> Self {
|
|
CellAgg {
|
|
count: 0,
|
|
mins: vec![f32::INFINITY; num_features],
|
|
maxs: vec![f32::NEG_INFINITY; num_features],
|
|
enum_mins: vec![ENUM_NULL; num_enums],
|
|
enum_maxs: vec![0; num_enums],
|
|
postcode: None,
|
|
postcode_count: 0,
|
|
lat_sum: 0.0,
|
|
lon_sum: 0.0,
|
|
}
|
|
}
|
|
|
|
/// Add a row using row-major feature_data layout.
|
|
/// feature_data[row * num_features + feat_idx] — all features for one row
|
|
/// are contiguous, so this reads a single cache line per ~8 features.
|
|
#[inline]
|
|
fn add_row(&mut self, feature_data: &[f32], row: usize, num_features: usize) {
|
|
self.count += 1;
|
|
let base = row * num_features;
|
|
let row_slice = &feature_data[base..base + num_features];
|
|
for (feat_index, &value) in row_slice.iter().enumerate() {
|
|
if value.is_finite() {
|
|
if value < self.mins[feat_index] {
|
|
self.mins[feat_index] = value;
|
|
}
|
|
if value > self.maxs[feat_index] {
|
|
self.maxs[feat_index] = value;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Track min/max ordinal index for each enum feature in this cell.
|
|
#[inline]
|
|
fn add_enums(&mut self, enum_features: &[crate::data::EnumFeatureData], row: usize) {
|
|
for (enum_index, enum_feature) in enum_features.iter().enumerate() {
|
|
let value = enum_feature.data[row];
|
|
if value != ENUM_NULL {
|
|
if self.enum_mins[enum_index] == ENUM_NULL || value < self.enum_mins[enum_index] {
|
|
self.enum_mins[enum_index] = value;
|
|
}
|
|
if value > self.enum_maxs[enum_index] {
|
|
self.enum_maxs[enum_index] = value;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Track postcode and centroid for high-resolution cells.
|
|
/// Uses simple "first seen" approach — at res 11/12, most rows in a cell share a postcode.
|
|
#[inline]
|
|
fn add_postcode(&mut self, postcode: &str, lat: f32, lon: f32) {
|
|
self.lat_sum += lat as f64;
|
|
self.lon_sum += lon as f64;
|
|
if postcode.is_empty() {
|
|
return;
|
|
}
|
|
if self.postcode.is_none() {
|
|
self.postcode = Some(postcode.to_string());
|
|
self.postcode_count = 1;
|
|
} else if self.postcode.as_deref() == Some(postcode) {
|
|
self.postcode_count += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Appends `text` to `buf`, escaped so it can sit between the quotes of a
/// JSON string literal.
///
/// `"` and `\` are backslash-escaped, newline / carriage return / tab use
/// their short forms, and any other character below U+0020 becomes `\u00xx`.
/// Everything else is copied through unchanged. The surrounding quotes are
/// not written.
pub(crate) fn write_json_escaped(buf: &mut String, text: &str) {
    // Start of the run of characters that need no escaping and have not been
    // copied to `buf` yet.
    let mut pending_from = 0;
    for (offset, ch) in text.char_indices() {
        let short_escape = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            low if low < '\x20' => None,
            _ => continue,
        };
        buf.push_str(&text[pending_from..offset]);
        match short_escape {
            Some(sequence) => buf.push_str(sequence),
            None => {
                let _ = write!(buf, "\\u{:04x}", ch as u32);
            }
        }
        pending_from = offset + ch.len_utf8();
    }
    buf.push_str(&text[pending_from..]);
}
|
|
|
|
/// Write the hexagons JSON response directly to a String buffer,
|
|
/// avoiding serde_json::Value allocations entirely.
|
|
#[allow(clippy::too_many_arguments)]
|
|
fn write_hexagons_json(
|
|
buf: &mut String,
|
|
groups: &FxHashMap<u64, CellAgg>,
|
|
min_keys: &[String],
|
|
max_keys: &[String],
|
|
num_features: usize,
|
|
enum_min_keys: &[String],
|
|
enum_max_keys: &[String],
|
|
num_enums: usize,
|
|
include_postcode: bool,
|
|
) {
|
|
buf.push_str("{\"features\":[");
|
|
let mut first = true;
|
|
for (&cell_id, aggregation) in groups {
|
|
let Some(cell) = h3o::CellIndex::try_from(cell_id).ok() else {
|
|
continue;
|
|
};
|
|
|
|
if !first {
|
|
buf.push(',');
|
|
}
|
|
first = false;
|
|
|
|
let _ = write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, aggregation.count);
|
|
|
|
for feat_index in 0..num_features {
|
|
if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() {
|
|
let _ = write!(
|
|
buf,
|
|
",\"{}\":{},\"{}\":{}",
|
|
min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index]
|
|
);
|
|
}
|
|
}
|
|
|
|
for enum_index in 0..num_enums {
|
|
if aggregation.enum_mins[enum_index] != ENUM_NULL {
|
|
let _ = write!(
|
|
buf,
|
|
",\"{}\":{},\"{}\":{}",
|
|
enum_min_keys[enum_index], aggregation.enum_mins[enum_index],
|
|
enum_max_keys[enum_index], aggregation.enum_maxs[enum_index]
|
|
);
|
|
}
|
|
}
|
|
|
|
if include_postcode {
|
|
if let Some(ref postcode) = aggregation.postcode {
|
|
let total = aggregation.count as f64;
|
|
let centroid_lat = aggregation.lat_sum / total;
|
|
let centroid_lon = aggregation.lon_sum / total;
|
|
if centroid_lat.is_finite() && centroid_lon.is_finite() {
|
|
buf.push_str(",\"postcode\":\"");
|
|
write_json_escaped(buf, postcode);
|
|
let _ = write!(buf, "\",\"lat\":{},\"lon\":{}", centroid_lat, centroid_lon);
|
|
}
|
|
}
|
|
}
|
|
|
|
buf.push('}');
|
|
}
|
|
buf.push_str("]}");
|
|
}
|
|
|
|
pub async fn get_hexagons(
|
|
state: Arc<AppState>,
|
|
Query(params): Query<HexagonParams>,
|
|
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
|
let resolution = params.resolution;
|
|
if !(H3_REQUEST_MIN..=H3_REQUEST_MAX).contains(&resolution) {
|
|
warn!(
|
|
resolution,
|
|
"Resolution out of range [{}, {}]", H3_REQUEST_MIN, H3_REQUEST_MAX
|
|
);
|
|
return Err((
|
|
StatusCode::BAD_REQUEST,
|
|
format!(
|
|
"resolution must be between {} and {}",
|
|
H3_REQUEST_MIN, H3_REQUEST_MAX
|
|
),
|
|
));
|
|
}
|
|
|
|
let bounds_str = params.bounds.ok_or((
|
|
StatusCode::BAD_REQUEST,
|
|
"bounds parameter is required".into(),
|
|
))?;
|
|
|
|
let (mut south, mut west, mut north, mut east) = parse_bounds(&bounds_str)?;
|
|
|
|
let lat_range = north - south;
|
|
let lng_range = east - west;
|
|
south -= lat_range * BOUNDS_BUFFER_PERCENT;
|
|
north += lat_range * BOUNDS_BUFFER_PERCENT;
|
|
west -= lng_range * BOUNDS_BUFFER_PERCENT;
|
|
east += lng_range * BOUNDS_BUFFER_PERCENT;
|
|
|
|
south = (south / BOUNDS_QUANTIZATION).floor() * BOUNDS_QUANTIZATION;
|
|
west = (west / BOUNDS_QUANTIZATION).floor() * BOUNDS_QUANTIZATION;
|
|
north = (north / BOUNDS_QUANTIZATION).ceil() * BOUNDS_QUANTIZATION;
|
|
east = (east / BOUNDS_QUANTIZATION).ceil() * BOUNDS_QUANTIZATION;
|
|
|
|
let filters_str = params.filters.clone();
|
|
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
|
params.filters.as_deref(),
|
|
&state.data.feature_names,
|
|
&state.data.enum_features,
|
|
);
|
|
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
|
|
|
let json_body = tokio::task::spawn_blocking(move || -> Result<String, String> {
|
|
let t0 = std::time::Instant::now();
|
|
|
|
let num_features = state.data.num_features;
|
|
let num_enums = state.data.enum_features.len();
|
|
let feature_data = &state.data.feature_data;
|
|
|
|
let min_keys = &state.min_keys;
|
|
let max_keys = &state.max_keys;
|
|
let enum_min_keys = &state.enum_min_keys;
|
|
let enum_max_keys = &state.enum_max_keys;
|
|
|
|
let h3_cells_for_res: Option<&[u64]> = state
|
|
.h3_cells
|
|
.get(resolution as usize)
|
|
.filter(|cells| !cells.is_empty())
|
|
.map(|cells| cells.as_slice());
|
|
|
|
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
|
|
|
|
let enum_features = &state.data.enum_features;
|
|
let include_postcode = resolution >= POSTCODE_MIN_RESOLUTION;
|
|
|
|
// Row-level filter check: numeric must be non-NaN and within [min, max],
|
|
// enum must have value index in the allowed set
|
|
let row_passes = |row: usize| -> bool {
|
|
parsed_filters.iter().all(|filter| {
|
|
let value = feature_data[row * num_features + filter.feat_idx];
|
|
value.is_finite() && value >= filter.min && value <= filter.max
|
|
}) && parsed_enum_filters.iter().all(|enum_filter| {
|
|
let value = enum_features[enum_filter.enum_idx].data[row];
|
|
value != ENUM_NULL && enum_filter.allowed.contains(&value)
|
|
})
|
|
};
|
|
|
|
if let Some(precomputed) = h3_cells_for_res {
|
|
state
|
|
.grid
|
|
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
|
let row = row_idx as usize;
|
|
if !row_passes(row) {
|
|
return;
|
|
}
|
|
let cell_id = precomputed[row];
|
|
let aggregation = groups
|
|
.entry(cell_id)
|
|
.or_insert_with(|| CellAgg::new(num_features, num_enums));
|
|
aggregation.add_row(feature_data, row, num_features);
|
|
aggregation.add_enums(enum_features, row);
|
|
if include_postcode {
|
|
aggregation.add_postcode(
|
|
state.data.postcode(row),
|
|
state.data.lat[row],
|
|
state.data.lon[row],
|
|
);
|
|
}
|
|
});
|
|
} else {
|
|
let h3_res = h3o::Resolution::try_from(resolution)
|
|
.map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?;
|
|
state
|
|
.grid
|
|
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
|
let row = row_idx as usize;
|
|
if !row_passes(row) {
|
|
return;
|
|
}
|
|
let cell_id = h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64)
|
|
.map(|coord| u64::from(coord.to_cell(h3_res)))
|
|
.unwrap_or(0);
|
|
let aggregation = groups
|
|
.entry(cell_id)
|
|
.or_insert_with(|| CellAgg::new(num_features, num_enums));
|
|
aggregation.add_row(feature_data, row, num_features);
|
|
aggregation.add_enums(enum_features, row);
|
|
if include_postcode {
|
|
aggregation.add_postcode(
|
|
state.data.postcode(row),
|
|
state.data.lat[row],
|
|
state.data.lon[row],
|
|
);
|
|
}
|
|
});
|
|
}
|
|
|
|
let t_agg = t0.elapsed();
|
|
|
|
let mut json_buf = String::with_capacity(groups.len() * 128);
|
|
write_hexagons_json(
|
|
&mut json_buf,
|
|
&groups,
|
|
min_keys,
|
|
max_keys,
|
|
num_features,
|
|
enum_min_keys,
|
|
enum_max_keys,
|
|
num_enums,
|
|
include_postcode,
|
|
);
|
|
|
|
let t_total = t0.elapsed();
|
|
info!(
|
|
resolution,
|
|
cells = groups.len(),
|
|
filters = num_filters,
|
|
filters_raw = filters_str.as_deref().unwrap_or("-"),
|
|
agg_ms = format_args!("{:.1}", t_agg.as_secs_f64() * 1000.0),
|
|
total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0),
|
|
size = format_args!("{}", HumanBytes(json_buf.len())),
|
|
"GET /api/hexagons"
|
|
);
|
|
|
|
Ok(json_buf)
|
|
})
|
|
.await
|
|
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
|
|
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
|
|
|
|
Ok(([("content-type", "application/json")], json_body))
|
|
}
|