Optimise
This commit is contained in:
parent
9179acd4cd
commit
2c613dc0d1
14 changed files with 376 additions and 188 deletions
|
|
@ -9,8 +9,8 @@ use serde::Deserialize;
|
|||
use tracing::{info, warn};
|
||||
|
||||
use crate::consts::{
|
||||
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN,
|
||||
POSTCODE_MIN_RESOLUTION,
|
||||
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX,
|
||||
H3_REQUEST_MIN, POSTCODE_MIN_RESOLUTION,
|
||||
};
|
||||
use crate::filter::parse_filters;
|
||||
use crate::state::AppState;
|
||||
|
|
@ -39,16 +39,21 @@ pub struct HexagonParams {
|
|||
/// Comma-separated filters: `name:min:max,...`
|
||||
/// Rows must have non-NaN values within [min,max] for each filter.
|
||||
filters: Option<String>,
|
||||
/// Comma-separated feature names to include in min/max aggregation.
|
||||
/// When present (even if empty), only listed features are aggregated and written.
|
||||
/// When absent, all features are included (backward compatible).
|
||||
fields: Option<String>,
|
||||
}
|
||||
|
||||
/// Per-cell accumulator for aggregating features
|
||||
/// Per-cell accumulator for aggregating features.
|
||||
/// Uses Box<[T]> instead of Vec<T> to avoid storing capacity (saves 8 bytes per field per cell).
|
||||
struct CellAgg {
|
||||
count: u32,
|
||||
mins: Vec<f32>,
|
||||
maxs: Vec<f32>,
|
||||
mins: Box<[f32]>,
|
||||
maxs: Box<[f32]>,
|
||||
/// Min/max ordinal indices for enum features (255 = no data yet)
|
||||
enum_mins: Vec<u8>,
|
||||
enum_maxs: Vec<u8>,
|
||||
enum_mins: Box<[u8]>,
|
||||
enum_maxs: Box<[u8]>,
|
||||
/// Most common postcode in this cell (only tracked at high resolutions)
|
||||
postcode: Option<String>,
|
||||
postcode_count: u32,
|
||||
|
|
@ -60,10 +65,10 @@ impl CellAgg {
|
|||
fn new(num_features: usize, num_enums: usize) -> Self {
|
||||
CellAgg {
|
||||
count: 0,
|
||||
mins: vec![f32::INFINITY; num_features],
|
||||
maxs: vec![f32::NEG_INFINITY; num_features],
|
||||
enum_mins: vec![ENUM_NULL; num_enums],
|
||||
enum_maxs: vec![0; num_enums],
|
||||
mins: vec![f32::INFINITY; num_features].into_boxed_slice(),
|
||||
maxs: vec![f32::NEG_INFINITY; num_features].into_boxed_slice(),
|
||||
enum_mins: vec![ENUM_NULL; num_enums].into_boxed_slice(),
|
||||
enum_maxs: vec![0; num_enums].into_boxed_slice(),
|
||||
postcode: None,
|
||||
postcode_count: 0,
|
||||
lat_sum: 0.0,
|
||||
|
|
@ -93,9 +98,45 @@ impl CellAgg {
|
|||
|
||||
/// Track min/max ordinal index for each enum feature in this cell.
|
||||
#[inline]
|
||||
fn add_enums(&mut self, enum_features: &[crate::data::EnumFeatureData], row: usize) {
|
||||
for (enum_index, enum_feature) in enum_features.iter().enumerate() {
|
||||
let value = enum_feature.data[row];
|
||||
fn add_enums(&mut self, enum_data: &[u8], row: usize, num_enums: usize) {
|
||||
let base = row * num_enums;
|
||||
let row_slice = &enum_data[base..base + num_enums];
|
||||
for (enum_index, &value) in row_slice.iter().enumerate() {
|
||||
if value != ENUM_NULL {
|
||||
if self.enum_mins[enum_index] == ENUM_NULL || value < self.enum_mins[enum_index] {
|
||||
self.enum_mins[enum_index] = value;
|
||||
}
|
||||
if value > self.enum_maxs[enum_index] {
|
||||
self.enum_maxs[enum_index] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a row, only aggregating the features at the given indices.
|
||||
#[inline]
|
||||
fn add_row_selective(&mut self, feature_data: &[f32], row: usize, num_features: usize, indices: &[usize]) {
|
||||
self.count += 1;
|
||||
let base = row * num_features;
|
||||
for &feat_index in indices {
|
||||
let value = feature_data[base + feat_index];
|
||||
if value.is_finite() {
|
||||
if value < self.mins[feat_index] {
|
||||
self.mins[feat_index] = value;
|
||||
}
|
||||
if value > self.maxs[feat_index] {
|
||||
self.maxs[feat_index] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Track min/max ordinal index for selected enum features only.
|
||||
#[inline]
|
||||
fn add_enums_selective(&mut self, enum_data: &[u8], row: usize, num_enums: usize, indices: &[usize]) {
|
||||
let base = row * num_enums;
|
||||
for &enum_index in indices {
|
||||
let value = enum_data[base + enum_index];
|
||||
if value != ENUM_NULL {
|
||||
if self.enum_mins[enum_index] == ENUM_NULL || value < self.enum_mins[enum_index] {
|
||||
self.enum_mins[enum_index] = value;
|
||||
|
|
@ -142,6 +183,7 @@ pub(crate) fn write_json_escaped(buf: &mut String, text: &str) {
|
|||
|
||||
/// Write the hexagons JSON response directly to a String buffer,
|
||||
/// avoiding serde_json::Value allocations entirely.
|
||||
/// When `numeric_indices` / `enum_indices` are Some, only those features are written.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn write_hexagons_json(
|
||||
buf: &mut String,
|
||||
|
|
@ -153,6 +195,8 @@ fn write_hexagons_json(
|
|||
enum_max_keys: &[String],
|
||||
num_enums: usize,
|
||||
include_postcode: bool,
|
||||
numeric_indices: Option<&[usize]>,
|
||||
enum_indices: Option<&[usize]>,
|
||||
) {
|
||||
buf.push_str("{\"features\":[");
|
||||
let mut first = true;
|
||||
|
|
@ -168,24 +212,49 @@ fn write_hexagons_json(
|
|||
|
||||
let _ = write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, aggregation.count);
|
||||
|
||||
for feat_index in 0..num_features {
|
||||
if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() {
|
||||
let _ = write!(
|
||||
buf,
|
||||
",\"{}\":{},\"{}\":{}",
|
||||
min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index]
|
||||
);
|
||||
if let Some(indices) = numeric_indices {
|
||||
for &feat_index in indices {
|
||||
if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() {
|
||||
let _ = write!(
|
||||
buf,
|
||||
",\"{}\":{},\"{}\":{}",
|
||||
min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index]
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for feat_index in 0..num_features {
|
||||
if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() {
|
||||
let _ = write!(
|
||||
buf,
|
||||
",\"{}\":{},\"{}\":{}",
|
||||
min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index]
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for enum_index in 0..num_enums {
|
||||
if aggregation.enum_mins[enum_index] != ENUM_NULL {
|
||||
let _ = write!(
|
||||
buf,
|
||||
",\"{}\":{},\"{}\":{}",
|
||||
enum_min_keys[enum_index], aggregation.enum_mins[enum_index],
|
||||
enum_max_keys[enum_index], aggregation.enum_maxs[enum_index]
|
||||
);
|
||||
if let Some(indices) = enum_indices {
|
||||
for &enum_index in indices {
|
||||
if aggregation.enum_mins[enum_index] != ENUM_NULL {
|
||||
let _ = write!(
|
||||
buf,
|
||||
",\"{}\":{},\"{}\":{}",
|
||||
enum_min_keys[enum_index], aggregation.enum_mins[enum_index],
|
||||
enum_max_keys[enum_index], aggregation.enum_maxs[enum_index]
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for enum_index in 0..num_enums {
|
||||
if aggregation.enum_mins[enum_index] != ENUM_NULL {
|
||||
let _ = write!(
|
||||
buf,
|
||||
",\"{}\":{},\"{}\":{}",
|
||||
enum_min_keys[enum_index], aggregation.enum_mins[enum_index],
|
||||
enum_max_keys[enum_index], aggregation.enum_maxs[enum_index]
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -253,27 +322,48 @@ pub async fn get_hexagons(
|
|||
);
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
||||
// Parse optional `fields` param into numeric and enum index sets.
|
||||
// If `fields` is absent (None), all features are included.
|
||||
// If `fields` is present (even empty string), only listed features are included.
|
||||
let field_indices: Option<(Vec<usize>, Vec<usize>)> = params.fields.as_ref().map(|fields_str| {
|
||||
let mut numeric_indices = Vec::new();
|
||||
let mut enum_indices = Vec::new();
|
||||
if !fields_str.is_empty() {
|
||||
for name in fields_str.split(',') {
|
||||
let name = name.trim();
|
||||
if name.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if let Some(idx) = state.data.feature_names.iter().position(|feat| feat == name) {
|
||||
numeric_indices.push(idx);
|
||||
} else if let Some(&idx) = state.enum_name_to_idx.get(name) {
|
||||
enum_indices.push(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
(numeric_indices, enum_indices)
|
||||
});
|
||||
|
||||
let json_body = tokio::task::spawn_blocking(move || -> Result<String, String> {
|
||||
let t0 = std::time::Instant::now();
|
||||
|
||||
let num_features = state.data.num_features;
|
||||
let num_enums = state.data.enum_features.len();
|
||||
let num_enums = state.data.num_enums;
|
||||
let feature_data = &state.data.feature_data;
|
||||
let enum_data = &state.data.enum_data;
|
||||
|
||||
let min_keys = &state.min_keys;
|
||||
let max_keys = &state.max_keys;
|
||||
let enum_min_keys = &state.enum_min_keys;
|
||||
let enum_max_keys = &state.enum_max_keys;
|
||||
|
||||
let h3_cells_for_res: Option<&[u64]> = state
|
||||
.h3_cells
|
||||
.get(resolution as usize)
|
||||
.filter(|cells| !cells.is_empty())
|
||||
.map(|cells| cells.as_slice());
|
||||
let h3_res = h3o::Resolution::try_from(resolution)
|
||||
.map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?;
|
||||
let precomputed = &state.h3_cells;
|
||||
let need_parent = resolution < H3_PRECOMPUTE_MAX;
|
||||
|
||||
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
|
||||
|
||||
let enum_features = &state.data.enum_features;
|
||||
let include_postcode = resolution >= POSTCODE_MIN_RESOLUTION;
|
||||
|
||||
// Row-level filter check: numeric must be non-NaN and within [min, max],
|
||||
|
|
@ -283,60 +373,58 @@ pub async fn get_hexagons(
|
|||
let value = feature_data[row * num_features + filter.feat_idx];
|
||||
value.is_finite() && value >= filter.min && value <= filter.max
|
||||
}) && parsed_enum_filters.iter().all(|enum_filter| {
|
||||
let value = enum_features[enum_filter.enum_idx].data[row];
|
||||
let value = enum_data[row * num_enums + enum_filter.enum_idx];
|
||||
value != ENUM_NULL && enum_filter.allowed.contains(&value)
|
||||
})
|
||||
};
|
||||
|
||||
if let Some(precomputed) = h3_cells_for_res {
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if !row_passes(row) {
|
||||
return;
|
||||
}
|
||||
let cell_id = precomputed[row];
|
||||
let aggregation = groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| CellAgg::new(num_features, num_enums));
|
||||
aggregation.add_row(feature_data, row, num_features);
|
||||
aggregation.add_enums(enum_features, row);
|
||||
if include_postcode {
|
||||
aggregation.add_postcode(
|
||||
state.data.postcode(row),
|
||||
state.data.lat[row],
|
||||
state.data.lon[row],
|
||||
);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
let h3_res = h3o::Resolution::try_from(resolution)
|
||||
.map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?;
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if !row_passes(row) {
|
||||
return;
|
||||
}
|
||||
let cell_id = h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64)
|
||||
.map(|coord| u64::from(coord.to_cell(h3_res)))
|
||||
.unwrap_or(0);
|
||||
let aggregation = groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| CellAgg::new(num_features, num_enums));
|
||||
aggregation.add_row(feature_data, row, num_features);
|
||||
aggregation.add_enums(enum_features, row);
|
||||
if include_postcode {
|
||||
aggregation.add_postcode(
|
||||
state.data.postcode(row),
|
||||
state.data.lat[row],
|
||||
state.data.lon[row],
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
// Choose aggregation strategy based on whether fields are specified
|
||||
let has_selective = field_indices.is_some();
|
||||
let (sel_numeric, sel_enum) = field_indices.as_ref().map_or((&[][..], &[][..]), |(ni, ei)| (ni.as_slice(), ei.as_slice()));
|
||||
|
||||
let aggregate_row = |groups: &mut FxHashMap<u64, CellAgg>, cell_id: u64, row: usize| {
|
||||
let aggregation = groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| CellAgg::new(num_features, num_enums));
|
||||
if has_selective {
|
||||
aggregation.add_row_selective(feature_data, row, num_features, sel_numeric);
|
||||
aggregation.add_enums_selective(enum_data, row, num_enums, sel_enum);
|
||||
} else {
|
||||
aggregation.add_row(feature_data, row, num_features);
|
||||
aggregation.add_enums(enum_data, row, num_enums);
|
||||
}
|
||||
if include_postcode {
|
||||
aggregation.add_postcode(
|
||||
state.data.postcode(row),
|
||||
state.data.lat[row],
|
||||
state.data.lon[row],
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Resolve cell at requested resolution from precomputed max-resolution cell.
|
||||
// For max resolution, use directly; for lower resolutions, derive parent.
|
||||
let cell_for_row = |row: usize| -> u64 {
|
||||
let max_cell = precomputed[row];
|
||||
if !need_parent || max_cell == 0 {
|
||||
return max_cell;
|
||||
}
|
||||
h3o::CellIndex::try_from(max_cell)
|
||||
.ok()
|
||||
.and_then(|ci| ci.parent(h3_res))
|
||||
.map(u64::from)
|
||||
.unwrap_or(0)
|
||||
};
|
||||
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if !row_passes(row) {
|
||||
return;
|
||||
}
|
||||
aggregate_row(&mut groups, cell_for_row(row), row);
|
||||
});
|
||||
|
||||
let t_agg = t0.elapsed();
|
||||
|
||||
|
|
@ -351,6 +439,8 @@ pub async fn get_hexagons(
|
|||
enum_max_keys,
|
||||
num_enums,
|
||||
include_postcode,
|
||||
field_indices.as_ref().map(|(ni, _)| ni.as_slice()),
|
||||
field_indices.as_ref().map(|(_, ei)| ei.as_slice()),
|
||||
);
|
||||
|
||||
let t_total = t0.elapsed();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue