Refactor and other improvements

This commit is contained in:
Andras Schmelczer 2026-02-08 18:25:58 +00:00
parent 04a78e7bfe
commit 6c90cf3c0f
47 changed files with 2705 additions and 1568 deletions

View file

@ -6,11 +6,13 @@ use axum::response::Json;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use tracing::{info, warn};
use tracing::info;
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, MAX_CELLS_PER_REQUEST};
use crate::aggregation::Aggregator;
use crate::consts::MAX_CELLS_PER_REQUEST;
use crate::parsing::{
bounds_intersect, h3_cell_bounds, parse_bounds, parse_filters, row_passes_filters,
bounds_intersect, cell_for_row, h3_cell_bounds, needs_parent, parse_field_indices,
parse_filters, require_bounds, row_passes_filters, validate_h3_resolution,
};
use crate::state::AppState;
@ -32,79 +34,9 @@ pub struct HexagonParams {
fields: Option<String>,
}
/// Per-cell accumulator for aggregating features.
/// Uses Box<[T]> instead of Vec<T> to avoid storing capacity (saves 8 bytes per field per cell).
struct CellAgg {
count: u32,
mins: Box<[f32]>,
maxs: Box<[f32]>,
sums: Box<[f64]>,
feat_counts: Box<[u32]>,
}
impl CellAgg {
fn new(num_features: usize) -> Self {
CellAgg {
count: 0,
mins: vec![f32::INFINITY; num_features].into_boxed_slice(),
maxs: vec![f32::NEG_INFINITY; num_features].into_boxed_slice(),
sums: vec![0.0f64; num_features].into_boxed_slice(),
feat_counts: vec![0u32; num_features].into_boxed_slice(),
}
}
/// Add a row using row-major feature_data layout.
/// feature_data[row * num_features + feat_idx] — all features for one row
/// are contiguous, so this reads a single cache line per ~8 features.
#[inline]
fn add_row(&mut self, feature_data: &[f32], row: usize, num_features: usize) {
self.count += 1;
let base = row * num_features;
let row_slice = &feature_data[base..base + num_features];
for (feat_index, &value) in row_slice.iter().enumerate() {
if value.is_finite() {
if value < self.mins[feat_index] {
self.mins[feat_index] = value;
}
if value > self.maxs[feat_index] {
self.maxs[feat_index] = value;
}
self.sums[feat_index] += value as f64;
self.feat_counts[feat_index] += 1;
}
}
}
/// Add a row, only aggregating the features at the given indices.
#[inline]
fn add_row_selective(
&mut self,
feature_data: &[f32],
row: usize,
num_features: usize,
indices: &[usize],
) {
self.count += 1;
let base = row * num_features;
for &feat_index in indices {
let value = feature_data[base + feat_index];
if value.is_finite() {
if value < self.mins[feat_index] {
self.mins[feat_index] = value;
}
if value > self.maxs[feat_index] {
self.maxs[feat_index] = value;
}
self.sums[feat_index] += value as f64;
self.feat_counts[feat_index] += 1;
}
}
}
}
/// Build feature maps from aggregated cell data, filtering to only cells that intersect the query bounds.
fn build_feature_maps(
groups: &FxHashMap<u64, CellAgg>,
groups: &FxHashMap<u64, Aggregator>,
min_keys: &[String],
max_keys: &[String],
avg_keys: &[String],
@ -172,26 +104,9 @@ pub async fn get_hexagons(
Query(params): Query<HexagonParams>,
) -> Result<Json<HexagonsResponse>, (StatusCode, String)> {
let resolution = params.resolution;
if !(H3_REQUEST_MIN..=H3_REQUEST_MAX).contains(&resolution) {
warn!(
resolution,
"Resolution out of range [{}, {}]", H3_REQUEST_MIN, H3_REQUEST_MAX
);
return Err((
StatusCode::BAD_REQUEST,
format!(
"resolution must be between {} and {}",
H3_REQUEST_MIN, H3_REQUEST_MAX
),
));
}
validate_h3_resolution(resolution)?;
let bounds_str = params.bounds.ok_or((
StatusCode::BAD_REQUEST,
"bounds parameter is required".into(),
))?;
let (south, west, north, east) = parse_bounds(&bounds_str)?;
let (south, west, north, east) = require_bounds(params.bounds)?;
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
@ -201,24 +116,7 @@ pub async fn get_hexagons(
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
// Parse optional `fields` param into feature indices.
// If `fields` is absent (None), all features are included.
// If `fields` is present (even empty string), only listed features are included.
let field_indices: Option<Vec<usize>> = params.fields.as_ref().map(|fields_str| {
if fields_str.is_empty() {
return Vec::new();
}
fields_str
.split(',')
.filter_map(|name| {
let name = name.trim();
if name.is_empty() {
return None;
}
state.feature_name_to_index.get(name).copied()
})
.collect()
});
let field_indices = parse_field_indices(params.fields.as_deref(), &state.feature_name_to_index);
let response = tokio::task::spawn_blocking(move || -> Result<HexagonsResponse, String> {
let t0 = std::time::Instant::now();
@ -232,21 +130,9 @@ pub async fn get_hexagons(
let h3_res = h3o::Resolution::try_from(resolution)
.map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?;
let precomputed = &state.h3_cells;
let need_parent = resolution < H3_PRECOMPUTE_MAX;
let need_parent = needs_parent(resolution);
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
let cell_for_row = |row: usize| -> u64 {
let max_cell = precomputed[row];
if !need_parent || max_cell == 0 {
return max_cell;
}
h3o::CellIndex::try_from(max_cell)
.ok()
.and_then(|ci| ci.parent(h3_res))
.map(u64::from)
.unwrap_or(0)
};
let mut groups: FxHashMap<u64, Aggregator> = FxHashMap::default();
// Hoist has_selective branch outside the hot loop to avoid per-row branching
if let Some(sel_indices) = field_indices.as_deref() {
@ -263,10 +149,10 @@ pub async fn get_hexagons(
) {
return;
}
let cell_id = cell_for_row(row);
let cell_id = cell_for_row(row, precomputed, h3_res, need_parent);
let aggregation = groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features));
.or_insert_with(|| Aggregator::new(num_features));
aggregation.add_row_selective(feature_data, row, num_features, sel_indices);
});
} else {
@ -283,10 +169,10 @@ pub async fn get_hexagons(
) {
return;
}
let cell_id = cell_for_row(row);
let cell_id = cell_for_row(row, precomputed, h3_res, need_parent);
let aggregation = groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features));
.or_insert_with(|| Aggregator::new(num_features));
aggregation.add_row(feature_data, row, num_features);
});
}