Refactor and other improvements
This commit is contained in:
parent
04a78e7bfe
commit
6c90cf3c0f
47 changed files with 2705 additions and 1568 deletions
|
|
@ -6,11 +6,13 @@ use axum::response::Json;
|
|||
use rustc_hash::FxHashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Map, Value};
|
||||
use tracing::{info, warn};
|
||||
use tracing::info;
|
||||
|
||||
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, MAX_CELLS_PER_REQUEST};
|
||||
use crate::aggregation::Aggregator;
|
||||
use crate::consts::MAX_CELLS_PER_REQUEST;
|
||||
use crate::parsing::{
|
||||
bounds_intersect, h3_cell_bounds, parse_bounds, parse_filters, row_passes_filters,
|
||||
bounds_intersect, cell_for_row, h3_cell_bounds, needs_parent, parse_field_indices,
|
||||
parse_filters, require_bounds, row_passes_filters, validate_h3_resolution,
|
||||
};
|
||||
use crate::state::AppState;
|
||||
|
||||
|
|
@ -32,79 +34,9 @@ pub struct HexagonParams {
|
|||
fields: Option<String>,
|
||||
}
|
||||
|
||||
/// Per-cell accumulator for aggregating features.
///
/// Uses `Box<[T]>` instead of `Vec<T>` to avoid storing capacity (saves 8 bytes per field per cell).
/// All four per-feature slices have the length passed to [`CellAgg::new`].
struct CellAgg {
    /// Number of rows added to this cell; incremented for every row, even if
    /// none of its feature values were finite.
    count: u32,
    /// Per-feature minimum over finite values; stays `f32::INFINITY` until one is seen.
    mins: Box<[f32]>,
    /// Per-feature maximum over finite values; stays `f32::NEG_INFINITY` until one is seen.
    maxs: Box<[f32]>,
    /// Per-feature sum of finite values, accumulated in `f64`.
    sums: Box<[f64]>,
    /// Per-feature count of finite values that contributed to `mins`/`maxs`/`sums`.
    feat_counts: Box<[u32]>,
}

impl CellAgg {
    /// Creates an empty accumulator with room for `num_features` features.
    ///
    /// Minimums start at `+inf`, maximums at `-inf`, sums and counts at zero.
    fn new(num_features: usize) -> Self {
        CellAgg {
            count: 0,
            mins: vec![f32::INFINITY; num_features].into_boxed_slice(),
            maxs: vec![f32::NEG_INFINITY; num_features].into_boxed_slice(),
            sums: vec![0.0f64; num_features].into_boxed_slice(),
            feat_counts: vec![0u32; num_features].into_boxed_slice(),
        }
    }

    /// Folds a single feature value into the statistics stored at `feat_index`.
    ///
    /// Non-finite values (NaN, +/-inf) are ignored so they cannot poison the
    /// min/max/sum of the cell.
    ///
    /// # Panics
    ///
    /// Panics if `feat_index` is out of range for this accumulator.
    #[inline]
    fn accumulate(&mut self, feat_index: usize, value: f32) {
        if !value.is_finite() {
            return;
        }
        if value < self.mins[feat_index] {
            self.mins[feat_index] = value;
        }
        if value > self.maxs[feat_index] {
            self.maxs[feat_index] = value;
        }
        self.sums[feat_index] += value as f64;
        self.feat_counts[feat_index] += 1;
    }

    /// Add a row using row-major `feature_data` layout.
    ///
    /// `feature_data[row * num_features + feat_idx]` — all features for one row
    /// are contiguous, so the row is read as a single slice.
    ///
    /// # Panics
    ///
    /// Panics if the row lies outside `feature_data`, or if `num_features`
    /// exceeds the feature count this accumulator was created with.
    #[inline]
    fn add_row(&mut self, feature_data: &[f32], row: usize, num_features: usize) {
        self.count += 1;
        let base = row * num_features;
        let row_slice = &feature_data[base..base + num_features];
        for (feat_index, &value) in row_slice.iter().enumerate() {
            self.accumulate(feat_index, value);
        }
    }

    /// Add a row, only aggregating the features at the given indices.
    ///
    /// Features not listed in `indices` keep their current statistics; the
    /// row still counts towards `count`.
    ///
    /// # Panics
    ///
    /// Panics if `base + index` lies outside `feature_data`, or if an index is
    /// out of range for this accumulator.
    #[inline]
    fn add_row_selective(
        &mut self,
        feature_data: &[f32],
        row: usize,
        num_features: usize,
        indices: &[usize],
    ) {
        self.count += 1;
        let base = row * num_features;
        for &feat_index in indices {
            self.accumulate(feat_index, feature_data[base + feat_index]);
        }
    }
}
|
||||
|
||||
/// Build feature maps from aggregated cell data, filtering to only cells that intersect the query bounds.
|
||||
fn build_feature_maps(
|
||||
groups: &FxHashMap<u64, CellAgg>,
|
||||
groups: &FxHashMap<u64, Aggregator>,
|
||||
min_keys: &[String],
|
||||
max_keys: &[String],
|
||||
avg_keys: &[String],
|
||||
|
|
@ -172,26 +104,9 @@ pub async fn get_hexagons(
|
|||
Query(params): Query<HexagonParams>,
|
||||
) -> Result<Json<HexagonsResponse>, (StatusCode, String)> {
|
||||
let resolution = params.resolution;
|
||||
if !(H3_REQUEST_MIN..=H3_REQUEST_MAX).contains(&resolution) {
|
||||
warn!(
|
||||
resolution,
|
||||
"Resolution out of range [{}, {}]", H3_REQUEST_MIN, H3_REQUEST_MAX
|
||||
);
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
format!(
|
||||
"resolution must be between {} and {}",
|
||||
H3_REQUEST_MIN, H3_REQUEST_MAX
|
||||
),
|
||||
));
|
||||
}
|
||||
validate_h3_resolution(resolution)?;
|
||||
|
||||
let bounds_str = params.bounds.ok_or((
|
||||
StatusCode::BAD_REQUEST,
|
||||
"bounds parameter is required".into(),
|
||||
))?;
|
||||
|
||||
let (south, west, north, east) = parse_bounds(&bounds_str)?;
|
||||
let (south, west, north, east) = require_bounds(params.bounds)?;
|
||||
|
||||
let filters_str = params.filters.clone();
|
||||
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||
|
|
@ -201,24 +116,7 @@ pub async fn get_hexagons(
|
|||
);
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
||||
// Parse optional `fields` param into feature indices.
|
||||
// If `fields` is absent (None), all features are included.
|
||||
// If `fields` is present (even empty string), only listed features are included.
|
||||
let field_indices: Option<Vec<usize>> = params.fields.as_ref().map(|fields_str| {
|
||||
if fields_str.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
fields_str
|
||||
.split(',')
|
||||
.filter_map(|name| {
|
||||
let name = name.trim();
|
||||
if name.is_empty() {
|
||||
return None;
|
||||
}
|
||||
state.feature_name_to_index.get(name).copied()
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
let field_indices = parse_field_indices(params.fields.as_deref(), &state.feature_name_to_index);
|
||||
|
||||
let response = tokio::task::spawn_blocking(move || -> Result<HexagonsResponse, String> {
|
||||
let t0 = std::time::Instant::now();
|
||||
|
|
@ -232,21 +130,9 @@ pub async fn get_hexagons(
|
|||
let h3_res = h3o::Resolution::try_from(resolution)
|
||||
.map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?;
|
||||
let precomputed = &state.h3_cells;
|
||||
let need_parent = resolution < H3_PRECOMPUTE_MAX;
|
||||
let need_parent = needs_parent(resolution);
|
||||
|
||||
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
|
||||
|
||||
let cell_for_row = |row: usize| -> u64 {
|
||||
let max_cell = precomputed[row];
|
||||
if !need_parent || max_cell == 0 {
|
||||
return max_cell;
|
||||
}
|
||||
h3o::CellIndex::try_from(max_cell)
|
||||
.ok()
|
||||
.and_then(|ci| ci.parent(h3_res))
|
||||
.map(u64::from)
|
||||
.unwrap_or(0)
|
||||
};
|
||||
let mut groups: FxHashMap<u64, Aggregator> = FxHashMap::default();
|
||||
|
||||
// Hoist has_selective branch outside the hot loop to avoid per-row branching
|
||||
if let Some(sel_indices) = field_indices.as_deref() {
|
||||
|
|
@ -263,10 +149,10 @@ pub async fn get_hexagons(
|
|||
) {
|
||||
return;
|
||||
}
|
||||
let cell_id = cell_for_row(row);
|
||||
let cell_id = cell_for_row(row, precomputed, h3_res, need_parent);
|
||||
let aggregation = groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| CellAgg::new(num_features));
|
||||
.or_insert_with(|| Aggregator::new(num_features));
|
||||
aggregation.add_row_selective(feature_data, row, num_features, sel_indices);
|
||||
});
|
||||
} else {
|
||||
|
|
@ -283,10 +169,10 @@ pub async fn get_hexagons(
|
|||
) {
|
||||
return;
|
||||
}
|
||||
let cell_id = cell_for_row(row);
|
||||
let cell_id = cell_for_row(row, precomputed, h3_res, need_parent);
|
||||
let aggregation = groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| CellAgg::new(num_features));
|
||||
.or_insert_with(|| Aggregator::new(num_features));
|
||||
aggregation.add_row(feature_data, row, num_features);
|
||||
});
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue