Refactor and improve
This commit is contained in:
parent
1f148b2185
commit
242acff987
22 changed files with 754 additions and 1053 deletions
|
|
@ -1,18 +1,50 @@
|
|||
use std::fmt::Write;
|
||||
use std::collections::HashMap;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::Query;
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::IntoResponse;
|
||||
use serde::Deserialize;
|
||||
use axum::response::Json;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::consts::{ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
|
||||
use crate::filter::{parse_filters, row_passes_filters};
|
||||
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
|
||||
use crate::parsing::{h3_cell_bounds, parse_filters, row_passes_filters};
|
||||
use crate::state::AppState;
|
||||
|
||||
use super::parse::h3_cell_bounds;
|
||||
/// Histogram summary serialized inside each numeric feature's stats.
///
/// The handler fills `min`, `max`, `p1` and `p99` from the feature's
/// precomputed `feature_stats` histogram, while `counts` holds the per-bin
/// tallies for the rows matched by the current request.
#[derive(Serialize)]
|
||||
pub struct HistogramStats {
|
||||
/// Histogram lower bound, copied from the precomputed histogram.
min: f64,
|
||||
/// Histogram upper bound, copied from the precomputed histogram.
max: f64,
|
||||
/// 1st percentile (left edge of main distribution)
|
||||
p1: f64,
|
||||
/// 99th percentile (right edge of main distribution)
|
||||
p99: f64,
|
||||
/// Per-bin counts (`HISTOGRAM_BINS` entries): the first bin collects values
/// below `p1`, the last bin collects values at or above `p99`, and the bins
/// in between split the `p1..p99` range into equal widths.
counts: Vec<u64>,
|
||||
}
|
||||
|
||||
/// Per-feature statistics for one numeric feature over the matching rows.
///
/// The handler only emits an entry when at least one finite value was seen.
#[derive(Serialize)]
|
||||
pub struct NumericFeatureStats {
|
||||
/// Feature name as listed in the dataset's `feature_names`.
name: String,
|
||||
/// Number of matching rows whose value for this feature is finite.
count: usize,
|
||||
/// Smallest finite value among the matching rows.
min: f64,
|
||||
/// Largest finite value among the matching rows.
max: f64,
|
||||
/// Sum of the finite values divided by `count`.
mean: f64,
|
||||
/// Binned distribution of the finite values.
histogram: HistogramStats,
|
||||
}
|
||||
|
||||
/// Value counts for one enum (categorical) feature over the matching rows.
///
/// The handler only emits an entry when at least one value was counted.
#[derive(Serialize)]
|
||||
pub struct EnumFeatureStats {
|
||||
/// Feature name as listed in the dataset's `feature_names`.
name: String,
|
||||
/// Occurrences per enum value label; labels with a zero count are omitted.
counts: HashMap<String, u64>,
|
||||
}
|
||||
|
||||
/// JSON body returned by `get_hexagon_stats`.
#[derive(Serialize)]
|
||||
pub struct HexagonStatsResponse {
|
||||
/// Total number of rows that matched the requested cell and filters.
count: usize,
|
||||
/// Statistics for each requested numeric feature that had finite values.
numeric_features: Vec<NumericFeatureStats>,
|
||||
/// Value counts for each requested enum feature that had any values.
enum_features: Vec<EnumFeatureStats>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct HexagonStatsParams {
|
||||
|
|
@ -20,15 +52,14 @@ pub struct HexagonStatsParams {
|
|||
pub resolution: u8,
|
||||
pub filters: Option<String>,
|
||||
/// Comma-separated feature names to include in stats response.
|
||||
/// When present (even if empty), only listed features are computed.
|
||||
/// When absent, all features are returned (backward compatible).
|
||||
/// Only listed features are computed; if absent or empty, no features are returned.
|
||||
pub fields: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn get_hexagon_stats(
|
||||
state: Arc<AppState>,
|
||||
Query(params): Query<HexagonStatsParams>,
|
||||
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
||||
) -> Result<Json<HexagonStatsResponse>, (StatusCode, String)> {
|
||||
let cell = h3o::CellIndex::from_str(&params.h3).map_err(|error| {
|
||||
warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index");
|
||||
(
|
||||
|
|
@ -57,36 +88,34 @@ pub async fn get_hexagon_stats(
|
|||
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||
params.filters.as_deref(),
|
||||
&state.data.feature_names,
|
||||
&state.data.enum_features,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
||||
// Parse optional `fields` param into sets of feature names.
|
||||
// None = include all, Some = only include listed features.
|
||||
let field_set: Option<std::collections::HashSet<String>> =
|
||||
params.fields.as_ref().map(|fields_str| {
|
||||
let fields_specified = params.fields.is_some();
|
||||
let field_set: std::collections::HashSet<String> = params
|
||||
.fields
|
||||
.as_ref()
|
||||
.map(|fields_str| {
|
||||
fields_str
|
||||
.split(',')
|
||||
.map(|field| field.trim().to_string())
|
||||
.filter(|field| !field.is_empty())
|
||||
.collect()
|
||||
});
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let result = tokio::task::spawn_blocking(move || {
|
||||
let response = tokio::task::spawn_blocking(move || {
|
||||
let start_time = std::time::Instant::now();
|
||||
let precomputed = &state.h3_cells;
|
||||
let h3_res = h3o::Resolution::try_from(resolution)
|
||||
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
|
||||
let need_parent = resolution < H3_PRECOMPUTE_MAX;
|
||||
let num_features = state.data.num_features;
|
||||
let num_enums = state.data.num_enums;
|
||||
let feature_data = &state.data.feature_data;
|
||||
let enum_data = &state.data.enum_data;
|
||||
let enum_features = &state.data.enum_features;
|
||||
|
||||
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
||||
|
||||
// Resolve cell at requested resolution from precomputed max-resolution cell
|
||||
let cell_for_row = |row: usize| -> u64 {
|
||||
let max_cell = precomputed[row];
|
||||
if !need_parent || max_cell == 0 {
|
||||
|
|
@ -99,7 +128,6 @@ pub async fn get_hexagon_stats(
|
|||
.unwrap_or(0)
|
||||
};
|
||||
|
||||
// Collect matching rows
|
||||
let mut matching_rows: Vec<usize> = Vec::new();
|
||||
state
|
||||
.grid
|
||||
|
|
@ -112,8 +140,6 @@ pub async fn get_hexagon_stats(
|
|||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_data,
|
||||
num_enums,
|
||||
)
|
||||
{
|
||||
matching_rows.push(row);
|
||||
|
|
@ -122,140 +148,108 @@ pub async fn get_hexagon_stats(
|
|||
|
||||
let total_count = matching_rows.len();
|
||||
|
||||
// Build JSON directly via string buffer
|
||||
let mut output = String::with_capacity(4096);
|
||||
output.push_str("{\"count\":");
|
||||
write!(output, "{}", total_count).unwrap();
|
||||
let mut numeric_features = Vec::new();
|
||||
let mut enum_features_out = Vec::new();
|
||||
|
||||
// Numeric features: compute count, min, max, sum, histogram using global bin edges
|
||||
output.push_str(",\"numeric_features\":[");
|
||||
let mut first_numeric = true;
|
||||
for (feature_index, feature_name) in state.data.feature_names.iter().enumerate() {
|
||||
// Skip features not in the requested set (when fields param is present)
|
||||
if let Some(ref set) = field_set {
|
||||
if !set.contains(feature_name.as_str()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let global_stats = &state.data.feature_stats[feature_index];
|
||||
let histogram_min = global_stats.histogram.min;
|
||||
let histogram_max = global_stats.histogram.max;
|
||||
let bin_width = global_stats.histogram.bin_width;
|
||||
|
||||
let mut count = 0usize;
|
||||
let mut min_value = f32::INFINITY;
|
||||
let mut max_value = f32::NEG_INFINITY;
|
||||
let mut sum = 0.0f64; // keep f64 for mean precision
|
||||
let mut bins = vec![0u64; HISTOGRAM_BINS];
|
||||
|
||||
for &row in &matching_rows {
|
||||
let value = feature_data[row * num_features + feature_index];
|
||||
if value.is_finite() {
|
||||
count += 1;
|
||||
if value < min_value {
|
||||
min_value = value;
|
||||
}
|
||||
if value > max_value {
|
||||
max_value = value;
|
||||
}
|
||||
sum += value as f64;
|
||||
|
||||
// Bin into histogram using global edges (cast to f64 for bin index math)
|
||||
if bin_width > 0.0 {
|
||||
let bin_index = ((value as f64 - histogram_min as f64) / bin_width as f64)
|
||||
.floor() as isize;
|
||||
let clamped_index =
|
||||
bin_index.max(0).min((HISTOGRAM_BINS - 1) as isize) as usize;
|
||||
bins[clamped_index] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if count == 0 {
|
||||
if fields_specified && !field_set.contains(feature_name.as_str()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if !first_numeric {
|
||||
output.push(',');
|
||||
}
|
||||
first_numeric = false;
|
||||
|
||||
let mean = sum / count as f64;
|
||||
output.push_str("{\"name\":");
|
||||
write_json_string(&mut output, feature_name);
|
||||
write!(output, ",\"count\":{}", count).unwrap();
|
||||
write!(output, ",\"min\":{}", format_num(min_value)).unwrap();
|
||||
write!(output, ",\"max\":{}", format_num(max_value)).unwrap();
|
||||
write!(output, ",\"mean\":{}", format_f64(mean)).unwrap();
|
||||
output.push_str(",\"histogram\":{\"min\":");
|
||||
write!(output, "{}", format_num(histogram_min)).unwrap();
|
||||
output.push_str(",\"max\":");
|
||||
write!(output, "{}", format_num(histogram_max)).unwrap();
|
||||
output.push_str(",\"bin_width\":");
|
||||
write!(output, "{}", format_num(bin_width)).unwrap();
|
||||
output.push_str(",\"counts\":[");
|
||||
for (bin_index, &bin_count) in bins.iter().enumerate() {
|
||||
if bin_index > 0 {
|
||||
output.push(',');
|
||||
// Check if this is an enum feature
|
||||
if let Some(enum_values) = state.data.enum_values.get(&feature_index) {
|
||||
// Enum feature: count occurrences of each value
|
||||
let mut value_counts = vec![0u64; enum_values.len()];
|
||||
for &row in &matching_rows {
|
||||
let value = feature_data[row * num_features + feature_index];
|
||||
if value.is_finite() {
|
||||
let idx = value as usize;
|
||||
if idx < value_counts.len() {
|
||||
value_counts[idx] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let counts: HashMap<String, u64> = value_counts
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, &count)| count > 0)
|
||||
.map(|(idx, &count)| (enum_values[idx].clone(), count))
|
||||
.collect();
|
||||
|
||||
if !counts.is_empty() {
|
||||
enum_features_out.push(EnumFeatureStats {
|
||||
name: feature_name.clone(),
|
||||
counts,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// Numeric feature: compute stats and histogram
|
||||
let global_hist = &state.data.feature_stats[feature_index].histogram;
|
||||
let p1 = global_hist.p1;
|
||||
let p99 = global_hist.p99;
|
||||
|
||||
let mut count = 0usize;
|
||||
let mut min_value = f32::INFINITY;
|
||||
let mut max_value = f32::NEG_INFINITY;
|
||||
let mut sum = 0.0f64;
|
||||
let mut bins = vec![0u64; HISTOGRAM_BINS];
|
||||
|
||||
// Compute middle bin width (between p1 and p99)
|
||||
let middle_bins = HISTOGRAM_BINS.saturating_sub(2);
|
||||
let middle_width = if middle_bins > 0 && p99 > p1 {
|
||||
(p99 - p1) / middle_bins as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
for &row in &matching_rows {
|
||||
let value = feature_data[row * num_features + feature_index];
|
||||
if value.is_finite() {
|
||||
count += 1;
|
||||
if value < min_value {
|
||||
min_value = value;
|
||||
}
|
||||
if value > max_value {
|
||||
max_value = value;
|
||||
}
|
||||
sum += value as f64;
|
||||
|
||||
// Bin using p1/p99 outlier structure
|
||||
let bin = if value < p1 {
|
||||
0 // Low outlier bin
|
||||
} else if value >= p99 {
|
||||
HISTOGRAM_BINS - 1 // High outlier bin
|
||||
} else if middle_width > 0.0 {
|
||||
// Middle bins (1 to n-2)
|
||||
let middle_bin = ((value - p1) / middle_width) as usize;
|
||||
(1 + middle_bin).min(HISTOGRAM_BINS - 2)
|
||||
} else {
|
||||
HISTOGRAM_BINS / 2 // Fallback if p1 == p99
|
||||
};
|
||||
bins[bin] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
numeric_features.push(NumericFeatureStats {
|
||||
name: feature_name.clone(),
|
||||
count,
|
||||
min: min_value as f64,
|
||||
max: max_value as f64,
|
||||
mean: sum / count as f64,
|
||||
histogram: HistogramStats {
|
||||
min: global_hist.min as f64,
|
||||
max: global_hist.max as f64,
|
||||
p1: p1 as f64,
|
||||
p99: p99 as f64,
|
||||
counts: bins,
|
||||
},
|
||||
});
|
||||
}
|
||||
write!(output, "{}", bin_count).unwrap();
|
||||
}
|
||||
output.push_str("]}}")
|
||||
}
|
||||
|
||||
// Enum features: count per value
|
||||
output.push_str("],\"enum_features\":[");
|
||||
let mut first_enum = true;
|
||||
for enum_feature in enum_features {
|
||||
// Skip enum features not in the requested set
|
||||
if let Some(ref set) = field_set {
|
||||
if !set.contains(enum_feature.name.as_str()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let enum_index = match state.enum_name_to_idx.get(&enum_feature.name) {
|
||||
Some(&index) => index,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let mut value_counts = vec![0u64; enum_feature.values.len()];
|
||||
for &row in &matching_rows {
|
||||
let value = enum_data[row * num_enums + enum_index];
|
||||
if value != ENUM_NULL && (value as usize) < value_counts.len() {
|
||||
value_counts[value as usize] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Only include if there are any non-zero counts
|
||||
let has_values = value_counts.iter().any(|&count| count > 0);
|
||||
if !has_values {
|
||||
continue;
|
||||
}
|
||||
|
||||
if !first_enum {
|
||||
output.push(',');
|
||||
}
|
||||
first_enum = false;
|
||||
|
||||
output.push_str("{\"name\":");
|
||||
write_json_string(&mut output, &enum_feature.name);
|
||||
output.push_str(",\"counts\":{");
|
||||
let mut first_value = true;
|
||||
for (value_index, &count) in value_counts.iter().enumerate() {
|
||||
if count == 0 {
|
||||
continue;
|
||||
}
|
||||
if !first_value {
|
||||
output.push(',');
|
||||
}
|
||||
first_value = false;
|
||||
write_json_string(&mut output, &enum_feature.values[value_index]);
|
||||
write!(output, ":{}", count).unwrap();
|
||||
}
|
||||
output.push_str("}}");
|
||||
}
|
||||
output.push_str("]}");
|
||||
|
||||
let elapsed = start_time.elapsed();
|
||||
info!(
|
||||
h3 = %h3_str,
|
||||
|
|
@ -267,46 +261,15 @@ pub async fn get_hexagon_stats(
|
|||
"GET /api/hexagon-stats"
|
||||
);
|
||||
|
||||
Ok(output)
|
||||
Ok(HexagonStatsResponse {
|
||||
count: total_count,
|
||||
numeric_features,
|
||||
enum_features: enum_features_out,
|
||||
})
|
||||
})
|
||||
.await
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
|
||||
.map_err(|error: String| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
|
||||
|
||||
Ok((
|
||||
[(axum::http::header::CONTENT_TYPE, "application/json")],
|
||||
result,
|
||||
))
|
||||
}
|
||||
|
||||
/// Appends `value` to `output` as a double-quoted JSON string literal.
///
/// Quotes and backslashes are backslash-escaped, and newline, carriage return
/// and tab use their short escapes. Every other control character below
/// U+0020 is written as `\u00XX`, because JSON forbids raw control characters
/// inside strings. All remaining characters are copied through unchanged.
fn write_json_string(output: &mut String, value: &str) {
    output.push('"');
    for character in value.chars() {
        match character {
            '"' => output.push_str("\\\""),
            '\\' => output.push_str("\\\\"),
            '\n' => output.push_str("\\n"),
            '\r' => output.push_str("\\r"),
            '\t' => output.push_str("\\t"),
            // Remaining C0 controls have no short form used here; emit the
            // generic four-digit escape so the output stays valid JSON.
            control if u32::from(control) < 0x20 => {
                output.push_str(&format!("\\u{:04x}", u32::from(control)));
            }
            other => output.push(other),
        }
    }
    output.push('"');
}
|
||||
|
||||
/// Formats an `f32` as a JSON value token.
///
/// The value is widened to `f64` before formatting. Integral values with a
/// magnitude below 1e15 are printed with one decimal place (`3` becomes
/// `"3.0"`); everything else uses the default `f64` display. Non-finite inputs
/// (NaN, ±infinity) have no JSON number representation, so they are rendered
/// as `null` rather than the invalid tokens `NaN` / `inf`.
fn format_num(value: f32) -> String {
    // Lossless widening; `f64::from` makes that explicit, unlike an `as` cast.
    let widened = f64::from(value);
    if !widened.is_finite() {
        return String::from("null");
    }
    if widened.fract() == 0.0 && widened.abs() < 1e15 {
        format!("{:.1}", widened)
    } else {
        widened.to_string()
    }
}
|
||||
|
||||
fn format_f64(value: f64) -> String {
|
||||
if value.fract() == 0.0 && value.abs() < 1e15 {
|
||||
format!("{:.1}", value)
|
||||
} else {
|
||||
format!("{}", value)
|
||||
}
|
||||
Ok(Json(response))
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue