good stuff
This commit is contained in:
parent
ea8389ef40
commit
f4de0eeb9f
39 changed files with 5165 additions and 348 deletions
|
|
@ -5,6 +5,7 @@ use axum::response::Json;
|
|||
use axum::Extension;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
use metrics::counter;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::auth::OptionalUser;
|
||||
|
|
@ -527,6 +528,7 @@ pub async fn post_ai_filters(
|
|||
};
|
||||
|
||||
if tokens_used >= AI_FILTERS_WEEKLY_TOKEN_LIMIT {
|
||||
counter!("ai_requests_total", "status" => "rate_limited").increment(1);
|
||||
return Err((
|
||||
StatusCode::TOO_MANY_REQUESTS,
|
||||
"Weekly AI usage limit reached. Resets next week.".into(),
|
||||
|
|
@ -695,6 +697,9 @@ pub async fn post_ai_filters(
|
|||
let new_total = tokens_used + total_tokens_accumulated;
|
||||
update_ai_usage(&state, &user.id, new_total, current_week).await;
|
||||
|
||||
counter!("ai_tokens_total").increment(total_tokens_accumulated);
|
||||
counter!("ai_requests_total", "status" => "success").increment(1);
|
||||
|
||||
return Ok(Json(AiFiltersResponse {
|
||||
filters,
|
||||
travel_time_filters,
|
||||
|
|
|
|||
|
|
@ -6,14 +6,15 @@ use axum::extract::Query;
|
|||
use axum::http::StatusCode;
|
||||
use axum::response::{IntoResponse, Json};
|
||||
use axum::Extension;
|
||||
use rustc_hash::FxHashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::auth::OptionalUser;
|
||||
use crate::licensing::check_license_bounds;
|
||||
use crate::parsing::{
|
||||
cell_for_row, h3_cell_bounds, needs_parent, parse_field_set, parse_filters, row_passes_filters,
|
||||
validate_h3_resolution,
|
||||
cell_for_row_cached, h3_cell_bounds, needs_parent, parse_field_set, parse_filters,
|
||||
row_passes_filters, validate_h3_resolution,
|
||||
};
|
||||
use crate::state::AppState;
|
||||
|
||||
|
|
@ -132,12 +133,14 @@ pub async fn get_hexagon_stats(
|
|||
|
||||
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
||||
|
||||
let mut h3_cache: FxHashMap<u64, u64> = FxHashMap::default();
|
||||
let mut matching_rows: Vec<usize> = Vec::new();
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if cell_for_row(row, precomputed, h3_res, need_parent) == cell_u64
|
||||
if cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache)
|
||||
== cell_u64
|
||||
&& row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
|
|
|
|||
|
|
@ -4,9 +4,11 @@ use axum::extract::Query;
|
|||
use axum::http::StatusCode;
|
||||
use axum::response::{IntoResponse, Json};
|
||||
use axum::Extension;
|
||||
use rayon::prelude::*;
|
||||
use rustc_hash::FxHashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Map, Value};
|
||||
use metrics::histogram;
|
||||
use tracing::info;
|
||||
|
||||
use crate::aggregation::Aggregator;
|
||||
|
|
@ -15,12 +17,15 @@ use crate::consts::{DEMO_BOUNDS, MAX_CELLS_PER_REQUEST};
|
|||
use crate::data::travel_time::TravelData;
|
||||
use crate::licensing::check_license_bounds;
|
||||
use crate::parsing::{
|
||||
bounds_intersect, cell_for_row, h3_cell_bounds, needs_parent, parse_field_indices,
|
||||
bounds_intersect, cell_for_row_cached, h3_cell_bounds, needs_parent, parse_field_indices,
|
||||
parse_filters, require_bounds, row_passes_filters, validate_h3_resolution,
|
||||
};
|
||||
use crate::routes::travel_time::{parse_optional_travel, TravelTimeAgg};
|
||||
use crate::state::AppState;
|
||||
|
||||
/// Row count threshold above which we use rayon parallel aggregation.
|
||||
const PARALLEL_THRESHOLD: usize = 50_000;
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct HexagonsResponse {
|
||||
features: Vec<Map<String, Value>>,
|
||||
|
|
@ -202,11 +207,67 @@ pub async fn get_hexagons(
|
|||
.map(|_| FxHashMap::default())
|
||||
.collect();
|
||||
|
||||
// Main aggregation loop
|
||||
let aggregate_row =
|
||||
|row: usize,
|
||||
groups: &mut FxHashMap<u64, Aggregator>,
|
||||
travel_aggs: &mut [FxHashMap<u64, TravelTimeAgg>]| {
|
||||
// Collect row indices for threshold-based sequential/parallel aggregation
|
||||
let row_indices = state.grid.query(south, west, north, east);
|
||||
|
||||
if row_indices.len() >= PARALLEL_THRESHOLD && !has_travel {
|
||||
// Parallel path: split rows across rayon threads, each with local accumulators
|
||||
let chunk_size = (row_indices.len() / rayon::current_num_threads()).max(1000);
|
||||
|
||||
let thread_results: Vec<FxHashMap<u64, Aggregator>> = row_indices
|
||||
.par_chunks(chunk_size)
|
||||
.map(|chunk| {
|
||||
let mut local_groups: FxHashMap<u64, Aggregator> = FxHashMap::default();
|
||||
let mut h3_cache: FxHashMap<u64, u64> = FxHashMap::default();
|
||||
for &row_idx in chunk {
|
||||
let row = row_idx as usize;
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
let cell_id =
|
||||
cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache);
|
||||
let agg = local_groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| Aggregator::new(num_features));
|
||||
if let Some(sel_indices) = field_indices.as_deref() {
|
||||
agg.add_row_selective(
|
||||
feature_data,
|
||||
row,
|
||||
num_features,
|
||||
sel_indices,
|
||||
&quant,
|
||||
);
|
||||
} else {
|
||||
agg.add_row(feature_data, row, num_features, &quant);
|
||||
}
|
||||
}
|
||||
local_groups
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Merge thread-local results into the main groups map
|
||||
for local_groups in thread_results {
|
||||
for (cell_id, local_agg) in local_groups {
|
||||
let agg = groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| Aggregator::new(num_features));
|
||||
agg.merge(&local_agg);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Sequential path (also handles travel time which needs postcode lookups)
|
||||
let mut travel_minutes: Vec<Option<i16>> = Vec::with_capacity(travel_entries.len());
|
||||
let mut h3_cache: FxHashMap<u64, u64> = FxHashMap::default();
|
||||
|
||||
'row: for &row_idx in &row_indices {
|
||||
let row = row_idx as usize;
|
||||
|
||||
// Regular filters
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
|
|
@ -215,14 +276,13 @@ pub async fn get_hexagons(
|
|||
feature_data,
|
||||
num_features,
|
||||
) {
|
||||
return;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Travel time filter: check each entry with a range
|
||||
let mut travel_minutes: Vec<Option<i16>> = Vec::new();
|
||||
if has_travel {
|
||||
travel_minutes.clear();
|
||||
let postcode = pc_interner.resolve(&pc_keys[row]);
|
||||
travel_minutes.reserve(travel_entries.len());
|
||||
for (ti, entry) in travel_entries.iter().enumerate() {
|
||||
let row_data = travel_data[ti].get(postcode);
|
||||
let minutes = row_data.map(|r| {
|
||||
|
|
@ -236,13 +296,14 @@ pub async fn get_hexagons(
|
|||
if let (Some(fmin), Some(fmax)) = (entry.filter_min, entry.filter_max) {
|
||||
match minutes {
|
||||
Some(mins) if (mins as f32) >= fmin && (mins as f32) <= fmax => {}
|
||||
_ => return, // Filtered out
|
||||
_ => continue 'row, // Filtered out (jump to next row_idx)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let cell_id = cell_for_row(row, precomputed, h3_res, need_parent);
|
||||
let cell_id =
|
||||
cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache);
|
||||
|
||||
// Aggregate regular features
|
||||
let aggregation = groups
|
||||
|
|
@ -269,13 +330,8 @@ pub async fn get_hexagons(
|
|||
agg.add(*mins as f32);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
aggregate_row(row_idx as usize, &mut groups, &mut travel_aggs);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let t_agg = t0.elapsed();
|
||||
|
||||
|
|
@ -296,9 +352,12 @@ pub async fn get_hexagons(
|
|||
features.truncate(MAX_CELLS_PER_REQUEST);
|
||||
}
|
||||
|
||||
let parallel = row_indices.len() >= PARALLEL_THRESHOLD && !has_travel;
|
||||
let t_total = t0.elapsed();
|
||||
info!(
|
||||
resolution,
|
||||
rows = row_indices.len(),
|
||||
parallel,
|
||||
cells_before_filter = groups.len(),
|
||||
cells_after_filter = features.len(),
|
||||
truncated,
|
||||
|
|
@ -311,6 +370,8 @@ pub async fn get_hexagons(
|
|||
"GET /api/hexagons"
|
||||
);
|
||||
|
||||
histogram!("hexagons_response_count").record(features.len() as f64);
|
||||
|
||||
Ok(HexagonsResponse { features })
|
||||
})
|
||||
.await
|
||||
|
|
|
|||
|
|
@ -41,13 +41,22 @@ pub async fn get_pois(
|
|||
) -> Result<Json<POIsResponse>, (StatusCode, String)> {
|
||||
let (south, west, north, east) = require_bounds(params.bounds)?;
|
||||
|
||||
let category_filter: Option<rustc_hash::FxHashSet<String>> = params
|
||||
let category_filter: Option<rustc_hash::FxHashSet<u16>> = params
|
||||
.categories
|
||||
.as_deref()
|
||||
.filter(|text| !text.is_empty())
|
||||
.map(|text| {
|
||||
text.split(',')
|
||||
.map(|part| part.trim().to_string())
|
||||
.filter_map(|part| {
|
||||
let name = part.trim();
|
||||
state
|
||||
.poi_data
|
||||
.category
|
||||
.values
|
||||
.iter()
|
||||
.position(|v| v == name)
|
||||
.map(|pos| pos as u16)
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
let categories_raw = params.categories;
|
||||
|
|
@ -63,7 +72,7 @@ pub async fn get_pois(
|
|||
.filter_map(|&row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if let Some(ref categories) = category_filter {
|
||||
if !categories.contains(state.poi_data.category.get(row)) {
|
||||
if !categories.contains(&state.poi_data.category.indices[row]) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ use axum::Extension;
|
|||
use rustc_hash::FxHashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Map, Value};
|
||||
use metrics::histogram;
|
||||
use tracing::info;
|
||||
|
||||
use crate::aggregation::Aggregator;
|
||||
|
|
@ -38,34 +39,6 @@ pub struct PostcodeParams {
|
|||
travel: Option<String>,
|
||||
}
|
||||
|
||||
/// Build a GeoJSON geometry object from postcode polygon rings.
|
||||
/// Returns Polygon for 1 ring, MultiPolygon for 2+ rings.
|
||||
fn build_postcode_geometry(rings: &[Vec<[f32; 2]>]) -> Value {
|
||||
if rings.len() == 1 {
|
||||
let coords: Vec<Value> = rings[0]
|
||||
.iter()
|
||||
.map(|[lon, lat]| {
|
||||
Value::Array(vec![Value::from(*lon as f64), Value::from(*lat as f64)])
|
||||
})
|
||||
.collect();
|
||||
serde_json::json!({ "type": "Polygon", "coordinates": [coords] })
|
||||
} else {
|
||||
let polys: Vec<Value> = rings
|
||||
.iter()
|
||||
.map(|ring| {
|
||||
let coords: Vec<Value> = ring
|
||||
.iter()
|
||||
.map(|[lon, lat]| {
|
||||
Value::Array(vec![Value::from(*lon as f64), Value::from(*lat as f64)])
|
||||
})
|
||||
.collect();
|
||||
Value::Array(vec![Value::Array(coords)])
|
||||
})
|
||||
.collect();
|
||||
serde_json::json!({ "type": "MultiPolygon", "coordinates": polys })
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_postcodes(
|
||||
state: Arc<AppState>,
|
||||
Extension(user): Extension<OptionalUser>,
|
||||
|
|
@ -128,9 +101,8 @@ pub async fn get_postcodes(
|
|||
let has_selective = field_indices.is_some();
|
||||
let sel_indices = field_indices.as_deref().unwrap_or(&[]);
|
||||
|
||||
// Build postcode -> rows mapping by iterating properties in bounds
|
||||
// and grouping by their postcode
|
||||
let mut postcode_rows: FxHashMap<usize, Vec<usize>> = FxHashMap::default();
|
||||
// Single-pass: aggregate directly into postcode_aggs while iterating properties in bounds
|
||||
let mut postcode_aggs: FxHashMap<usize, Aggregator> = FxHashMap::default();
|
||||
|
||||
state
|
||||
.grid
|
||||
|
|
@ -146,16 +118,22 @@ pub async fn get_postcodes(
|
|||
return;
|
||||
}
|
||||
|
||||
// Get postcode for this property
|
||||
let postcode = state.data.postcode(row);
|
||||
if let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) {
|
||||
postcode_rows.entry(pc_idx).or_default().push(row);
|
||||
let agg = postcode_aggs
|
||||
.entry(pc_idx)
|
||||
.or_insert_with(|| Aggregator::new(num_features));
|
||||
if has_selective {
|
||||
agg.add_row_selective(feature_data, row, num_features, sel_indices, &quant);
|
||||
} else {
|
||||
agg.add_row(feature_data, row, num_features, &quant);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Filter postcodes by travel time range (if specified)
|
||||
if has_travel {
|
||||
postcode_rows.retain(|&pc_idx, _rows| {
|
||||
postcode_aggs.retain(|&pc_idx, _agg| {
|
||||
let postcode = &postcode_data.postcodes[pc_idx];
|
||||
for (ti, entry) in travel_entries.iter().enumerate() {
|
||||
if let (Some(fmin), Some(fmax)) = (entry.filter_min, entry.filter_max) {
|
||||
|
|
@ -176,26 +154,10 @@ pub async fn get_postcodes(
|
|||
});
|
||||
}
|
||||
|
||||
// Aggregate for each postcode that has properties in bounds
|
||||
// (polygon intersection check happens later when building response)
|
||||
let mut postcode_aggs: FxHashMap<usize, Aggregator> = FxHashMap::default();
|
||||
// Travel time aggregation per postcode
|
||||
let mut travel_aggs: FxHashMap<usize, Vec<TravelTimeAgg>> = FxHashMap::default();
|
||||
|
||||
for (&pc_idx, rows) in &postcode_rows {
|
||||
let agg = postcode_aggs
|
||||
.entry(pc_idx)
|
||||
.or_insert_with(|| Aggregator::new(num_features));
|
||||
for &row in rows {
|
||||
if has_selective {
|
||||
agg.add_row_selective(feature_data, row, num_features, sel_indices, &quant);
|
||||
} else {
|
||||
agg.add_row(feature_data, row, num_features, &quant);
|
||||
}
|
||||
}
|
||||
|
||||
// Aggregate travel times for this postcode
|
||||
if has_travel {
|
||||
if has_travel {
|
||||
for &pc_idx in postcode_aggs.keys() {
|
||||
let postcode = &postcode_data.postcodes[pc_idx];
|
||||
let tt_aggs = travel_aggs.entry(pc_idx).or_insert_with(|| {
|
||||
(0..travel_entries.len())
|
||||
|
|
@ -225,37 +187,24 @@ pub async fn get_postcodes(
|
|||
continue;
|
||||
}
|
||||
|
||||
// Compute postcode polygon bounding box across ALL parts and check intersection
|
||||
let rings = &postcode_data.polygons[pc_idx];
|
||||
let (mut pc_south, mut pc_north) = (f64::INFINITY, f64::NEG_INFINITY);
|
||||
let (mut pc_west, mut pc_east) = (f64::INFINITY, f64::NEG_INFINITY);
|
||||
for ring in rings {
|
||||
for &[lon, lat] in ring {
|
||||
let lon_f = lon as f64;
|
||||
let lat_f = lat as f64;
|
||||
if lat_f < pc_south {
|
||||
pc_south = lat_f;
|
||||
}
|
||||
if lat_f > pc_north {
|
||||
pc_north = lat_f;
|
||||
}
|
||||
if lon_f < pc_west {
|
||||
pc_west = lon_f;
|
||||
}
|
||||
if lon_f > pc_east {
|
||||
pc_east = lon_f;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Use precomputed AABB for bounds intersection check
|
||||
let (pc_south, pc_west, pc_north, pc_east) = postcode_data.aabbs[pc_idx];
|
||||
|
||||
if !bounds_intersect(
|
||||
pc_south, pc_west, pc_north, pc_east, south, west, north, east,
|
||||
pc_south as f64,
|
||||
pc_west as f64,
|
||||
pc_north as f64,
|
||||
pc_east as f64,
|
||||
south,
|
||||
west,
|
||||
north,
|
||||
east,
|
||||
) {
|
||||
filtered_out += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let geometry = build_postcode_geometry(rings);
|
||||
let geometry = postcode_data.geometries[pc_idx].clone();
|
||||
|
||||
// Build properties
|
||||
let centroid = postcode_data.centroids[pc_idx];
|
||||
|
|
@ -327,6 +276,8 @@ pub async fn get_postcodes(
|
|||
}
|
||||
}
|
||||
|
||||
histogram!("postcodes_response_count").record(features.len() as f64);
|
||||
|
||||
let truncated = features.len() > MAX_CELLS_PER_REQUEST;
|
||||
let t_total = t0.elapsed();
|
||||
info!(
|
||||
|
|
@ -365,8 +316,7 @@ pub async fn get_postcode_lookup(
|
|||
|
||||
if let Some(&idx) = postcode_data.postcode_to_idx.get(&normalized) {
|
||||
let (lat, lon) = postcode_data.centroids[idx];
|
||||
let rings = &postcode_data.polygons[idx];
|
||||
let geometry = build_postcode_geometry(rings);
|
||||
let geometry = postcode_data.geometries[idx].clone();
|
||||
|
||||
info!(postcode = %normalized, "GET /api/postcode/{postcode}");
|
||||
Ok(Json(serde_json::json!({
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ use crate::consts::{DEFAULT_PROPERTIES_LIMIT, MAX_PROPERTIES_LIMIT};
|
|||
use crate::data::RenovationEvent;
|
||||
use crate::licensing::check_license_bounds;
|
||||
use crate::parsing::{
|
||||
cell_for_row, h3_cell_bounds, needs_parent, parse_filters, row_passes_filters,
|
||||
cell_for_row_cached, h3_cell_bounds, needs_parent, parse_filters, row_passes_filters,
|
||||
validate_h3_resolution,
|
||||
};
|
||||
use crate::state::AppState;
|
||||
|
|
@ -220,12 +220,14 @@ pub async fn get_hexagon_properties(
|
|||
|
||||
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
||||
|
||||
let mut h3_cache: FxHashMap<u64, u64> = FxHashMap::default();
|
||||
let mut matching_rows: Vec<usize> = Vec::new();
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if cell_for_row(row, precomputed, h3_res, need_parent) == cell_u64
|
||||
if cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache)
|
||||
== cell_u64
|
||||
&& row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ use std::sync::Arc;
|
|||
use axum::http::header::HeaderValue;
|
||||
use axum::http::{header, HeaderMap, StatusCode, Uri};
|
||||
use axum::response::IntoResponse;
|
||||
use metrics::histogram;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::state::AppState;
|
||||
|
|
@ -44,8 +45,14 @@ pub async fn get_screenshot(
|
|||
) -> impl IntoResponse {
|
||||
let qs = uri.query().unwrap_or_default();
|
||||
let auth = headers.get(header::AUTHORIZATION);
|
||||
let is_og = qs.contains("og=1");
|
||||
|
||||
match fetch_screenshot_bytes(&state, qs, auth).await {
|
||||
let t0 = std::time::Instant::now();
|
||||
let result = fetch_screenshot_bytes(&state, qs, auth).await;
|
||||
let kind = if is_og { "og" } else { "export" };
|
||||
histogram!("screenshot_duration_seconds", "kind" => kind).record(t0.elapsed().as_secs_f64());
|
||||
|
||||
match result {
|
||||
Ok(bytes) => (
|
||||
StatusCode::OK,
|
||||
[
|
||||
|
|
|
|||
|
|
@ -50,7 +50,32 @@ pub fn extract_price_history(
|
|||
}
|
||||
}
|
||||
|
||||
/// Running per-feature state used by `compute_feature_stats`.
///
/// One accumulator is created per feature before the row loop, so the kind of
/// each feature (numeric, enum, or skipped) is decided once rather than per row.
enum FeatureAccum {
    /// Numeric feature: running summary statistics plus histogram bin counts.
    Numeric {
        /// Number of finite values seen so far.
        count: usize,
        /// Smallest finite value seen; starts at `f32::INFINITY`.
        min_value: f32,
        /// Largest finite value seen; starts at `f32::NEG_INFINITY`.
        max_value: f32,
        /// Sum of all finite values, accumulated in `f64`.
        sum: f64,
        /// Per-bin counts; has `num_bins` entries. Bin 0 takes values below
        /// `p1`, the last bin takes values at or above `p99`.
        bins: Vec<u64>,
        /// `p1` of the feature's global histogram.
        p1: f32,
        /// `p99` of the feature's global histogram.
        p99: f32,
        /// Width of each middle bin: `(p99 - p1) / (num_bins - 2)`, or `0.0`
        /// when there are no middle bins or `p99 <= p1`.
        middle_width: f32,
        /// Number of bins in the global histogram (`counts.len()`).
        num_bins: usize,
        /// `min` of the global histogram, echoed in the response.
        global_min: f32,
        /// `max` of the global histogram, echoed in the response.
        global_max: f32,
    },
    /// Enum feature: occurrence count per variant index.
    Enum {
        /// Indexed by the feature value cast to `usize`; one slot per known
        /// enum value.
        value_counts: Vec<u64>,
    },
    /// Feature is skipped because fields were specified and it is not in `field_set`.
    Skip,
}
|
||||
|
||||
/// Compute per-feature stats (numeric histograms + enum counts) for the given rows.
|
||||
/// Single-pass: iterates rows in the outer loop for cache-friendly row-major access.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn compute_feature_stats(
|
||||
matching_rows: &[usize],
|
||||
|
|
@ -61,107 +86,161 @@ pub fn compute_feature_stats(
|
|||
fields_specified: bool,
|
||||
field_set: &HashSet<String>,
|
||||
) -> (Vec<NumericFeatureStats>, Vec<EnumFeatureStats>) {
|
||||
let num_features = feature_names.len();
|
||||
|
||||
// Pre-allocate accumulators for all features
|
||||
let mut accums: Vec<FeatureAccum> = (0..num_features)
|
||||
.map(|fi| {
|
||||
let feature_name = &feature_names[fi];
|
||||
if fields_specified && !field_set.contains(feature_name.as_str()) {
|
||||
return FeatureAccum::Skip;
|
||||
}
|
||||
|
||||
if let Some(ev) = enum_values.get(&fi) {
|
||||
FeatureAccum::Enum {
|
||||
value_counts: vec![0u64; ev.len()],
|
||||
}
|
||||
} else {
|
||||
let global_hist = &feature_stats_data[fi].histogram;
|
||||
let p1 = global_hist.p1;
|
||||
let p99 = global_hist.p99;
|
||||
let num_bins = global_hist.counts.len();
|
||||
let middle_bins = num_bins.saturating_sub(2);
|
||||
let middle_width = if middle_bins > 0 && p99 > p1 {
|
||||
(p99 - p1) / middle_bins as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
FeatureAccum::Numeric {
|
||||
count: 0,
|
||||
min_value: f32::INFINITY,
|
||||
max_value: f32::NEG_INFINITY,
|
||||
sum: 0.0,
|
||||
bins: vec![0u64; num_bins],
|
||||
p1,
|
||||
p99,
|
||||
middle_width,
|
||||
num_bins,
|
||||
global_min: global_hist.min,
|
||||
global_max: global_hist.max,
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Single pass: outer loop = rows, inner loop = features (cache-friendly row-major access)
|
||||
for &row in matching_rows {
|
||||
for (fi, accum) in accums.iter_mut().enumerate() {
|
||||
match accum {
|
||||
FeatureAccum::Skip => {}
|
||||
FeatureAccum::Enum { value_counts } => {
|
||||
let value = data.get_feature(row, fi);
|
||||
if value.is_finite() {
|
||||
let idx = value as usize;
|
||||
if idx < value_counts.len() {
|
||||
value_counts[idx] += 1;
|
||||
} else {
|
||||
warn!(
|
||||
feature = feature_names[fi].as_str(),
|
||||
idx,
|
||||
max = value_counts.len(),
|
||||
"Enum index out of bounds — possible data/schema mismatch"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
FeatureAccum::Numeric {
|
||||
count,
|
||||
min_value,
|
||||
max_value,
|
||||
sum,
|
||||
bins,
|
||||
p1,
|
||||
p99,
|
||||
middle_width,
|
||||
num_bins,
|
||||
..
|
||||
} => {
|
||||
let value = data.get_feature(row, fi);
|
||||
if value.is_finite() {
|
||||
*count += 1;
|
||||
if value < *min_value {
|
||||
*min_value = value;
|
||||
}
|
||||
if value > *max_value {
|
||||
*max_value = value;
|
||||
}
|
||||
*sum += value as f64;
|
||||
|
||||
let bin = if value < *p1 {
|
||||
0
|
||||
} else if value >= *p99 {
|
||||
*num_bins - 1
|
||||
} else if *middle_width > 0.0 {
|
||||
let middle_bin = ((value - *p1) / *middle_width) as usize;
|
||||
(1 + middle_bin).min(*num_bins - 2)
|
||||
} else {
|
||||
*num_bins / 2
|
||||
};
|
||||
bins[bin] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build response structs from accumulators
|
||||
let mut numeric_features = Vec::new();
|
||||
let mut enum_features_out = Vec::new();
|
||||
|
||||
for (feature_index, feature_name) in feature_names.iter().enumerate() {
|
||||
if fields_specified && !field_set.contains(feature_name.as_str()) {
|
||||
continue;
|
||||
}
|
||||
for (fi, accum) in accums.into_iter().enumerate() {
|
||||
match accum {
|
||||
FeatureAccum::Skip => {}
|
||||
FeatureAccum::Enum { value_counts } => {
|
||||
let ev = &enum_values[&fi];
|
||||
let counts: HashMap<String, u64> = value_counts
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, &count)| count > 0)
|
||||
.map(|(idx, &count)| (ev[idx].clone(), count))
|
||||
.collect();
|
||||
|
||||
if let Some(ev) = enum_values.get(&feature_index) {
|
||||
let mut value_counts = vec![0u64; ev.len()];
|
||||
for &row in matching_rows {
|
||||
let value = data.get_feature(row, feature_index);
|
||||
if value.is_finite() {
|
||||
let idx = value as usize;
|
||||
if idx < value_counts.len() {
|
||||
value_counts[idx] += 1;
|
||||
} else {
|
||||
warn!(
|
||||
feature = feature_name.as_str(),
|
||||
idx,
|
||||
max = value_counts.len(),
|
||||
"Enum index out of bounds — possible data/schema mismatch"
|
||||
);
|
||||
}
|
||||
if !counts.is_empty() {
|
||||
enum_features_out.push(EnumFeatureStats {
|
||||
name: feature_names[fi].clone(),
|
||||
counts,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let counts: HashMap<String, u64> = value_counts
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, &count)| count > 0)
|
||||
.map(|(idx, &count)| (ev[idx].clone(), count))
|
||||
.collect();
|
||||
|
||||
if !counts.is_empty() {
|
||||
enum_features_out.push(EnumFeatureStats {
|
||||
name: feature_name.clone(),
|
||||
counts,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
let global_hist = &feature_stats_data[feature_index].histogram;
|
||||
let p1 = global_hist.p1;
|
||||
let p99 = global_hist.p99;
|
||||
let num_bins = global_hist.counts.len();
|
||||
|
||||
let mut count = 0usize;
|
||||
let mut min_value = f32::INFINITY;
|
||||
let mut max_value = f32::NEG_INFINITY;
|
||||
let mut sum = 0.0f64;
|
||||
let mut bins = vec![0u64; num_bins];
|
||||
|
||||
let middle_bins = num_bins.saturating_sub(2);
|
||||
let middle_width = if middle_bins > 0 && p99 > p1 {
|
||||
(p99 - p1) / middle_bins as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
for &row in matching_rows {
|
||||
let value = data.get_feature(row, feature_index);
|
||||
if value.is_finite() {
|
||||
count += 1;
|
||||
if value < min_value {
|
||||
min_value = value;
|
||||
}
|
||||
if value > max_value {
|
||||
max_value = value;
|
||||
}
|
||||
sum += value as f64;
|
||||
|
||||
let bin = if value < p1 {
|
||||
0
|
||||
} else if value >= p99 {
|
||||
num_bins - 1
|
||||
} else if middle_width > 0.0 {
|
||||
let middle_bin = ((value - p1) / middle_width) as usize;
|
||||
(1 + middle_bin).min(num_bins - 2)
|
||||
} else {
|
||||
num_bins / 2
|
||||
};
|
||||
bins[bin] += 1;
|
||||
FeatureAccum::Numeric {
|
||||
count,
|
||||
min_value,
|
||||
max_value,
|
||||
sum,
|
||||
bins,
|
||||
p1,
|
||||
p99,
|
||||
global_min,
|
||||
global_max,
|
||||
..
|
||||
} => {
|
||||
if count > 0 {
|
||||
numeric_features.push(NumericFeatureStats {
|
||||
name: feature_names[fi].clone(),
|
||||
count,
|
||||
min: min_value as f64,
|
||||
max: max_value as f64,
|
||||
mean: sum / count as f64,
|
||||
histogram: HistogramStats {
|
||||
min: global_min as f64,
|
||||
max: global_max as f64,
|
||||
p1: p1 as f64,
|
||||
p99: p99 as f64,
|
||||
counts: bins,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
numeric_features.push(NumericFeatureStats {
|
||||
name: feature_name.clone(),
|
||||
count,
|
||||
min: min_value as f64,
|
||||
max: max_value as f64,
|
||||
mean: sum / count as f64,
|
||||
histogram: HistogramStats {
|
||||
min: global_hist.min as f64,
|
||||
max: global_hist.max as f64,
|
||||
p1: p1 as f64,
|
||||
p99: p99 as f64,
|
||||
counts: bins,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
77
server-rs/src/routes/telemetry.rs
Normal file
77
server-rs/src/routes/telemetry.rs
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
use axum::http::{HeaderMap, StatusCode};
|
||||
use axum::response::Json;
|
||||
use axum::Extension;
|
||||
use metrics::{counter, gauge};
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::auth::OptionalUser;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct TelemetryPayload {
|
||||
session_seconds: u64,
|
||||
filter_count: u64,
|
||||
/// Sent once on first beacon: the entry page path
|
||||
#[serde(default)]
|
||||
entry_path: Option<String>,
|
||||
/// Sent once on first beacon: the document.referrer domain (or "direct")
|
||||
#[serde(default)]
|
||||
referrer: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn post_telemetry(
|
||||
Extension(user): Extension<OptionalUser>,
|
||||
headers: HeaderMap,
|
||||
Json(payload): Json<TelemetryPayload>,
|
||||
) -> StatusCode {
|
||||
let user_label = match &user.0 {
|
||||
Some(u) => u.email.clone(),
|
||||
None => "anonymous".to_string(),
|
||||
};
|
||||
|
||||
let ua = headers
|
||||
.get("user-agent")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("unknown");
|
||||
let browser = parse_browser(ua);
|
||||
|
||||
gauge!("user_session_seconds", "user" => user_label.clone(), "browser" => browser.clone())
|
||||
.set(payload.session_seconds as f64);
|
||||
gauge!("user_active_filters", "user" => user_label, "browser" => browser)
|
||||
.set(payload.filter_count as f64);
|
||||
|
||||
// Entrypoint tracking (sent once per session)
|
||||
if let Some(path) = &payload.entry_path {
|
||||
let referrer = payload.referrer.as_deref().unwrap_or("direct");
|
||||
counter!("entrypoint_total", "path" => normalize_entry_path(path), "referrer" => referrer.to_string())
|
||||
.increment(1);
|
||||
}
|
||||
|
||||
StatusCode::NO_CONTENT
|
||||
}
|
||||
|
||||
/// Collapse an entry path into a small, fixed set of metric label values.
///
/// Known static routes are returned unchanged, invite and share links are
/// reduced to a parameterized form, and every other path becomes `"/other"`.
/// This keeps label cardinality bounded.
fn normalize_entry_path(path: &str) -> String {
    // Static routes that are reported verbatim (exact match only).
    const KNOWN_ROUTES: [&str; 7] = [
        "/",
        "/dashboard",
        "/pricing",
        "/learn",
        "/saved",
        "/invites",
        "/account",
    ];

    let label = if KNOWN_ROUTES.contains(&path) {
        path
    } else if path.starts_with("/invite/") {
        "/invite/:code"
    } else if path.starts_with("/s/") {
        "/s/:code"
    } else {
        "/other"
    };

    label.to_string()
}
|
||||
|
||||
/// Classify a User-Agent string into a coarse browser family label.
///
/// Returns one of `"Firefox"`, `"Edge"`, `"Chrome"`, `"Safari"` or `"Other"`.
///
/// Order matters: Chromium-based browsers also advertise `Safari`, and Edge
/// also advertises `Chrome`, so the more specific tokens are checked first.
/// The mobile tokens are checked as well (`FxiOS` for Firefox on iOS, `EdgA/`
/// and `EdgiOS/` for Edge on Android/iOS, `CriOS` for Chrome on iOS), since
/// those user agents otherwise fall through to Chrome or Safari.
fn parse_browser(ua: &str) -> String {
    let has_any = |tokens: &[&str]| tokens.iter().any(|token| ua.contains(*token));

    let family = if has_any(&["Firefox", "FxiOS"]) {
        "Firefox"
    } else if has_any(&["Edg/", "EdgA/", "EdgiOS/"]) {
        "Edge"
    } else if has_any(&["Chrome", "CriOS"]) {
        "Chrome"
    } else if ua.contains("Safari") {
        "Safari"
    } else {
        "Other"
    };

    family.to_string()
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue