Optimisations
This commit is contained in:
parent
66c2a25457
commit
9179acd4cd
21 changed files with 653 additions and 139 deletions
|
|
@ -14,9 +14,9 @@ pub enum FeatureInfo {
|
|||
#[serde(rename = "numeric")]
|
||||
Numeric {
|
||||
name: String,
|
||||
min: f64,
|
||||
max: f64,
|
||||
step: f64,
|
||||
min: f32,
|
||||
max: f32,
|
||||
step: f32,
|
||||
histogram: Histogram,
|
||||
description: &'static str,
|
||||
detail: &'static str,
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ use axum::response::IntoResponse;
|
|||
use serde::Deserialize;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::consts::{ENUM_NULL, HISTOGRAM_BINS};
|
||||
use crate::consts::{ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
|
||||
use crate::filter::{parse_filters, row_passes_filters};
|
||||
use crate::state::AppState;
|
||||
|
||||
|
|
@ -31,17 +31,21 @@ pub async fn get_hexagon_stats(
|
|||
})?;
|
||||
let cell_u64: u64 = cell.into();
|
||||
|
||||
let resolution = params.resolution as usize;
|
||||
if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
|
||||
let resolution = params.resolution;
|
||||
if !(H3_REQUEST_MIN..=H3_REQUEST_MAX).contains(&resolution) {
|
||||
warn!(
|
||||
resolution,
|
||||
"Invalid or non-precomputed resolution for hexagon-stats"
|
||||
"Resolution out of range [{}, {}]", H3_REQUEST_MIN, H3_REQUEST_MAX
|
||||
);
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
"Invalid or non-precomputed resolution".to_string(),
|
||||
format!(
|
||||
"resolution must be between {} and {}",
|
||||
H3_REQUEST_MIN, H3_REQUEST_MAX
|
||||
),
|
||||
));
|
||||
}
|
||||
let resolution_idx = resolution as usize;
|
||||
|
||||
let h3_str = params.h3.clone();
|
||||
let filters_str = params.filters.clone();
|
||||
|
|
@ -54,7 +58,13 @@ pub async fn get_hexagon_stats(
|
|||
|
||||
let result = tokio::task::spawn_blocking(move || {
|
||||
let start_time = std::time::Instant::now();
|
||||
let h3_data = &state.h3_cells[resolution];
|
||||
let precomputed: Option<&[u64]> = state
|
||||
.h3_cells
|
||||
.get(resolution_idx)
|
||||
.filter(|cells| !cells.is_empty())
|
||||
.map(|cells| cells.as_slice());
|
||||
let h3_res = h3o::Resolution::try_from(resolution)
|
||||
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
|
||||
let num_features = state.data.num_features;
|
||||
let feature_data = &state.data.feature_data;
|
||||
let enum_features = &state.data.enum_features;
|
||||
|
|
@ -67,7 +77,14 @@ pub async fn get_hexagon_stats(
|
|||
.grid
|
||||
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if h3_data[row] == cell_u64
|
||||
let row_cell = if let Some(h3_data) = precomputed {
|
||||
h3_data[row]
|
||||
} else {
|
||||
h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64)
|
||||
.map(|coord| u64::from(coord.to_cell(h3_res)))
|
||||
.unwrap_or(0)
|
||||
};
|
||||
if row_cell == cell_u64
|
||||
&& row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
|
|
@ -98,9 +115,9 @@ pub async fn get_hexagon_stats(
|
|||
let bin_width = global_stats.histogram.bin_width;
|
||||
|
||||
let mut count = 0usize;
|
||||
let mut min_value = f64::INFINITY;
|
||||
let mut max_value = f64::NEG_INFINITY;
|
||||
let mut sum = 0.0f64;
|
||||
let mut min_value = f32::INFINITY;
|
||||
let mut max_value = f32::NEG_INFINITY;
|
||||
let mut sum = 0.0f64; // keep f64 for mean precision
|
||||
let mut bins = vec![0u64; HISTOGRAM_BINS];
|
||||
|
||||
for &row in &matching_rows {
|
||||
|
|
@ -113,12 +130,12 @@ pub async fn get_hexagon_stats(
|
|||
if value > max_value {
|
||||
max_value = value;
|
||||
}
|
||||
sum += value;
|
||||
sum += value as f64;
|
||||
|
||||
// Bin into histogram using global edges
|
||||
// Bin into histogram using global edges (cast to f64 for bin index math)
|
||||
if bin_width > 0.0 {
|
||||
let bin_index =
|
||||
((value - histogram_min) / bin_width).floor() as isize;
|
||||
((value as f64 - histogram_min as f64) / bin_width as f64).floor() as isize;
|
||||
let clamped_index = bin_index.max(0).min((HISTOGRAM_BINS - 1) as isize) as usize;
|
||||
bins[clamped_index] += 1;
|
||||
}
|
||||
|
|
@ -138,15 +155,15 @@ pub async fn get_hexagon_stats(
|
|||
output.push_str("{\"name\":");
|
||||
write_json_string(&mut output, feature_name);
|
||||
write!(output, ",\"count\":{}", count).unwrap();
|
||||
write!(output, ",\"min\":{}", format_f64(min_value)).unwrap();
|
||||
write!(output, ",\"max\":{}", format_f64(max_value)).unwrap();
|
||||
write!(output, ",\"min\":{}", format_num(min_value)).unwrap();
|
||||
write!(output, ",\"max\":{}", format_num(max_value)).unwrap();
|
||||
write!(output, ",\"mean\":{}", format_f64(mean)).unwrap();
|
||||
output.push_str(",\"histogram\":{\"min\":");
|
||||
write!(output, "{}", format_f64(histogram_min)).unwrap();
|
||||
write!(output, "{}", format_num(histogram_min)).unwrap();
|
||||
output.push_str(",\"max\":");
|
||||
write!(output, "{}", format_f64(histogram_max)).unwrap();
|
||||
write!(output, "{}", format_num(histogram_max)).unwrap();
|
||||
output.push_str(",\"bin_width\":");
|
||||
write!(output, "{}", format_f64(bin_width)).unwrap();
|
||||
write!(output, "{}", format_num(bin_width)).unwrap();
|
||||
output.push_str(",\"counts\":[");
|
||||
for (bin_index, &bin_count) in bins.iter().enumerate() {
|
||||
if bin_index > 0 {
|
||||
|
|
@ -216,10 +233,11 @@ pub async fn get_hexagon_stats(
|
|||
"GET /api/hexagon-stats"
|
||||
);
|
||||
|
||||
output
|
||||
Ok(output)
|
||||
})
|
||||
.await
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
|
||||
|
||||
Ok((
|
||||
[(axum::http::header::CONTENT_TYPE, "application/json")],
|
||||
|
|
@ -242,6 +260,15 @@ fn write_json_string(output: &mut String, value: &str) {
|
|||
output.push('"');
|
||||
}
|
||||
|
||||
/// Formats an `f32` as a JSON number token.
///
/// * Integral values with magnitude below `1e15` are rendered with exactly one
///   decimal place (`2.0`, `-3.0`) so they always read back as floats.
/// * Every other finite value uses `f32`'s own `Display`, which emits the
///   shortest decimal that round-trips to the same `f32` (`0.1`). Widening to
///   `f64` before formatting would instead print the widening noise
///   (`0.10000000149011612`) and bloat the payload.
/// * Non-finite values (`NaN`, `±inf`) are rendered as `null`, because `NaN`
///   and `inf` are not valid JSON tokens.
fn format_num(value: f32) -> String {
    if !value.is_finite() {
        return "null".to_string();
    }

    // Do the integrality / magnitude test in f64 so the `1e15` threshold is
    // compared exactly rather than against its rounded f32 neighbour.
    let wide = f64::from(value);
    if wide.fract() == 0.0 && wide.abs() < 1e15 {
        format!("{:.1}", wide)
    } else {
        // Format the f32 itself: shortest round-trip representation.
        value.to_string()
    }
}
|
||||
|
||||
fn format_f64(value: f64) -> String {
|
||||
if value.fract() == 0.0 && value.abs() < 1e15 {
|
||||
format!("{:.1}", value)
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ use serde::Deserialize;
|
|||
use tracing::{info, warn};
|
||||
|
||||
use crate::consts::{
|
||||
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_PRECOMPUTE_MIN,
|
||||
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN,
|
||||
POSTCODE_MIN_RESOLUTION,
|
||||
};
|
||||
use crate::filter::parse_filters;
|
||||
|
|
@ -44,8 +44,8 @@ pub struct HexagonParams {
|
|||
/// Per-cell accumulator for aggregating features
|
||||
struct CellAgg {
|
||||
count: u32,
|
||||
mins: Vec<f64>,
|
||||
maxs: Vec<f64>,
|
||||
mins: Vec<f32>,
|
||||
maxs: Vec<f32>,
|
||||
/// Min/max ordinal indices for enum features (255 = no data yet)
|
||||
enum_mins: Vec<u8>,
|
||||
enum_maxs: Vec<u8>,
|
||||
|
|
@ -60,8 +60,8 @@ impl CellAgg {
|
|||
fn new(num_features: usize, num_enums: usize) -> Self {
|
||||
CellAgg {
|
||||
count: 0,
|
||||
mins: vec![f64::INFINITY; num_features],
|
||||
maxs: vec![f64::NEG_INFINITY; num_features],
|
||||
mins: vec![f32::INFINITY; num_features],
|
||||
maxs: vec![f32::NEG_INFINITY; num_features],
|
||||
enum_mins: vec![ENUM_NULL; num_enums],
|
||||
enum_maxs: vec![0; num_enums],
|
||||
postcode: None,
|
||||
|
|
@ -75,7 +75,7 @@ impl CellAgg {
|
|||
/// feature_data[row * num_features + feat_idx] — all features for one row
|
||||
/// are contiguous, so this reads a single cache line per ~8 features.
|
||||
#[inline]
|
||||
fn add_row(&mut self, feature_data: &[f64], row: usize, num_features: usize) {
|
||||
fn add_row(&mut self, feature_data: &[f32], row: usize, num_features: usize) {
|
||||
self.count += 1;
|
||||
let base = row * num_features;
|
||||
let row_slice = &feature_data[base..base + num_features];
|
||||
|
|
@ -110,9 +110,9 @@ impl CellAgg {
|
|||
/// Track postcode and centroid for high-resolution cells.
|
||||
/// Uses simple "first seen" approach — at res 11/12, most rows in a cell share a postcode.
|
||||
#[inline]
|
||||
fn add_postcode(&mut self, postcode: &str, lat: f64, lon: f64) {
|
||||
self.lat_sum += lat;
|
||||
self.lon_sum += lon;
|
||||
fn add_postcode(&mut self, postcode: &str, lat: f32, lon: f32) {
|
||||
self.lat_sum += lat as f64;
|
||||
self.lon_sum += lon as f64;
|
||||
if postcode.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
|
@ -212,16 +212,16 @@ pub async fn get_hexagons(
|
|||
Query(params): Query<HexagonParams>,
|
||||
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
||||
let resolution = params.resolution;
|
||||
if resolution < H3_PRECOMPUTE_MIN || resolution > H3_PRECOMPUTE_MAX {
|
||||
if !(H3_REQUEST_MIN..=H3_REQUEST_MAX).contains(&resolution) {
|
||||
warn!(
|
||||
resolution,
|
||||
"Resolution out of range [{}, {}]", H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX
|
||||
"Resolution out of range [{}, {}]", H3_REQUEST_MIN, H3_REQUEST_MAX
|
||||
);
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
format!(
|
||||
"resolution must be between {} and {}",
|
||||
H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX
|
||||
H3_REQUEST_MIN, H3_REQUEST_MAX
|
||||
),
|
||||
));
|
||||
}
|
||||
|
|
@ -304,7 +304,7 @@ pub async fn get_hexagons(
|
|||
aggregation.add_enums(enum_features, row);
|
||||
if include_postcode {
|
||||
aggregation.add_postcode(
|
||||
&state.data.postcode[row],
|
||||
state.data.postcode(row),
|
||||
state.data.lat[row],
|
||||
state.data.lon[row],
|
||||
);
|
||||
|
|
@ -320,7 +320,7 @@ pub async fn get_hexagons(
|
|||
if !row_passes(row) {
|
||||
return;
|
||||
}
|
||||
let cell_id = h3o::LatLng::new(state.data.lat[row], state.data.lon[row])
|
||||
let cell_id = h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64)
|
||||
.map(|coord| u64::from(coord.to_cell(h3_res)))
|
||||
.unwrap_or(0);
|
||||
let aggregation = groups
|
||||
|
|
@ -330,7 +330,7 @@ pub async fn get_hexagons(
|
|||
aggregation.add_enums(enum_features, row);
|
||||
if include_postcode {
|
||||
aggregation.add_postcode(
|
||||
&state.data.postcode[row],
|
||||
state.data.postcode(row),
|
||||
state.data.lat[row],
|
||||
state.data.lon[row],
|
||||
);
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ pub async fn get_pois(
|
|||
.filter_map(|&row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if let Some(ref categories) = category_filter {
|
||||
if !categories.contains(&state.poi_data.category[row]) {
|
||||
if !categories.contains(state.poi_data.category.get(row)) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
|
@ -83,11 +83,11 @@ pub async fn get_pois(
|
|||
.map(|&row| POI {
|
||||
id: state.poi_data.id[row].clone(),
|
||||
name: state.poi_data.name[row].clone(),
|
||||
category: state.poi_data.category[row].clone(),
|
||||
group: state.poi_data.group[row].clone(),
|
||||
category: state.poi_data.category.get(row).to_string(),
|
||||
group: state.poi_data.group.get(row).to_string(),
|
||||
lat: state.poi_data.lat[row],
|
||||
lng: state.poi_data.lng[row],
|
||||
emoji: state.poi_data.emoji[row].clone(),
|
||||
emoji: state.poi_data.emoji.get(row).to_string(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ use rustc_hash::FxHashMap;
|
|||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, MAX_PROPERTIES_LIMIT};
|
||||
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN, MAX_PROPERTIES_LIMIT};
|
||||
use crate::data::EnumFeatureData;
|
||||
use crate::filter::{parse_filters, row_passes_filters};
|
||||
use crate::state::AppState;
|
||||
|
|
@ -36,13 +36,13 @@ pub struct Property {
|
|||
pub potential_energy_rating: Option<String>,
|
||||
|
||||
// Numeric fields
|
||||
pub lat: f64,
|
||||
pub lon: f64,
|
||||
pub lat: f32,
|
||||
pub lon: f32,
|
||||
|
||||
pub is_construction_date_approximate: Option<bool>,
|
||||
|
||||
#[serde(flatten)]
|
||||
pub features: FxHashMap<String, f64>,
|
||||
pub features: FxHashMap<String, f32>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
@ -93,17 +93,21 @@ pub async fn get_hexagon_properties(
|
|||
})?;
|
||||
let cell_u64: u64 = cell.into();
|
||||
|
||||
let resolution = params.resolution as usize;
|
||||
if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
|
||||
let resolution = params.resolution;
|
||||
if !(H3_REQUEST_MIN..=H3_REQUEST_MAX).contains(&resolution) {
|
||||
warn!(
|
||||
resolution,
|
||||
"Invalid or non-precomputed resolution for hexagon-properties"
|
||||
"Resolution out of range [{}, {}]", H3_REQUEST_MIN, H3_REQUEST_MAX
|
||||
);
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
"Invalid or non-precomputed resolution".to_string(),
|
||||
format!(
|
||||
"resolution must be between {} and {}",
|
||||
H3_REQUEST_MIN, H3_REQUEST_MAX
|
||||
),
|
||||
));
|
||||
}
|
||||
let resolution_idx = resolution as usize;
|
||||
|
||||
let h3_str = params.h3.clone();
|
||||
let filters_str = params.filters.clone();
|
||||
|
|
@ -116,7 +120,13 @@ pub async fn get_hexagon_properties(
|
|||
|
||||
let result = tokio::task::spawn_blocking(move || {
|
||||
let t0 = std::time::Instant::now();
|
||||
let h3_data = &state.h3_cells[resolution];
|
||||
let precomputed: Option<&[u64]> = state
|
||||
.h3_cells
|
||||
.get(resolution_idx)
|
||||
.filter(|cells| !cells.is_empty())
|
||||
.map(|cells| cells.as_slice());
|
||||
let h3_res = h3o::Resolution::try_from(resolution)
|
||||
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
|
||||
let num_features = state.data.num_features;
|
||||
let feature_data = &state.data.feature_data;
|
||||
let enum_features = &state.data.enum_features;
|
||||
|
|
@ -128,7 +138,14 @@ pub async fn get_hexagon_properties(
|
|||
.grid
|
||||
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if h3_data[row] == cell_u64
|
||||
let row_cell = if let Some(h3_data) = precomputed {
|
||||
h3_data[row]
|
||||
} else {
|
||||
h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64)
|
||||
.map(|coord| u64::from(coord.to_cell(h3_res)))
|
||||
.unwrap_or(0)
|
||||
};
|
||||
if row_cell == cell_u64
|
||||
&& row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
|
|
@ -162,8 +179,8 @@ pub async fn get_hexagon_properties(
|
|||
}
|
||||
|
||||
Property {
|
||||
address: non_empty_string(&state.data.address[row]),
|
||||
postcode: non_empty_string(&state.data.postcode[row]),
|
||||
address: non_empty_string(state.data.address(row)),
|
||||
postcode: non_empty_string(state.data.postcode(row)),
|
||||
is_construction_date_approximate: Some(state.data.is_approx_build_date[row]),
|
||||
property_type: lookup_enum_value(
|
||||
enum_features,
|
||||
|
|
@ -215,16 +232,17 @@ pub async fn get_hexagon_properties(
|
|||
"GET /api/hexagon-properties"
|
||||
);
|
||||
|
||||
HexagonPropertiesResponse {
|
||||
Ok(HexagonPropertiesResponse {
|
||||
properties,
|
||||
total,
|
||||
limit,
|
||||
offset,
|
||||
truncated,
|
||||
}
|
||||
})
|
||||
})
|
||||
.await
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
|
||||
.map_err(|error: String| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
|
||||
|
||||
Ok(Json(result))
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue