Refactor and improve
This commit is contained in:
parent
1f148b2185
commit
242acff987
22 changed files with 754 additions and 1053 deletions
|
|
@ -9,22 +9,6 @@ pub const SERVER_ADDRESS: &str = "0.0.0.0:8001";
|
|||
pub const BOUNDS_QUANTIZATION: f64 = 0.01;
|
||||
pub const BOUNDS_BUFFER_PERCENT: f64 = 0.1;
|
||||
pub const GRID_CELL_SIZE: f32 = 0.01;
|
||||
pub const POSTCODE_MIN_RESOLUTION: u8 = 11;
|
||||
pub const MAX_POIS_PER_REQUEST: usize = 2500;
|
||||
pub const DEFAULT_PROPERTIES_LIMIT: usize = 100;
|
||||
pub const MAX_PROPERTIES_LIMIT: usize = 500;
|
||||
pub const ENUM_NULL: u8 = 255;
|
||||
|
||||
/// Canonical display order for POI category groups.
|
||||
/// The server will panic at startup if the data contains groups not in this list or vice versa.
|
||||
pub const POI_GROUP_ORDER: &[&str] = &[
|
||||
"Public Transport",
|
||||
"Amenity",
|
||||
"Building",
|
||||
"Craft",
|
||||
"Healthcare",
|
||||
"Leisure",
|
||||
"Office",
|
||||
"Shop",
|
||||
"Tourism",
|
||||
];
|
||||
|
|
|
|||
5
server-rs/src/data.rs
Normal file
5
server-rs/src/data.rs
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
mod poi;
|
||||
mod property;
|
||||
|
||||
pub use poi::{POICategoryGroup, POIData};
|
||||
pub use property::{precompute_h3, Histogram, PropertyData};
|
||||
|
|
@ -45,8 +45,7 @@ pub struct EnumFeatureGroup {
|
|||
pub features: &'static [EnumFeatureConfig],
|
||||
}
|
||||
|
||||
/// Columns in parquet that are neither numeric features nor enum features.
|
||||
/// These are silently skipped during schema validation.
|
||||
/// Columns in parquet that are not filterable
|
||||
pub const IGNORED_COLUMNS: &[&str] = &[
|
||||
"lat",
|
||||
"lon",
|
||||
|
|
@ -792,3 +791,17 @@ pub fn bounds_for(name: &str) -> Option<&'static Bounds> {
|
|||
.find(|feature| feature.name == name)
|
||||
.map(|feature| &feature.bounds)
|
||||
}
|
||||
|
||||
/// Canonical display order for POI category groups.
|
||||
/// The server will panic at startup if the data contains groups not in this list or vice versa.
|
||||
pub const POI_GROUP_ORDER: &[&str] = &[
|
||||
"Public Transport",
|
||||
"Amenity",
|
||||
"Building",
|
||||
"Craft",
|
||||
"Healthcare",
|
||||
"Leisure",
|
||||
"Office",
|
||||
"Shop",
|
||||
"Tourism",
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1,94 +0,0 @@
|
|||
use crate::consts::ENUM_NULL;
|
||||
use crate::data::EnumFeatureData;
|
||||
|
||||
pub struct ParsedFilter {
|
||||
pub feat_idx: usize,
|
||||
pub min: f32,
|
||||
pub max: f32,
|
||||
}
|
||||
|
||||
pub struct ParsedEnumFilter {
|
||||
pub enum_idx: usize,
|
||||
pub allowed: Vec<u8>,
|
||||
}
|
||||
|
||||
/// Parse comma-separated filter string into numeric and enum filters.
|
||||
/// Numeric format: `name:min:max`
|
||||
/// Enum format: `name:val1|val2|val3` (pipe-separated values)
|
||||
pub fn parse_filters(
|
||||
filter_str: Option<&str>,
|
||||
feature_names: &[String],
|
||||
enum_features: &[EnumFeatureData],
|
||||
) -> (Vec<ParsedFilter>, Vec<ParsedEnumFilter>) {
|
||||
let mut numeric = Vec::new();
|
||||
let mut enums = Vec::new();
|
||||
|
||||
let input = match filter_str.filter(|text| !text.is_empty()) {
|
||||
Some(text) => text,
|
||||
None => return (numeric, enums),
|
||||
};
|
||||
|
||||
for entry in input.split(',') {
|
||||
let parts: Vec<&str> = entry.splitn(2, ':').collect();
|
||||
if parts.len() != 2 {
|
||||
continue;
|
||||
}
|
||||
let name = parts[0].trim();
|
||||
let rest = parts[1].trim();
|
||||
|
||||
if let Some(enum_idx) = enum_features
|
||||
.iter()
|
||||
.position(|enum_feat| enum_feat.name == name)
|
||||
{
|
||||
let enum_feat = &enum_features[enum_idx];
|
||||
let allowed: Vec<u8> = rest
|
||||
.split('|')
|
||||
.filter_map(|value| {
|
||||
let value = value.trim();
|
||||
enum_feat
|
||||
.values
|
||||
.iter()
|
||||
.position(|existing| existing == value)
|
||||
.map(|position| position as u8)
|
||||
})
|
||||
.collect();
|
||||
enums.push(ParsedEnumFilter { enum_idx, allowed });
|
||||
} else {
|
||||
let num_parts: Vec<&str> = rest.splitn(2, ':').collect();
|
||||
if num_parts.len() != 2 {
|
||||
continue;
|
||||
}
|
||||
let min = match num_parts[0].trim().parse::<f32>() {
|
||||
Ok(value) => value,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let max = match num_parts[1].trim().parse::<f32>() {
|
||||
Ok(value) => value,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == name) {
|
||||
numeric.push(ParsedFilter { feat_idx, min, max });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(numeric, enums)
|
||||
}
|
||||
|
||||
pub fn row_passes_filters(
|
||||
row: usize,
|
||||
filters: &[ParsedFilter],
|
||||
enum_filters: &[ParsedEnumFilter],
|
||||
feature_data: &[f32],
|
||||
num_features: usize,
|
||||
enum_data: &[u8],
|
||||
num_enums: usize,
|
||||
) -> bool {
|
||||
filters.iter().all(|filter| {
|
||||
let value = feature_data[row * num_features + filter.feat_idx];
|
||||
value.is_finite() && value >= filter.min && value <= filter.max
|
||||
}) && enum_filters.iter().all(|enum_filter| {
|
||||
let value = enum_data[row * num_enums + enum_filter.enum_idx];
|
||||
value != ENUM_NULL && enum_filter.allowed.contains(&value)
|
||||
})
|
||||
}
|
||||
|
|
@ -1,13 +1,11 @@
|
|||
mod consts;
|
||||
mod data;
|
||||
mod features;
|
||||
mod filter;
|
||||
mod grid_index;
|
||||
mod og_middleware;
|
||||
pub mod parsing;
|
||||
mod routes;
|
||||
mod state;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
pub mod utils;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
|
@ -21,7 +19,7 @@ use tower_http::compression::CompressionLayer;
|
|||
use tower_http::cors::{Any, CorsLayer};
|
||||
use tower_http::services::ServeDir;
|
||||
use tower_http::trace::TraceLayer;
|
||||
use tracing::info;
|
||||
use tracing::{info, warn};
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use state::AppState;
|
||||
|
|
@ -78,12 +76,12 @@ async fn main() -> anyhow::Result<()> {
|
|||
info!(
|
||||
rows = property_data.lat.len(),
|
||||
features = property_data.num_features,
|
||||
enums = property_data.enum_features.len(),
|
||||
enums = property_data.enum_values.len(),
|
||||
"Property data loaded"
|
||||
);
|
||||
|
||||
info!("Building spatial grid index (0.01° cells)");
|
||||
let grid = grid_index::GridIndex::build(
|
||||
let grid = utils::GridIndex::build(
|
||||
&property_data.lat,
|
||||
&property_data.lon,
|
||||
consts::GRID_CELL_SIZE,
|
||||
|
|
@ -107,7 +105,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
|
||||
info!("Building POI spatial grid index");
|
||||
let poi_grid =
|
||||
grid_index::GridIndex::build(&poi_data.lat, &poi_data.lng, consts::GRID_CELL_SIZE);
|
||||
utils::GridIndex::build(&poi_data.lat, &poi_data.lng, consts::GRID_CELL_SIZE);
|
||||
|
||||
let min_keys: Vec<String> = property_data
|
||||
.feature_names
|
||||
|
|
@ -119,64 +117,8 @@ async fn main() -> anyhow::Result<()> {
|
|||
.iter()
|
||||
.map(|name| format!("max_{}", name))
|
||||
.collect();
|
||||
let enum_min_keys: Vec<String> = property_data
|
||||
.enum_features
|
||||
.iter()
|
||||
.map(|enum_feature| format!("min_{}", enum_feature.name))
|
||||
.collect();
|
||||
let enum_max_keys: Vec<String> = property_data
|
||||
.enum_features
|
||||
.iter()
|
||||
.map(|enum_feature| format!("max_{}", enum_feature.name))
|
||||
.collect();
|
||||
|
||||
// Precompute POI category groups
|
||||
let poi_category_groups = {
|
||||
let mut group_cats: std::collections::HashMap<String, std::collections::HashSet<String>> =
|
||||
std::collections::HashMap::new();
|
||||
let num_pois = poi_data.category.indices.len();
|
||||
for row in 0..num_pois {
|
||||
let category = poi_data.category.get(row).to_string();
|
||||
let group = poi_data.group.get(row).to_string();
|
||||
group_cats.entry(group).or_default().insert(category);
|
||||
}
|
||||
// Validate that data groups match the hardcoded order exactly
|
||||
let expected: std::collections::HashSet<&str> =
|
||||
consts::POI_GROUP_ORDER.iter().copied().collect();
|
||||
let actual: std::collections::HashSet<&str> =
|
||||
group_cats.keys().map(|key| key.as_str()).collect();
|
||||
let missing_from_data: Vec<&&str> = expected.difference(&actual).collect();
|
||||
let missing_from_order: Vec<&&str> = actual.difference(&expected).collect();
|
||||
if !missing_from_data.is_empty() || !missing_from_order.is_empty() {
|
||||
bail!(
|
||||
"POI group mismatch!\n In POI_GROUP_ORDER but not in data: {:?}\n In data but not in POI_GROUP_ORDER: {:?}",
|
||||
missing_from_data, missing_from_order
|
||||
);
|
||||
}
|
||||
consts::POI_GROUP_ORDER
|
||||
.iter()
|
||||
.map(|group_name| group_name.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.into_iter()
|
||||
.map(|name| {
|
||||
let mut categories: Vec<String> = group_cats
|
||||
.remove(&name)
|
||||
.context("POI group validated but missing from map")?
|
||||
.into_iter()
|
||||
.collect();
|
||||
categories.sort();
|
||||
Ok(state::POICategoryGroup { name, categories })
|
||||
})
|
||||
.collect::<anyhow::Result<Vec<_>>>()?
|
||||
};
|
||||
|
||||
// Precompute enum name → index map
|
||||
let enum_name_to_idx: rustc_hash::FxHashMap<String, usize> = property_data
|
||||
.enum_features
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(index, enum_feature)| (enum_feature.name.clone(), index))
|
||||
.collect();
|
||||
let poi_category_groups = poi_data.category_groups()?;
|
||||
|
||||
// Read index.html at startup for crawler OG injection
|
||||
let frontend_dist = cli.dist.unwrap_or_else(|| {
|
||||
|
|
@ -200,7 +142,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
Some(html)
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::warn!("Could not read index.html: {}", err);
|
||||
warn!("Could not read index.html: {}", err);
|
||||
None
|
||||
}
|
||||
}
|
||||
|
|
@ -217,6 +159,12 @@ async fn main() -> anyhow::Result<()> {
|
|||
);
|
||||
}
|
||||
|
||||
let features_response = routes::build_features_response(&property_data);
|
||||
info!(
|
||||
groups = features_response.groups.len(),
|
||||
"Precomputed features response"
|
||||
);
|
||||
|
||||
let state = Arc::new(AppState {
|
||||
data: property_data,
|
||||
grid,
|
||||
|
|
@ -225,10 +173,8 @@ async fn main() -> anyhow::Result<()> {
|
|||
poi_grid,
|
||||
min_keys,
|
||||
max_keys,
|
||||
enum_min_keys,
|
||||
enum_max_keys,
|
||||
poi_category_groups,
|
||||
enum_name_to_idx,
|
||||
features_response,
|
||||
og_sidecar_url: cli.og_sidecar_url,
|
||||
public_url: cli.public_url,
|
||||
index_html,
|
||||
|
|
|
|||
|
|
@ -5,10 +5,11 @@ use axum::extract::Request;
|
|||
use axum::http::header;
|
||||
use axum::middleware::Next;
|
||||
use axum::response::Response;
|
||||
use regex::Regex;
|
||||
|
||||
use crate::state::AppState;
|
||||
|
||||
const OG_PLACEHOLDER: &str = r#"<meta name="x-og-placeholder" content="__NARROWIT_OG_TAGS__"/>"#;
|
||||
|
||||
pub async fn og_middleware(request: Request, next: Next) -> Response {
|
||||
// Capture the query string before passing the request through
|
||||
let query_string = request.uri().query().unwrap_or("").to_string();
|
||||
|
|
@ -46,19 +47,18 @@ pub async fn og_middleware(request: Request, next: Next) -> Response {
|
|||
};
|
||||
|
||||
let og_tags = format!(
|
||||
r#"<title>Narrowit</title>
|
||||
<meta property="og:title" content="Narrowit — UK Property Map" />
|
||||
<meta property="og:description" content="Interactive property data visualization for England & Wales" />
|
||||
r#"<meta property="og:title" content="Narrowit — Every neighbourhood in England & Wales" />
|
||||
<meta property="og:description" content="Explore property prices, energy ratings, crime stats, school ratings, and more across England & Wales on one interactive map." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:image" content="{og_image_url}" />
|
||||
<meta property="og:image:width" content="1200" />
|
||||
<meta property="og:image:height" content="630" />
|
||||
<meta name="twitter:card" content="summary_large_image" />
|
||||
<meta name="twitter:image" content="{og_image_url}" />"#
|
||||
<meta name="twitter:title" content="Narrowit — Every neighbourhood in England & Wales" />
|
||||
<meta name="twitter:description" content="Explore property prices, energy ratings, crime stats, school ratings, and more across England & Wales on one interactive map." />"#
|
||||
);
|
||||
|
||||
// Replace the <title> tag with title + OG meta tags
|
||||
let re = Regex::new(r"<title>Narrowit</title>").unwrap();
|
||||
let html = re.replace(index_html, og_tags.as_str()).to_string();
|
||||
let html = index_html.replace(OG_PLACEHOLDER, &og_tags);
|
||||
|
||||
let (parts, _body) = response.into_parts();
|
||||
Response::from_parts(parts, Body::from(html))
|
||||
|
|
|
|||
5
server-rs/src/parsing.rs
Normal file
5
server-rs/src/parsing.rs
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
mod bounds;
|
||||
mod filters;
|
||||
|
||||
pub use bounds::{h3_cell_bounds, parse_bounds};
|
||||
pub use filters::{parse_filters, row_passes_filters, ParsedEnumFilter, ParsedFilter};
|
||||
|
|
@ -50,3 +50,35 @@ pub fn parse_bounds(bounds_str: &str) -> Result<(f64, f64, f64, f64), (StatusCod
|
|||
|
||||
Ok((parts[0], parts[1], parts[2], parts[3]))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::str::FromStr;
|
||||
|
||||
#[test]
|
||||
fn parse_bounds_valid() {
|
||||
assert_eq!(parse_bounds("1.0,2.0,3.0,4.0").unwrap(), (1.0, 2.0, 3.0, 4.0));
|
||||
assert_eq!(parse_bounds("-51.5, -0.1, 51.6, 0.2").unwrap(), (-51.5, -0.1, 51.6, 0.2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_bounds_invalid() {
|
||||
assert!(parse_bounds("1.0,2.0,3.0").is_err());
|
||||
assert!(parse_bounds("1.0,2.0,3.0,4.0,5.0").is_err());
|
||||
assert!(parse_bounds("a,b,c,d").is_err());
|
||||
assert!(parse_bounds("").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn h3_cell_bounds_applies_buffer() {
|
||||
let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap();
|
||||
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.0);
|
||||
let (buf_min_lat, buf_min_lon, buf_max_lat, buf_max_lon) = h3_cell_bounds(cell, 0.1);
|
||||
|
||||
assert!((min_lat - buf_min_lat - 0.1).abs() < 1e-10);
|
||||
assert!((min_lon - buf_min_lon - 0.1).abs() < 1e-10);
|
||||
assert!((buf_max_lat - max_lat - 0.1).abs() < 1e-10);
|
||||
assert!((buf_max_lon - max_lon - 0.1).abs() < 1e-10);
|
||||
}
|
||||
}
|
||||
166
server-rs/src/parsing/filters.rs
Normal file
166
server-rs/src/parsing/filters.rs
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
use rustc_hash::FxHashMap;
|
||||
|
||||
/// Filter for numeric features: value must be in [min, max] range.
///
/// A row passes when the value stored at column `feat_idx` is finite and
/// satisfies `min <= value <= max` (both bounds inclusive).
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedFilter {
    /// Column index of the feature inside one row of the flat feature buffer.
    pub feat_idx: usize,
    /// Inclusive lower bound.
    pub min: f32,
    /// Inclusive upper bound.
    pub max: f32,
}
|
||||
|
||||
/// Filter for enum features: value must be one of the allowed indices.
///
/// Enum columns are stored as `f32` alongside the numeric features, so the
/// allowed set holds each permitted value's position in the feature's value
/// list, converted to `f32`.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedEnumFilter {
    /// Column index of the enum feature inside one row of the flat feature buffer.
    pub feat_idx: usize,
    /// Permitted value indices; an empty list means no row can pass this filter.
    pub allowed: Vec<f32>,
}
|
||||
|
||||
/// Parse comma-separated filter string into numeric and enum filters.
|
||||
/// Numeric format: `name:min:max`
|
||||
/// Enum format: `name:val1|val2|val3` (pipe-separated string values)
|
||||
pub fn parse_filters(
|
||||
filter_str: Option<&str>,
|
||||
feature_names: &[String],
|
||||
enum_values: &FxHashMap<usize, Vec<String>>,
|
||||
) -> (Vec<ParsedFilter>, Vec<ParsedEnumFilter>) {
|
||||
let mut numeric = Vec::new();
|
||||
let mut enums = Vec::new();
|
||||
|
||||
let input = match filter_str.filter(|text| !text.is_empty()) {
|
||||
Some(text) => text,
|
||||
None => return (numeric, enums),
|
||||
};
|
||||
|
||||
for entry in input.split(',') {
|
||||
let parts: Vec<&str> = entry.splitn(2, ':').collect();
|
||||
if parts.len() != 2 {
|
||||
continue;
|
||||
}
|
||||
let name = parts[0].trim();
|
||||
let rest = parts[1].trim();
|
||||
|
||||
// Find feature index by name
|
||||
let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == name) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
// Check if this is an enum feature
|
||||
if let Some(values) = enum_values.get(&feat_idx) {
|
||||
// Enum filter: convert string values to f32 indices
|
||||
let allowed: Vec<f32> = rest
|
||||
.split('|')
|
||||
.filter_map(|value| {
|
||||
let value = value.trim();
|
||||
values
|
||||
.iter()
|
||||
.position(|existing| existing == value)
|
||||
.map(|position| position as f32)
|
||||
})
|
||||
.collect();
|
||||
enums.push(ParsedEnumFilter { feat_idx, allowed });
|
||||
} else {
|
||||
// Numeric filter: parse min:max
|
||||
let num_parts: Vec<&str> = rest.splitn(2, ':').collect();
|
||||
if num_parts.len() != 2 {
|
||||
continue;
|
||||
}
|
||||
let min = match num_parts[0].trim().parse::<f32>() {
|
||||
Ok(value) => value,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let max = match num_parts[1].trim().parse::<f32>() {
|
||||
Ok(value) => value,
|
||||
Err(_) => continue,
|
||||
};
|
||||
numeric.push(ParsedFilter { feat_idx, min, max });
|
||||
}
|
||||
}
|
||||
|
||||
(numeric, enums)
|
||||
}
|
||||
|
||||
/// Check if a row passes all filters.
|
||||
/// All features (numeric and enum) are stored in feature_data as f32.
|
||||
pub fn row_passes_filters(
|
||||
row: usize,
|
||||
filters: &[ParsedFilter],
|
||||
enum_filters: &[ParsedEnumFilter],
|
||||
feature_data: &[f32],
|
||||
num_features: usize,
|
||||
) -> bool {
|
||||
let base = row * num_features;
|
||||
|
||||
filters.iter().all(|filter| {
|
||||
let value = feature_data[base + filter.feat_idx];
|
||||
value.is_finite() && value >= filter.min && value <= filter.max
|
||||
}) && enum_filters.iter().all(|filter| {
|
||||
let value = feature_data[base + filter.feat_idx];
|
||||
value.is_finite() && filter.allowed.contains(&value)
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-column schema shared by the parser tests.
    fn feature_names() -> Vec<String> {
        ["price", "area", "rating"]
            .iter()
            .map(|name| name.to_string())
            .collect()
    }

    /// Marks column 2 (`rating`) as an enum feature with values A, B, C.
    fn enum_values() -> FxHashMap<usize, Vec<String>> {
        let labels: Vec<String> = ["A", "B", "C"].iter().map(|label| label.to_string()).collect();
        std::iter::once((2, labels)).collect()
    }

    #[test]
    fn parse_filters_numeric() {
        let names = feature_names();
        let enum_map = enum_values();
        let (numeric, enums) = parse_filters(Some("price:100:500"), &names, &enum_map);

        assert!(enums.is_empty());
        assert_eq!(numeric.len(), 1);
        let parsed = &numeric[0];
        assert_eq!(parsed.feat_idx, 0);
        assert_eq!(parsed.min, 100.0);
        assert_eq!(parsed.max, 500.0);
    }

    #[test]
    fn parse_filters_enum() {
        let names = feature_names();
        let enum_map = enum_values();
        let (numeric, enums) = parse_filters(Some("rating:A|C"), &names, &enum_map);

        assert!(numeric.is_empty());
        assert_eq!(enums.len(), 1);
        let parsed = &enums[0];
        assert_eq!(parsed.feat_idx, 2);
        assert_eq!(parsed.allowed, vec![0.0, 2.0]);
    }

    #[test]
    fn parse_filters_empty_and_invalid() {
        let names = feature_names();
        let enum_map = enum_values();

        // Absent input, empty input, and an unknown feature all yield nothing.
        for input in [None, Some(""), Some("unknown:1:2")] {
            let (n, e) = parse_filters(input, &names, &enum_map);
            assert!(n.is_empty() && e.is_empty());
        }
    }

    #[test]
    fn row_passes_numeric_filter() {
        let filters = vec![ParsedFilter { feat_idx: 0, min: 10.0, max: 20.0 }];
        // One feature per row: in range, below range, NaN.
        let data = vec![15.0, 5.0, f32::NAN];
        let passes = |row: usize| row_passes_filters(row, &filters, &[], &data, 1);

        assert!(passes(0));
        assert!(!passes(1));
        assert!(!passes(2)); // NaN fails
    }

    #[test]
    fn row_passes_enum_filter() {
        let filters = vec![ParsedEnumFilter { feat_idx: 0, allowed: vec![0.0, 2.0] }];
        // Row 0: 0.0 (allowed), row 1: 1.0 (not allowed), row 2: 2.0 (allowed), row 3: NaN (fails)
        let data = vec![0.0, 1.0, 2.0, f32::NAN];
        let passes = |row: usize| row_passes_filters(row, &[], &filters, &data, 1);

        assert!(passes(0));
        assert!(!passes(1));
        assert!(passes(2));
        assert!(!passes(3)); // NaN fails
    }
}
|
||||
|
|
@ -2,11 +2,10 @@ mod features;
|
|||
mod hexagon_stats;
|
||||
pub(crate) mod hexagons;
|
||||
mod og_image;
|
||||
pub(crate) mod parse;
|
||||
mod pois;
|
||||
pub(crate) mod properties;
|
||||
|
||||
pub use features::get_features;
|
||||
pub use features::{build_features_response, get_features, FeaturesResponse};
|
||||
pub use hexagon_stats::get_hexagon_stats;
|
||||
pub use hexagons::get_hexagons;
|
||||
pub use og_image::get_og_image;
|
||||
|
|
@ -4,11 +4,11 @@ use axum::response::Json;
|
|||
use serde::Serialize;
|
||||
use tracing::info;
|
||||
|
||||
use crate::data::Histogram;
|
||||
use crate::data::{Histogram, PropertyData};
|
||||
use crate::features::{ENUM_FEATURE_GROUPS, FEATURE_GROUPS};
|
||||
use crate::state::AppState;
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[derive(Clone, Serialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum FeatureInfo {
|
||||
#[serde(rename = "numeric")]
|
||||
|
|
@ -32,18 +32,19 @@ pub enum FeatureInfo {
|
|||
},
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[derive(Clone, Serialize)]
|
||||
pub struct FeatureGroupResponse {
|
||||
name: String,
|
||||
features: Vec<FeatureInfo>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[derive(Clone, Serialize)]
|
||||
pub struct FeaturesResponse {
|
||||
groups: Vec<FeatureGroupResponse>,
|
||||
pub groups: Vec<FeatureGroupResponse>,
|
||||
}
|
||||
|
||||
pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
|
||||
/// Build the features response at startup. Called once and cached in AppState.
|
||||
pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
|
||||
// Collect all group names in order, merging numeric and enum groups with the same name
|
||||
let mut group_names: Vec<&str> = Vec::new();
|
||||
for feature_group in FEATURE_GROUPS {
|
||||
|
|
@ -66,13 +67,12 @@ pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
|
|||
for feature_group in FEATURE_GROUPS {
|
||||
if feature_group.name == group_name {
|
||||
for feature_config in feature_group.features {
|
||||
if let Some(feat_idx) = state
|
||||
.data
|
||||
if let Some(feat_idx) = data
|
||||
.feature_names
|
||||
.iter()
|
||||
.position(|feat_name| feat_name == feature_config.name)
|
||||
{
|
||||
let stats = &state.data.feature_stats[feat_idx];
|
||||
let stats = &data.feature_stats[feat_idx];
|
||||
features.push(FeatureInfo::Numeric {
|
||||
name: feature_config.name.to_string(),
|
||||
min: stats.slider_min,
|
||||
|
|
@ -92,15 +92,17 @@ pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
|
|||
for enum_group in ENUM_FEATURE_GROUPS {
|
||||
if enum_group.name == group_name {
|
||||
for enum_config in enum_group.features {
|
||||
if let Some(enum_feature) = state
|
||||
.data
|
||||
.enum_features
|
||||
// Find the feature index by name
|
||||
if let Some(feat_idx) = data
|
||||
.feature_names
|
||||
.iter()
|
||||
.find(|enum_feat| enum_feat.name == enum_config.name)
|
||||
.position(|name| name == enum_config.name)
|
||||
{
|
||||
// Check if this feature has enum values
|
||||
if let Some(values) = data.enum_values.get(&feat_idx) {
|
||||
features.push(FeatureInfo::Enum {
|
||||
name: enum_config.name.to_string(),
|
||||
values: enum_feature.values.clone(),
|
||||
values: values.clone(),
|
||||
description: enum_config.description,
|
||||
detail: enum_config.detail,
|
||||
source: enum_config.source,
|
||||
|
|
@ -109,6 +111,7 @@ pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !features.is_empty() {
|
||||
groups.push(FeatureGroupResponse {
|
||||
|
|
@ -118,22 +121,10 @@ pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
|
|||
}
|
||||
}
|
||||
|
||||
let num_numeric: usize = groups
|
||||
.iter()
|
||||
.flat_map(|group| &group.features)
|
||||
.filter(|feature| matches!(feature, FeatureInfo::Numeric { .. }))
|
||||
.count();
|
||||
let num_enum: usize = groups
|
||||
.iter()
|
||||
.flat_map(|group| &group.features)
|
||||
.filter(|feature| matches!(feature, FeatureInfo::Enum { .. }))
|
||||
.count();
|
||||
|
||||
info!(
|
||||
numeric = num_numeric,
|
||||
enums = num_enum,
|
||||
groups = groups.len(),
|
||||
"GET /api/features"
|
||||
);
|
||||
Json(FeaturesResponse { groups })
|
||||
FeaturesResponse { groups }
|
||||
}
|
||||
|
||||
pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
|
||||
info!("GET /api/features");
|
||||
Json(state.features_response.clone())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,18 +1,50 @@
|
|||
use std::fmt::Write;
|
||||
use std::collections::HashMap;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::Query;
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::IntoResponse;
|
||||
use serde::Deserialize;
|
||||
use axum::response::Json;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::consts::{ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
|
||||
use crate::filter::{parse_filters, row_passes_filters};
|
||||
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
|
||||
use crate::parsing::{h3_cell_bounds, parse_filters, row_passes_filters};
|
||||
use crate::state::AppState;
|
||||
|
||||
use super::parse::h3_cell_bounds;
|
||||
#[derive(Serialize)]
|
||||
pub struct HistogramStats {
|
||||
min: f64,
|
||||
max: f64,
|
||||
/// 1st percentile (left edge of main distribution)
|
||||
p1: f64,
|
||||
/// 99th percentile (right edge of main distribution)
|
||||
p99: f64,
|
||||
counts: Vec<u64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct NumericFeatureStats {
|
||||
name: String,
|
||||
count: usize,
|
||||
min: f64,
|
||||
max: f64,
|
||||
mean: f64,
|
||||
histogram: HistogramStats,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct EnumFeatureStats {
|
||||
name: String,
|
||||
counts: HashMap<String, u64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct HexagonStatsResponse {
|
||||
count: usize,
|
||||
numeric_features: Vec<NumericFeatureStats>,
|
||||
enum_features: Vec<EnumFeatureStats>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct HexagonStatsParams {
|
||||
|
|
@ -20,15 +52,14 @@ pub struct HexagonStatsParams {
|
|||
pub resolution: u8,
|
||||
pub filters: Option<String>,
|
||||
/// Comma-separated feature names to include in stats response.
|
||||
/// When present (even if empty), only listed features are computed.
|
||||
/// When absent, all features are returned (backward compatible).
|
||||
/// When present (even if empty), only the listed features are computed; when absent, all features are returned.
|
||||
pub fields: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn get_hexagon_stats(
|
||||
state: Arc<AppState>,
|
||||
Query(params): Query<HexagonStatsParams>,
|
||||
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
||||
) -> Result<Json<HexagonStatsResponse>, (StatusCode, String)> {
|
||||
let cell = h3o::CellIndex::from_str(&params.h3).map_err(|error| {
|
||||
warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index");
|
||||
(
|
||||
|
|
@ -57,36 +88,34 @@ pub async fn get_hexagon_stats(
|
|||
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||
params.filters.as_deref(),
|
||||
&state.data.feature_names,
|
||||
&state.data.enum_features,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
||||
// Parse optional `fields` param into sets of feature names.
|
||||
// None = include all, Some = only include listed features.
|
||||
let field_set: Option<std::collections::HashSet<String>> =
|
||||
params.fields.as_ref().map(|fields_str| {
|
||||
let fields_specified = params.fields.is_some();
|
||||
let field_set: std::collections::HashSet<String> = params
|
||||
.fields
|
||||
.as_ref()
|
||||
.map(|fields_str| {
|
||||
fields_str
|
||||
.split(',')
|
||||
.map(|field| field.trim().to_string())
|
||||
.filter(|field| !field.is_empty())
|
||||
.collect()
|
||||
});
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let result = tokio::task::spawn_blocking(move || {
|
||||
let response = tokio::task::spawn_blocking(move || {
|
||||
let start_time = std::time::Instant::now();
|
||||
let precomputed = &state.h3_cells;
|
||||
let h3_res = h3o::Resolution::try_from(resolution)
|
||||
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
|
||||
let need_parent = resolution < H3_PRECOMPUTE_MAX;
|
||||
let num_features = state.data.num_features;
|
||||
let num_enums = state.data.num_enums;
|
||||
let feature_data = &state.data.feature_data;
|
||||
let enum_data = &state.data.enum_data;
|
||||
let enum_features = &state.data.enum_features;
|
||||
|
||||
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
||||
|
||||
// Resolve cell at requested resolution from precomputed max-resolution cell
|
||||
let cell_for_row = |row: usize| -> u64 {
|
||||
let max_cell = precomputed[row];
|
||||
if !need_parent || max_cell == 0 {
|
||||
|
|
@ -99,7 +128,6 @@ pub async fn get_hexagon_stats(
|
|||
.unwrap_or(0)
|
||||
};
|
||||
|
||||
// Collect matching rows
|
||||
let mut matching_rows: Vec<usize> = Vec::new();
|
||||
state
|
||||
.grid
|
||||
|
|
@ -112,8 +140,6 @@ pub async fn get_hexagon_stats(
|
|||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_data,
|
||||
num_enums,
|
||||
)
|
||||
{
|
||||
matching_rows.push(row);
|
||||
|
|
@ -122,32 +148,61 @@ pub async fn get_hexagon_stats(
|
|||
|
||||
let total_count = matching_rows.len();
|
||||
|
||||
// Build JSON directly via string buffer
|
||||
let mut output = String::with_capacity(4096);
|
||||
output.push_str("{\"count\":");
|
||||
write!(output, "{}", total_count).unwrap();
|
||||
let mut numeric_features = Vec::new();
|
||||
let mut enum_features_out = Vec::new();
|
||||
|
||||
// Numeric features: compute count, min, max, sum, histogram using global bin edges
|
||||
output.push_str(",\"numeric_features\":[");
|
||||
let mut first_numeric = true;
|
||||
for (feature_index, feature_name) in state.data.feature_names.iter().enumerate() {
|
||||
// Skip features not in the requested set (when fields param is present)
|
||||
if let Some(ref set) = field_set {
|
||||
if !set.contains(feature_name.as_str()) {
|
||||
if fields_specified && !field_set.contains(feature_name.as_str()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if this is an enum feature
|
||||
if let Some(enum_values) = state.data.enum_values.get(&feature_index) {
|
||||
// Enum feature: count occurrences of each value
|
||||
let mut value_counts = vec![0u64; enum_values.len()];
|
||||
for &row in &matching_rows {
|
||||
let value = feature_data[row * num_features + feature_index];
|
||||
if value.is_finite() {
|
||||
let idx = value as usize;
|
||||
if idx < value_counts.len() {
|
||||
value_counts[idx] += 1;
|
||||
}
|
||||
let global_stats = &state.data.feature_stats[feature_index];
|
||||
let histogram_min = global_stats.histogram.min;
|
||||
let histogram_max = global_stats.histogram.max;
|
||||
let bin_width = global_stats.histogram.bin_width;
|
||||
}
|
||||
}
|
||||
|
||||
let counts: HashMap<String, u64> = value_counts
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, &count)| count > 0)
|
||||
.map(|(idx, &count)| (enum_values[idx].clone(), count))
|
||||
.collect();
|
||||
|
||||
if !counts.is_empty() {
|
||||
enum_features_out.push(EnumFeatureStats {
|
||||
name: feature_name.clone(),
|
||||
counts,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// Numeric feature: compute stats and histogram
|
||||
let global_hist = &state.data.feature_stats[feature_index].histogram;
|
||||
let p1 = global_hist.p1;
|
||||
let p99 = global_hist.p99;
|
||||
|
||||
let mut count = 0usize;
|
||||
let mut min_value = f32::INFINITY;
|
||||
let mut max_value = f32::NEG_INFINITY;
|
||||
let mut sum = 0.0f64; // keep f64 for mean precision
|
||||
let mut sum = 0.0f64;
|
||||
let mut bins = vec![0u64; HISTOGRAM_BINS];
|
||||
|
||||
// Compute middle bin width (between p1 and p99)
|
||||
let middle_bins = HISTOGRAM_BINS.saturating_sub(2);
|
||||
let middle_width = if middle_bins > 0 && p99 > p1 {
|
||||
(p99 - p1) / middle_bins as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
for &row in &matching_rows {
|
||||
let value = feature_data[row * num_features + feature_index];
|
||||
if value.is_finite() {
|
||||
|
|
@ -160,101 +215,40 @@ pub async fn get_hexagon_stats(
|
|||
}
|
||||
sum += value as f64;
|
||||
|
||||
// Bin into histogram using global edges (cast to f64 for bin index math)
|
||||
if bin_width > 0.0 {
|
||||
let bin_index = ((value as f64 - histogram_min as f64) / bin_width as f64)
|
||||
.floor() as isize;
|
||||
let clamped_index =
|
||||
bin_index.max(0).min((HISTOGRAM_BINS - 1) as isize) as usize;
|
||||
bins[clamped_index] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if count == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
if !first_numeric {
|
||||
output.push(',');
|
||||
}
|
||||
first_numeric = false;
|
||||
|
||||
let mean = sum / count as f64;
|
||||
output.push_str("{\"name\":");
|
||||
write_json_string(&mut output, feature_name);
|
||||
write!(output, ",\"count\":{}", count).unwrap();
|
||||
write!(output, ",\"min\":{}", format_num(min_value)).unwrap();
|
||||
write!(output, ",\"max\":{}", format_num(max_value)).unwrap();
|
||||
write!(output, ",\"mean\":{}", format_f64(mean)).unwrap();
|
||||
output.push_str(",\"histogram\":{\"min\":");
|
||||
write!(output, "{}", format_num(histogram_min)).unwrap();
|
||||
output.push_str(",\"max\":");
|
||||
write!(output, "{}", format_num(histogram_max)).unwrap();
|
||||
output.push_str(",\"bin_width\":");
|
||||
write!(output, "{}", format_num(bin_width)).unwrap();
|
||||
output.push_str(",\"counts\":[");
|
||||
for (bin_index, &bin_count) in bins.iter().enumerate() {
|
||||
if bin_index > 0 {
|
||||
output.push(',');
|
||||
}
|
||||
write!(output, "{}", bin_count).unwrap();
|
||||
}
|
||||
output.push_str("]}}")
|
||||
}
|
||||
|
||||
// Enum features: count per value
|
||||
output.push_str("],\"enum_features\":[");
|
||||
let mut first_enum = true;
|
||||
for enum_feature in enum_features {
|
||||
// Skip enum features not in the requested set
|
||||
if let Some(ref set) = field_set {
|
||||
if !set.contains(enum_feature.name.as_str()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let enum_index = match state.enum_name_to_idx.get(&enum_feature.name) {
|
||||
Some(&index) => index,
|
||||
None => continue,
|
||||
// Bin using p1/p99 outlier structure
|
||||
let bin = if value < p1 {
|
||||
0 // Low outlier bin
|
||||
} else if value >= p99 {
|
||||
HISTOGRAM_BINS - 1 // High outlier bin
|
||||
} else if middle_width > 0.0 {
|
||||
// Middle bins (1 to n-2)
|
||||
let middle_bin = ((value - p1) / middle_width) as usize;
|
||||
(1 + middle_bin).min(HISTOGRAM_BINS - 2)
|
||||
} else {
|
||||
HISTOGRAM_BINS / 2 // Fallback if p1 == p99
|
||||
};
|
||||
|
||||
let mut value_counts = vec![0u64; enum_feature.values.len()];
|
||||
for &row in &matching_rows {
|
||||
let value = enum_data[row * num_enums + enum_index];
|
||||
if value != ENUM_NULL && (value as usize) < value_counts.len() {
|
||||
value_counts[value as usize] += 1;
|
||||
bins[bin] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Only include if there are any non-zero counts
|
||||
let has_values = value_counts.iter().any(|&count| count > 0);
|
||||
if !has_values {
|
||||
continue;
|
||||
if count > 0 {
|
||||
numeric_features.push(NumericFeatureStats {
|
||||
name: feature_name.clone(),
|
||||
count,
|
||||
min: min_value as f64,
|
||||
max: max_value as f64,
|
||||
mean: sum / count as f64,
|
||||
histogram: HistogramStats {
|
||||
min: global_hist.min as f64,
|
||||
max: global_hist.max as f64,
|
||||
p1: p1 as f64,
|
||||
p99: p99 as f64,
|
||||
counts: bins,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
if !first_enum {
|
||||
output.push(',');
|
||||
}
|
||||
first_enum = false;
|
||||
|
||||
output.push_str("{\"name\":");
|
||||
write_json_string(&mut output, &enum_feature.name);
|
||||
output.push_str(",\"counts\":{");
|
||||
let mut first_value = true;
|
||||
for (value_index, &count) in value_counts.iter().enumerate() {
|
||||
if count == 0 {
|
||||
continue;
|
||||
}
|
||||
if !first_value {
|
||||
output.push(',');
|
||||
}
|
||||
first_value = false;
|
||||
write_json_string(&mut output, &enum_feature.values[value_index]);
|
||||
write!(output, ":{}", count).unwrap();
|
||||
}
|
||||
output.push_str("}}");
|
||||
}
|
||||
output.push_str("]}");
|
||||
|
||||
let elapsed = start_time.elapsed();
|
||||
info!(
|
||||
|
|
@ -267,46 +261,15 @@ pub async fn get_hexagon_stats(
|
|||
"GET /api/hexagon-stats"
|
||||
);
|
||||
|
||||
Ok(output)
|
||||
Ok(HexagonStatsResponse {
|
||||
count: total_count,
|
||||
numeric_features,
|
||||
enum_features: enum_features_out,
|
||||
})
|
||||
})
|
||||
.await
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
|
||||
.map_err(|error: String| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
|
||||
|
||||
Ok((
|
||||
[(axum::http::header::CONTENT_TYPE, "application/json")],
|
||||
result,
|
||||
))
|
||||
}
|
||||
|
||||
/// Appends `value` to `output` as a quoted JSON string literal.
///
/// `"` and `\` are backslash-escaped, and newline, carriage return and tab use
/// their short escapes. Every other control character below U+0020 is written
/// as a `\u00xx` escape, because JSON does not allow raw control characters
/// inside a string. All remaining characters are copied through unchanged.
fn write_json_string(output: &mut String, value: &str) {
    // Lowercase hex digits for the `\u00xx` escapes.
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    output.push('"');
    for character in value.chars() {
        match character {
            '"' => output.push_str("\\\""),
            '\\' => output.push_str("\\\\"),
            '\n' => output.push_str("\\n"),
            '\r' => output.push_str("\\r"),
            '\t' => output.push_str("\\t"),
            control if control < '\x20' => {
                // The guard keeps `code` below 0x20, so two hex digits suffice.
                let code = control as usize;
                output.push_str("\\u00");
                output.push(HEX_DIGITS[code >> 4] as char);
                output.push(HEX_DIGITS[code & 0x0f] as char);
            }
            other => output.push(other),
        }
    }
    output.push('"');
}
|
||||
|
||||
/// Formats an `f32` as a JSON number token.
///
/// The value is widened to `f64` before formatting. Whole numbers with a
/// magnitude below 1e15 are written with exactly one decimal place (`3`
/// becomes `3.0`); every other finite value uses the default `f64` display
/// form. NaN and the infinities have no JSON number representation, so they
/// are written as `null` instead of the invalid tokens `NaN` / `inf`.
fn format_num(value: f32) -> String {
    let widened = f64::from(value);
    if !widened.is_finite() {
        return String::from("null");
    }
    if widened.fract() == 0.0 && widened.abs() < 1e15 {
        format!("{:.1}", widened)
    } else {
        widened.to_string()
    }
}
|
||||
|
||||
fn format_f64(value: f64) -> String {
|
||||
if value.fract() == 0.0 && value.abs() < 1e15 {
|
||||
format!("{:.1}", value)
|
||||
} else {
|
||||
format!("{}", value)
|
||||
}
|
||||
Ok(Json(response))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,35 +1,22 @@
|
|||
use std::fmt::{self, Write};
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::Query;
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::IntoResponse;
|
||||
use axum::response::Json;
|
||||
use rustc_hash::FxHashMap;
|
||||
use serde::Deserialize;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Map, Value};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::consts::{
|
||||
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX,
|
||||
H3_REQUEST_MIN, POSTCODE_MIN_RESOLUTION,
|
||||
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN,
|
||||
};
|
||||
use crate::filter::parse_filters;
|
||||
use crate::parsing::{parse_bounds, parse_filters, row_passes_filters};
|
||||
use crate::state::AppState;
|
||||
|
||||
use super::parse::parse_bounds;
|
||||
|
||||
struct HumanBytes(usize);
|
||||
|
||||
impl fmt::Display for HumanBytes {
|
||||
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let bytes = self.0;
|
||||
if bytes >= 1_000_000 {
|
||||
write!(formatter, "{:.1} MB", bytes as f64 / 1_000_000.0)
|
||||
} else if bytes >= 1_000 {
|
||||
write!(formatter, "{:.1} KB", bytes as f64 / 1_000.0)
|
||||
} else {
|
||||
write!(formatter, "{} B", bytes)
|
||||
}
|
||||
}
|
||||
#[derive(Serialize)]
|
||||
pub struct HexagonsResponse {
|
||||
features: Vec<Map<String, Value>>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
|
|
@ -51,28 +38,14 @@ struct CellAgg {
|
|||
count: u32,
|
||||
mins: Box<[f32]>,
|
||||
maxs: Box<[f32]>,
|
||||
/// Min/max ordinal indices for enum features (255 = no data yet)
|
||||
enum_mins: Box<[u8]>,
|
||||
enum_maxs: Box<[u8]>,
|
||||
/// Most common postcode in this cell (only tracked at high resolutions)
|
||||
postcode: Option<String>,
|
||||
postcode_count: u32,
|
||||
lat_sum: f64,
|
||||
lon_sum: f64,
|
||||
}
|
||||
|
||||
impl CellAgg {
|
||||
fn new(num_features: usize, num_enums: usize) -> Self {
|
||||
fn new(num_features: usize) -> Self {
|
||||
CellAgg {
|
||||
count: 0,
|
||||
mins: vec![f32::INFINITY; num_features].into_boxed_slice(),
|
||||
maxs: vec![f32::NEG_INFINITY; num_features].into_boxed_slice(),
|
||||
enum_mins: vec![ENUM_NULL; num_enums].into_boxed_slice(),
|
||||
enum_maxs: vec![0; num_enums].into_boxed_slice(),
|
||||
postcode: None,
|
||||
postcode_count: 0,
|
||||
lat_sum: 0.0,
|
||||
lon_sum: 0.0,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -96,23 +69,6 @@ impl CellAgg {
|
|||
}
|
||||
}
|
||||
|
||||
/// Track min/max ordinal index for each enum feature in this cell.
|
||||
#[inline]
|
||||
fn add_enums(&mut self, enum_data: &[u8], row: usize, num_enums: usize) {
|
||||
let base = row * num_enums;
|
||||
let row_slice = &enum_data[base..base + num_enums];
|
||||
for (enum_index, &value) in row_slice.iter().enumerate() {
|
||||
if value != ENUM_NULL {
|
||||
if self.enum_mins[enum_index] == ENUM_NULL || value < self.enum_mins[enum_index] {
|
||||
self.enum_mins[enum_index] = value;
|
||||
}
|
||||
if value > self.enum_maxs[enum_index] {
|
||||
self.enum_maxs[enum_index] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a row, only aggregating the features at the given indices.
|
||||
#[inline]
|
||||
fn add_row_selective(
|
||||
|
|
@ -136,178 +92,57 @@ impl CellAgg {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Track min/max ordinal index for selected enum features only.
|
||||
#[inline]
|
||||
fn add_enums_selective(
|
||||
&mut self,
|
||||
enum_data: &[u8],
|
||||
row: usize,
|
||||
num_enums: usize,
|
||||
indices: &[usize],
|
||||
) {
|
||||
let base = row * num_enums;
|
||||
for &enum_index in indices {
|
||||
let value = enum_data[base + enum_index];
|
||||
if value != ENUM_NULL {
|
||||
if self.enum_mins[enum_index] == ENUM_NULL || value < self.enum_mins[enum_index] {
|
||||
self.enum_mins[enum_index] = value;
|
||||
}
|
||||
if value > self.enum_maxs[enum_index] {
|
||||
self.enum_maxs[enum_index] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Track postcode and centroid for high-resolution cells.
|
||||
/// Uses simple "first seen" approach — at res 11/12, most rows in a cell share a postcode.
|
||||
#[inline]
|
||||
fn add_postcode(&mut self, postcode: &str, lat: f32, lon: f32) {
|
||||
self.lat_sum += lat as f64;
|
||||
self.lon_sum += lon as f64;
|
||||
if postcode.is_empty() {
|
||||
return;
|
||||
}
|
||||
if self.postcode.is_none() {
|
||||
self.postcode = Some(postcode.to_string());
|
||||
self.postcode_count = 1;
|
||||
} else if self.postcode.as_deref() == Some(postcode) {
|
||||
self.postcode_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends `text` to `buf`, escaped for use inside a JSON string literal.
///
/// The surrounding quotes are not written. `"`, `\`, newline, carriage return
/// and tab use their two-character escapes; any other character below U+0020
/// becomes a `\u00xx` escape; everything else is copied as-is.
pub(crate) fn write_json_escaped(buf: &mut String, text: &str) {
    for symbol in text.chars() {
        let short_escape = match symbol {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };

        if let Some(escape) = short_escape {
            buf.push_str(escape);
        } else if symbol < '\x20' {
            let _ = write!(buf, "\\u{:04x}", symbol as u32);
        } else {
            buf.push(symbol);
        }
    }
}
|
||||
|
||||
/// Write the hexagons JSON response directly to a String buffer,
|
||||
/// avoiding serde_json::Value allocations entirely.
|
||||
/// When `numeric_indices` / `enum_indices` are Some, only those features are written.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn write_hexagons_json(
|
||||
buf: &mut String,
|
||||
/// Build feature maps from aggregated cell data.
|
||||
fn build_feature_maps(
|
||||
groups: &FxHashMap<u64, CellAgg>,
|
||||
min_keys: &[String],
|
||||
max_keys: &[String],
|
||||
num_features: usize,
|
||||
enum_min_keys: &[String],
|
||||
enum_max_keys: &[String],
|
||||
num_enums: usize,
|
||||
include_postcode: bool,
|
||||
numeric_indices: Option<&[usize]>,
|
||||
enum_indices: Option<&[usize]>,
|
||||
) {
|
||||
buf.push_str("{\"features\":[");
|
||||
let mut first = true;
|
||||
indices: Option<&[usize]>,
|
||||
) -> Vec<Map<String, Value>> {
|
||||
let mut features = Vec::with_capacity(groups.len());
|
||||
|
||||
for (&cell_id, aggregation) in groups {
|
||||
let Some(cell) = h3o::CellIndex::try_from(cell_id).ok() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
if !first {
|
||||
buf.push(',');
|
||||
}
|
||||
first = false;
|
||||
let mut map = Map::new();
|
||||
map.insert("h3".into(), Value::String(cell.to_string()));
|
||||
map.insert("count".into(), Value::Number(aggregation.count.into()));
|
||||
|
||||
let _ = write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, aggregation.count);
|
||||
let iter: Box<dyn Iterator<Item = usize>> = if let Some(idx) = indices {
|
||||
Box::new(idx.iter().copied())
|
||||
} else {
|
||||
Box::new(0..num_features)
|
||||
};
|
||||
|
||||
if let Some(indices) = numeric_indices {
|
||||
for &feat_index in indices {
|
||||
for feat_index in iter {
|
||||
if aggregation.mins[feat_index].is_finite()
|
||||
&& aggregation.maxs[feat_index].is_finite()
|
||||
{
|
||||
let _ = write!(
|
||||
buf,
|
||||
",\"{}\":{},\"{}\":{}",
|
||||
min_keys[feat_index],
|
||||
aggregation.mins[feat_index],
|
||||
max_keys[feat_index],
|
||||
aggregation.maxs[feat_index]
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for feat_index in 0..num_features {
|
||||
if aggregation.mins[feat_index].is_finite()
|
||||
&& aggregation.maxs[feat_index].is_finite()
|
||||
{
|
||||
let _ = write!(
|
||||
buf,
|
||||
",\"{}\":{},\"{}\":{}",
|
||||
min_keys[feat_index],
|
||||
aggregation.mins[feat_index],
|
||||
max_keys[feat_index],
|
||||
aggregation.maxs[feat_index]
|
||||
);
|
||||
if let (Some(min_num), Some(max_num)) = (
|
||||
serde_json::Number::from_f64(aggregation.mins[feat_index] as f64),
|
||||
serde_json::Number::from_f64(aggregation.maxs[feat_index] as f64),
|
||||
) {
|
||||
map.insert(min_keys[feat_index].clone(), Value::Number(min_num));
|
||||
map.insert(max_keys[feat_index].clone(), Value::Number(max_num));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(indices) = enum_indices {
|
||||
for &enum_index in indices {
|
||||
if aggregation.enum_mins[enum_index] != ENUM_NULL {
|
||||
let _ = write!(
|
||||
buf,
|
||||
",\"{}\":{},\"{}\":{}",
|
||||
enum_min_keys[enum_index],
|
||||
aggregation.enum_mins[enum_index],
|
||||
enum_max_keys[enum_index],
|
||||
aggregation.enum_maxs[enum_index]
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for enum_index in 0..num_enums {
|
||||
if aggregation.enum_mins[enum_index] != ENUM_NULL {
|
||||
let _ = write!(
|
||||
buf,
|
||||
",\"{}\":{},\"{}\":{}",
|
||||
enum_min_keys[enum_index],
|
||||
aggregation.enum_mins[enum_index],
|
||||
enum_max_keys[enum_index],
|
||||
aggregation.enum_maxs[enum_index]
|
||||
);
|
||||
}
|
||||
}
|
||||
features.push(map);
|
||||
}
|
||||
|
||||
if include_postcode {
|
||||
if let Some(ref postcode) = aggregation.postcode {
|
||||
let total = aggregation.count as f64;
|
||||
let centroid_lat = aggregation.lat_sum / total;
|
||||
let centroid_lon = aggregation.lon_sum / total;
|
||||
if centroid_lat.is_finite() && centroid_lon.is_finite() {
|
||||
buf.push_str(",\"postcode\":\"");
|
||||
write_json_escaped(buf, postcode);
|
||||
let _ = write!(buf, "\",\"lat\":{},\"lon\":{}", centroid_lat, centroid_lon);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buf.push('}');
|
||||
}
|
||||
buf.push_str("]}");
|
||||
features
|
||||
}
|
||||
|
||||
pub async fn get_hexagons(
|
||||
state: Arc<AppState>,
|
||||
Query(params): Query<HexagonParams>,
|
||||
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
||||
) -> Result<Json<HexagonsResponse>, (StatusCode, String)> {
|
||||
let resolution = params.resolution;
|
||||
if !(H3_REQUEST_MIN..=H3_REQUEST_MAX).contains(&resolution) {
|
||||
warn!(
|
||||
|
|
@ -346,50 +181,40 @@ pub async fn get_hexagons(
|
|||
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||
params.filters.as_deref(),
|
||||
&state.data.feature_names,
|
||||
&state.data.enum_features,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
||||
// Parse optional `fields` param into numeric and enum index sets.
|
||||
// Parse optional `fields` param into feature indices.
|
||||
// If `fields` is absent (None), all features are included.
|
||||
// If `fields` is present (even empty string), only listed features are included.
|
||||
let field_indices: Option<(Vec<usize>, Vec<usize>)> =
|
||||
params.fields.as_ref().map(|fields_str| {
|
||||
let mut numeric_indices = Vec::new();
|
||||
let mut enum_indices = Vec::new();
|
||||
if !fields_str.is_empty() {
|
||||
for name in fields_str.split(',') {
|
||||
let field_indices: Option<Vec<usize>> = params.fields.as_ref().map(|fields_str| {
|
||||
if fields_str.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
fields_str
|
||||
.split(',')
|
||||
.filter_map(|name| {
|
||||
let name = name.trim();
|
||||
if name.is_empty() {
|
||||
continue;
|
||||
return None;
|
||||
}
|
||||
if let Some(idx) = state
|
||||
state
|
||||
.data
|
||||
.feature_names
|
||||
.iter()
|
||||
.position(|feat| feat == name)
|
||||
{
|
||||
numeric_indices.push(idx);
|
||||
} else if let Some(&idx) = state.enum_name_to_idx.get(name) {
|
||||
enum_indices.push(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
(numeric_indices, enum_indices)
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
|
||||
let json_body = tokio::task::spawn_blocking(move || -> Result<String, String> {
|
||||
let response = tokio::task::spawn_blocking(move || -> Result<HexagonsResponse, String> {
|
||||
let t0 = std::time::Instant::now();
|
||||
|
||||
let num_features = state.data.num_features;
|
||||
let num_enums = state.data.num_enums;
|
||||
let feature_data = &state.data.feature_data;
|
||||
let enum_data = &state.data.enum_data;
|
||||
|
||||
let min_keys = &state.min_keys;
|
||||
let max_keys = &state.max_keys;
|
||||
let enum_min_keys = &state.enum_min_keys;
|
||||
let enum_max_keys = &state.enum_max_keys;
|
||||
|
||||
let h3_res = h3o::Resolution::try_from(resolution)
|
||||
.map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?;
|
||||
|
|
@ -398,50 +223,20 @@ pub async fn get_hexagons(
|
|||
|
||||
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
|
||||
|
||||
let include_postcode = resolution >= POSTCODE_MIN_RESOLUTION;
|
||||
|
||||
// Row-level filter check: numeric must be non-NaN and within [min, max],
|
||||
// enum must have value index in the allowed set
|
||||
let row_passes = |row: usize| -> bool {
|
||||
parsed_filters.iter().all(|filter| {
|
||||
let value = feature_data[row * num_features + filter.feat_idx];
|
||||
value.is_finite() && value >= filter.min && value <= filter.max
|
||||
}) && parsed_enum_filters.iter().all(|enum_filter| {
|
||||
let value = enum_data[row * num_enums + enum_filter.enum_idx];
|
||||
value != ENUM_NULL && enum_filter.allowed.contains(&value)
|
||||
})
|
||||
};
|
||||
|
||||
// Choose aggregation strategy based on whether fields are specified
|
||||
let has_selective = field_indices.is_some();
|
||||
let (sel_numeric, sel_enum) = field_indices
|
||||
.as_ref()
|
||||
.map_or((&[][..], &[][..]), |(ni, ei)| {
|
||||
(ni.as_slice(), ei.as_slice())
|
||||
});
|
||||
let sel_indices = field_indices.as_deref().unwrap_or(&[]);
|
||||
|
||||
let aggregate_row = |groups: &mut FxHashMap<u64, CellAgg>, cell_id: u64, row: usize| {
|
||||
let aggregation = groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| CellAgg::new(num_features, num_enums));
|
||||
.or_insert_with(|| CellAgg::new(num_features));
|
||||
if has_selective {
|
||||
aggregation.add_row_selective(feature_data, row, num_features, sel_numeric);
|
||||
aggregation.add_enums_selective(enum_data, row, num_enums, sel_enum);
|
||||
aggregation.add_row_selective(feature_data, row, num_features, sel_indices);
|
||||
} else {
|
||||
aggregation.add_row(feature_data, row, num_features);
|
||||
aggregation.add_enums(enum_data, row, num_enums);
|
||||
}
|
||||
if include_postcode {
|
||||
aggregation.add_postcode(
|
||||
state.data.postcode(row),
|
||||
state.data.lat[row],
|
||||
state.data.lon[row],
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Resolve cell at requested resolution from precomputed max-resolution cell.
|
||||
// For max resolution, use directly; for lower resolutions, derive parent.
|
||||
let cell_for_row = |row: usize| -> u64 {
|
||||
let max_cell = precomputed[row];
|
||||
if !need_parent || max_cell == 0 {
|
||||
|
|
@ -458,7 +253,13 @@ pub async fn get_hexagons(
|
|||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if !row_passes(row) {
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
) {
|
||||
return;
|
||||
}
|
||||
aggregate_row(&mut groups, cell_for_row(row), row);
|
||||
|
|
@ -466,19 +267,12 @@ pub async fn get_hexagons(
|
|||
|
||||
let t_agg = t0.elapsed();
|
||||
|
||||
let mut json_buf = String::with_capacity(groups.len() * 128);
|
||||
write_hexagons_json(
|
||||
&mut json_buf,
|
||||
let features = build_feature_maps(
|
||||
&groups,
|
||||
min_keys,
|
||||
max_keys,
|
||||
num_features,
|
||||
enum_min_keys,
|
||||
enum_max_keys,
|
||||
num_enums,
|
||||
include_postcode,
|
||||
field_indices.as_ref().map(|(ni, _)| ni.as_slice()),
|
||||
field_indices.as_ref().map(|(_, ei)| ei.as_slice()),
|
||||
field_indices.as_deref(),
|
||||
);
|
||||
|
||||
let t_total = t0.elapsed();
|
||||
|
|
@ -489,15 +283,14 @@ pub async fn get_hexagons(
|
|||
filters_raw = filters_str.as_deref().unwrap_or("-"),
|
||||
agg_ms = format_args!("{:.1}", t_agg.as_secs_f64() * 1000.0),
|
||||
total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0),
|
||||
size = format_args!("{}", HumanBytes(json_buf.len())),
|
||||
"GET /api/hexagons"
|
||||
);
|
||||
|
||||
Ok(json_buf)
|
||||
Ok(HexagonsResponse { features })
|
||||
})
|
||||
.await
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
|
||||
|
||||
Ok(([("content-type", "application/json")], json_body))
|
||||
Ok(Json(response))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ use std::sync::Arc;
|
|||
use axum::extract::Query;
|
||||
use axum::http::{header, StatusCode};
|
||||
use axum::response::IntoResponse;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::state::AppState;
|
||||
|
||||
|
|
@ -48,7 +49,7 @@ pub async fn get_og_image(
|
|||
};
|
||||
|
||||
let url = format!("{}/screenshot{}", sidecar_url, qs);
|
||||
tracing::info!("Proxying OG screenshot request to: {}", url);
|
||||
info!("Proxying OG screenshot request to: {}", url);
|
||||
|
||||
match state.http_client.get(&url).send().await {
|
||||
Ok(resp) if resp.status().is_success() => match resp.bytes().await {
|
||||
|
|
@ -62,18 +63,18 @@ pub async fn get_og_image(
|
|||
)
|
||||
.into_response(),
|
||||
Err(err) => {
|
||||
tracing::warn!("Failed to read sidecar response: {}", err);
|
||||
warn!("Failed to read sidecar response: {}", err);
|
||||
(StatusCode::BAD_GATEWAY, "Failed to read screenshot").into_response()
|
||||
}
|
||||
},
|
||||
Ok(resp) => {
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
tracing::warn!("Sidecar returned status {}: {}", status, body);
|
||||
warn!("Sidecar returned status {}: {}", status, body);
|
||||
(StatusCode::BAD_GATEWAY, "Screenshot sidecar error").into_response()
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::warn!("Failed to reach sidecar: {}", err);
|
||||
warn!("Failed to reach sidecar: {}", err);
|
||||
(StatusCode::BAD_GATEWAY, "Screenshot sidecar unavailable").into_response()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,15 +2,31 @@ use std::sync::Arc;
|
|||
|
||||
use axum::extract::Query;
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::{IntoResponse, Json};
|
||||
use axum::response::Json;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::info;
|
||||
|
||||
use crate::consts::MAX_POIS_PER_REQUEST;
|
||||
use crate::state::{AppState, POICategoryGroup};
|
||||
use crate::data::POICategoryGroup;
|
||||
use crate::parsing::parse_bounds;
|
||||
use crate::state::AppState;
|
||||
|
||||
use super::hexagons::write_json_escaped;
|
||||
use super::parse::parse_bounds;
|
||||
/// One point of interest as serialized into the POI listing response.
///
/// `get_pois` fills each field from the matching row of `state.poi_data`.
#[derive(Serialize)]
|
||||
// The type keeps the all-caps `POI` acronym, so the acronym-casing lint is silenced here.
#[allow(clippy::upper_case_acronyms)]
|
||||
pub struct POI {
|
||||
id: String,
|
||||
name: String,
|
||||
category: String,
|
||||
group: String,
|
||||
lat: f32,
|
||||
lng: f32,
|
||||
emoji: String,
|
||||
}
|
||||
|
||||
/// Response body returned by `get_pois`: the selected POIs in a single `pois` field.
#[derive(Serialize)]
|
||||
pub struct POIsResponse {
|
||||
pois: Vec<POI>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct POIParams {
|
||||
|
|
@ -22,7 +38,7 @@ pub struct POIParams {
|
|||
pub async fn get_pois(
|
||||
state: Arc<AppState>,
|
||||
Query(params): Query<POIParams>,
|
||||
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
||||
) -> Result<Json<POIsResponse>, (StatusCode, String)> {
|
||||
let bounds_str = params.bounds.ok_or((
|
||||
StatusCode::BAD_REQUEST,
|
||||
"bounds parameter is required".into(),
|
||||
|
|
@ -43,12 +59,10 @@ pub async fn get_pois(
|
|||
|
||||
let num_categories = category_filter.as_ref().map(|cats| cats.len()).unwrap_or(0);
|
||||
|
||||
let json_body = tokio::task::spawn_blocking(move || {
|
||||
let pois = tokio::task::spawn_blocking(move || {
|
||||
let t0 = std::time::Instant::now();
|
||||
let row_indices = state.poi_grid.query(south, west, north, east);
|
||||
|
||||
// Collect matching row indices first, then sample randomly so the
|
||||
// subset covers the viewport uniformly instead of clustering in one area.
|
||||
let mut matching_rows: Vec<usize> = row_indices
|
||||
.iter()
|
||||
.filter_map(|&row_idx| {
|
||||
|
|
@ -73,36 +87,22 @@ pub async fn get_pois(
|
|||
}
|
||||
}
|
||||
|
||||
// Write JSON directly to string buffer, avoiding intermediate POI allocations
|
||||
let mut buf = String::with_capacity(matching_rows.len() * 128);
|
||||
buf.push_str("{\"pois\":[");
|
||||
|
||||
for (i, &row) in matching_rows.iter().enumerate() {
|
||||
if i > 0 {
|
||||
buf.push(',');
|
||||
}
|
||||
buf.push_str("{\"id\":\"");
|
||||
write_json_escaped(&mut buf, &state.poi_data.id[row]);
|
||||
buf.push_str("\",\"name\":\"");
|
||||
write_json_escaped(&mut buf, &state.poi_data.name[row]);
|
||||
buf.push_str("\",\"category\":\"");
|
||||
write_json_escaped(&mut buf, state.poi_data.category.get(row));
|
||||
buf.push_str("\",\"group\":\"");
|
||||
write_json_escaped(&mut buf, state.poi_data.group.get(row));
|
||||
buf.push_str("\",\"lat\":");
|
||||
buf.push_str(&state.poi_data.lat[row].to_string());
|
||||
buf.push_str(",\"lng\":");
|
||||
buf.push_str(&state.poi_data.lng[row].to_string());
|
||||
buf.push_str(",\"emoji\":\"");
|
||||
write_json_escaped(&mut buf, state.poi_data.emoji.get(row));
|
||||
buf.push_str("\"}");
|
||||
}
|
||||
|
||||
buf.push_str("]}");
|
||||
let pois: Vec<POI> = matching_rows
|
||||
.iter()
|
||||
.map(|&row| POI {
|
||||
id: state.poi_data.id[row].clone(),
|
||||
name: state.poi_data.name[row].clone(),
|
||||
category: state.poi_data.category.get(row).to_string(),
|
||||
group: state.poi_data.group.get(row).to_string(),
|
||||
lat: state.poi_data.lat[row],
|
||||
lng: state.poi_data.lng[row],
|
||||
emoji: state.poi_data.emoji.get(row).to_string(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let elapsed = t0.elapsed();
|
||||
info!(
|
||||
results = matching_rows.len(),
|
||||
results = pois.len(),
|
||||
candidates = row_indices.len(),
|
||||
categories = num_categories,
|
||||
categories_raw = categories_str.as_deref().unwrap_or("-"),
|
||||
|
|
@ -110,12 +110,12 @@ pub async fn get_pois(
|
|||
"GET /api/pois"
|
||||
);
|
||||
|
||||
buf
|
||||
pois
|
||||
})
|
||||
.await
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
|
||||
|
||||
Ok(([("content-type", "application/json")], json_body))
|
||||
Ok(Json(POIsResponse { pois }))
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
|
|||
|
|
@ -9,15 +9,12 @@ use serde::{Deserialize, Serialize};
|
|||
use tracing::{info, warn};
|
||||
|
||||
use crate::consts::{
|
||||
DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN,
|
||||
DEFAULT_PROPERTIES_LIMIT, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN,
|
||||
MAX_PROPERTIES_LIMIT,
|
||||
};
|
||||
use crate::data::EnumFeatureData;
|
||||
use crate::filter::{parse_filters, row_passes_filters};
|
||||
use crate::parsing::{h3_cell_bounds, parse_filters, row_passes_filters};
|
||||
use crate::state::AppState;
|
||||
|
||||
use super::parse::h3_cell_bounds;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct HexagonPropertiesParams {
|
||||
pub h3: String,
|
||||
|
|
@ -66,21 +63,25 @@ fn non_empty_string(text: &str) -> Option<String> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Look up an enum feature value by trying multiple possible column names.
|
||||
/// Uses the unified feature model: enum values stored as f32 indices in feature_data.
|
||||
fn lookup_enum_value(
|
||||
enum_features: &[EnumFeatureData],
|
||||
enum_data: &[u8],
|
||||
num_enums: usize,
|
||||
enum_idx: &FxHashMap<String, usize>,
|
||||
feature_names: &[String],
|
||||
feature_data: &[f32],
|
||||
num_features: usize,
|
||||
enum_values: &FxHashMap<usize, Vec<String>>,
|
||||
row: usize,
|
||||
names: &[&str],
|
||||
) -> Option<String> {
|
||||
for name in names {
|
||||
if let Some(&feature_index) = enum_idx.get(*name) {
|
||||
let enum_feature = &enum_features[feature_index];
|
||||
let data_index = enum_data[row * num_enums + feature_index];
|
||||
if data_index != ENUM_NULL {
|
||||
if let Some(value) = enum_feature.values.get(data_index as usize) {
|
||||
return Some(value.clone());
|
||||
if let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == *name) {
|
||||
if let Some(values) = enum_values.get(&feat_idx) {
|
||||
let value = feature_data[row * num_features + feat_idx];
|
||||
if value.is_finite() {
|
||||
let idx = value as usize;
|
||||
if let Some(str_value) = values.get(idx) {
|
||||
return Some(str_value.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -120,7 +121,7 @@ pub async fn get_hexagon_properties(
|
|||
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||
params.filters.as_deref(),
|
||||
&state.data.feature_names,
|
||||
&state.data.enum_features,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
||||
|
|
@ -131,10 +132,9 @@ pub async fn get_hexagon_properties(
|
|||
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
|
||||
let need_parent = resolution < H3_PRECOMPUTE_MAX;
|
||||
let num_features = state.data.num_features;
|
||||
let num_enums = state.data.num_enums;
|
||||
let feature_data = &state.data.feature_data;
|
||||
let enum_data_flat = &state.data.enum_data;
|
||||
let enum_features = &state.data.enum_features;
|
||||
let feature_names = &state.data.feature_names;
|
||||
let enum_values = &state.data.enum_values;
|
||||
|
||||
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
||||
|
||||
|
|
@ -162,8 +162,6 @@ pub async fn get_hexagon_properties(
|
|||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_data_flat,
|
||||
num_enums,
|
||||
)
|
||||
{
|
||||
matching_rows.push(row);
|
||||
|
|
@ -185,7 +183,11 @@ pub async fn get_hexagon_properties(
|
|||
.map(|&row| {
|
||||
let mut features = FxHashMap::default();
|
||||
let base = row * num_features;
|
||||
for (feat_idx, feat_name) in state.data.feature_names.iter().enumerate() {
|
||||
for (feat_idx, feat_name) in feature_names.iter().enumerate() {
|
||||
// Skip enum features in the generic features map
|
||||
if enum_values.contains_key(&feat_idx) {
|
||||
continue;
|
||||
}
|
||||
let value = feature_data[base + feat_idx];
|
||||
if value.is_finite() {
|
||||
features.insert(feat_name.clone(), value);
|
||||
|
|
@ -197,42 +199,42 @@ pub async fn get_hexagon_properties(
|
|||
postcode: non_empty_string(state.data.postcode(row)),
|
||||
is_construction_date_approximate: Some(state.data.is_approx_build_date(row)),
|
||||
property_type: lookup_enum_value(
|
||||
enum_features,
|
||||
enum_data_flat,
|
||||
num_enums,
|
||||
&state.enum_name_to_idx,
|
||||
feature_names,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_values,
|
||||
row,
|
||||
&["Property type", "epc_property_type", "pp_property_type"],
|
||||
),
|
||||
built_form: lookup_enum_value(
|
||||
enum_features,
|
||||
enum_data_flat,
|
||||
num_enums,
|
||||
&state.enum_name_to_idx,
|
||||
feature_names,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_values,
|
||||
row,
|
||||
&["Property type/built form", "built_form"],
|
||||
),
|
||||
duration: lookup_enum_value(
|
||||
enum_features,
|
||||
enum_data_flat,
|
||||
num_enums,
|
||||
&state.enum_name_to_idx,
|
||||
feature_names,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_values,
|
||||
row,
|
||||
&["Leashold/Freehold", "duration"],
|
||||
),
|
||||
current_energy_rating: lookup_enum_value(
|
||||
enum_features,
|
||||
enum_data_flat,
|
||||
num_enums,
|
||||
&state.enum_name_to_idx,
|
||||
feature_names,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_values,
|
||||
row,
|
||||
&["Current energy rating", "current_energy_rating"],
|
||||
),
|
||||
potential_energy_rating: lookup_enum_value(
|
||||
enum_features,
|
||||
enum_data_flat,
|
||||
num_enums,
|
||||
&state.enum_name_to_idx,
|
||||
feature_names,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_values,
|
||||
row,
|
||||
&["Potential energy rating", "potential_energy_rating"],
|
||||
),
|
||||
|
|
|
|||
|
|
@ -1,14 +1,6 @@
|
|||
use rustc_hash::FxHashMap;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::data::{POIData, PropertyData};
|
||||
use crate::grid_index::GridIndex;
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub struct POICategoryGroup {
|
||||
pub name: String,
|
||||
pub categories: Vec<String>,
|
||||
}
|
||||
use crate::data::{POICategoryGroup, POIData, PropertyData};
|
||||
use crate::routes::FeaturesResponse;
|
||||
use crate::utils::GridIndex;
|
||||
|
||||
pub struct AppState {
|
||||
pub data: PropertyData,
|
||||
|
|
@ -18,18 +10,14 @@ pub struct AppState {
|
|||
pub h3_cells: Vec<u64>,
|
||||
pub poi_data: POIData,
|
||||
pub poi_grid: GridIndex,
|
||||
/// Precomputed JSON key names: "min_{feature_name}" for each numeric feature
|
||||
/// Precomputed JSON key names: "min_{feature_name}" for each feature
|
||||
pub min_keys: Vec<String>,
|
||||
/// Precomputed JSON key names: "max_{feature_name}" for each numeric feature
|
||||
/// Precomputed JSON key names: "max_{feature_name}" for each feature
|
||||
pub max_keys: Vec<String>,
|
||||
/// Precomputed JSON key names: "min_{enum_name}" for each enum feature
|
||||
pub enum_min_keys: Vec<String>,
|
||||
/// Precomputed JSON key names: "max_{enum_name}" for each enum feature
|
||||
pub enum_max_keys: Vec<String>,
|
||||
/// Precomputed POI category groups (sorted)
|
||||
pub poi_category_groups: Vec<POICategoryGroup>,
|
||||
/// Precomputed map from enum feature name to index in data.enum_features
|
||||
pub enum_name_to_idx: FxHashMap<String, usize>,
|
||||
/// Precomputed features response for /api/features endpoint
|
||||
pub features_response: FeaturesResponse,
|
||||
/// URL of the OG screenshot sidecar service (e.g. http://og-screenshot:8002)
|
||||
pub og_sidecar_url: Option<String>,
|
||||
/// Public-facing URL for absolute og:image URLs (e.g. https://narrowit.schmelczer.dev)
|
||||
|
|
|
|||
|
|
@ -1,251 +0,0 @@
|
|||
#[cfg(test)]
mod grid_index_tests {
    use crate::grid_index::GridIndex;

    /// Three points on the diagonal (50.0, 0.0) .. (51.0, 1.0), indexed with 0.01-degree cells.
    fn diagonal_grid() -> GridIndex {
        GridIndex::build(&[50.0_f32, 50.5, 51.0], &[0.0_f32, 0.5, 1.0], 0.01)
    }

    #[test]
    fn query_bounds_fully_below_grid_returns_empty() {
        let hits = diagonal_grid().query(10.0, -10.0, 20.0, -5.0);
        assert!(
            hits.is_empty(),
            "Should return empty for bounds fully below grid"
        );
    }

    #[test]
    fn query_bounds_fully_above_grid_returns_empty() {
        let hits = diagonal_grid().query(80.0, 50.0, 90.0, 60.0);
        assert!(
            hits.is_empty(),
            "Should return empty for bounds fully above grid"
        );
    }

    #[test]
    fn query_inverted_bounds_returns_empty() {
        // The first latitude (52.0) is larger than the second (49.0): south > north.
        let hits = diagonal_grid().query(52.0, 0.0, 49.0, 1.0);
        assert!(hits.is_empty(), "Should return empty for inverted bounds");
    }

    #[test]
    fn for_each_bounds_fully_outside_yields_nothing() {
        let grid = diagonal_grid();

        let mut visited = 0;
        grid.for_each_in_bounds(10.0, -10.0, 20.0, -5.0, |_| visited += 1);
        assert_eq!(
            visited, 0,
            "for_each should yield nothing for out-of-bounds query"
        );
    }

    #[test]
    fn query_with_large_cells_outside_returns_empty() {
        // Regression guard: with a large cell size, an out-of-bounds query used to
        // scan cell (0,0), which could contain data. It must return nothing.
        let grid = GridIndex::build(&[50.0_f32], &[0.0_f32], 1.0);

        let hits = grid.query(0.0, -50.0, 10.0, -40.0);
        assert!(
            hits.is_empty(),
            "Should return empty even with large cell size"
        );
    }

    #[test]
    fn query_within_bounds_returns_correct_results() {
        let hits = diagonal_grid().query(49.9, -0.1, 51.1, 1.1);
        assert_eq!(hits.len(), 3, "Should return all 3 points within bounds");
    }

    #[test]
    fn query_partial_bounds_returns_subset() {
        // Three points stacked on the same meridian; only the first lies in the query box.
        let grid = GridIndex::build(&[50.0_f32, 51.0, 52.0], &[0.0_f32, 0.0, 0.0], 0.01);

        let hits = grid.query(49.9, -0.1, 50.1, 0.1);
        assert_eq!(hits.len(), 1, "Should return only the point at lat=50");
    }
}
|
||||
|
||||
#[cfg(test)]
mod filter_tests {
    use crate::data::EnumFeatureData;
    use crate::filter::{parse_filters, row_passes_filters};

    /// A single enum feature named "rating" with the values "A" and "B".
    fn rating_feature() -> Vec<EnumFeatureData> {
        vec![EnumFeatureData {
            name: "rating".to_string(),
            values: vec!["A".to_string(), "B".to_string()],
        }]
    }

    #[test]
    fn nan_rows_fail_numeric_filter_even_with_infinite_range() {
        let names = vec!["price".to_string()];
        let no_enum_features: Vec<EnumFeatureData> = Vec::new();
        // One row, one numeric feature, whose value is NaN.
        let numeric_values = vec![f32::NAN];
        let no_enum_values: Vec<u8> = Vec::new();

        let (numeric, enums) = parse_filters(Some("price:-inf:inf"), &names, &no_enum_features);
        assert_eq!(numeric.len(), 1, "Should parse -inf:inf as valid filter");

        assert!(
            !row_passes_filters(0, &numeric, &enums, &numeric_values, 1, &no_enum_values, 0),
            "NaN should fail filter even with infinite range"
        );
    }

    #[test]
    fn empty_enum_filter_value_rejects_all() {
        let enum_features = rating_feature();
        let names: Vec<String> = Vec::new();
        // Row-major enum data: 1 row, 1 enum, value=0 (index into "A")
        let enum_values: Vec<u8> = vec![0];

        let (numeric, enums) = parse_filters(Some("rating:"), &names, &enum_features);
        assert_eq!(enums.len(), 1);
        assert!(enums[0].allowed.is_empty());

        assert!(
            !row_passes_filters(0, &numeric, &enums, &[], 0, &enum_values, 1),
            "Empty allowed set should reject all rows"
        );
    }

    #[test]
    fn enum_filter_with_nonexistent_values_produces_empty_allowed() {
        let enum_features = rating_feature();
        let names: Vec<String> = Vec::new();

        let (_, enums) = parse_filters(Some("rating:X|Y|Z"), &names, &enum_features);
        assert_eq!(enums.len(), 1);
        assert!(enums[0].allowed.is_empty());
    }

    #[test]
    fn malformed_numeric_min_is_silently_skipped() {
        let names = vec!["price".to_string()];
        let no_enum_features: Vec<EnumFeatureData> = Vec::new();

        let (numeric, enums) =
            parse_filters(Some("price:not_a_number:200"), &names, &no_enum_features);
        assert_eq!(numeric.len(), 0);
        assert_eq!(enums.len(), 0);
    }
}
|
||||
|
||||
#[cfg(test)]
mod json_tests {
    use crate::routes::hexagons::write_json_escaped;

    #[test]
    fn json_escaped_postcode_with_quotes_is_valid() {
        let mut buf = String::from("{\"postcode\":\"");
        write_json_escaped(&mut buf, "SW1A \"test");
        buf.push_str("\"}");

        let parsed = serde_json::from_str::<serde_json::Value>(&buf);
        assert!(
            parsed.is_ok(),
            "Escaped quote should produce valid JSON: {}",
            buf
        );
        assert_eq!(parsed.unwrap()["postcode"].as_str().unwrap(), "SW1A \"test");
    }

    #[test]
    fn json_escaped_postcode_with_backslash_is_valid() {
        let mut buf = String::from("{\"postcode\":\"");
        write_json_escaped(&mut buf, "SW1A\\2AA");
        buf.push_str("\"}");

        let parsed = serde_json::from_str::<serde_json::Value>(&buf);
        assert!(
            parsed.is_ok(),
            "Escaped backslash should produce valid JSON: {}",
            buf
        );
        assert_eq!(parsed.unwrap()["postcode"].as_str().unwrap(), "SW1A\\2AA");
    }

    #[test]
    fn nan_is_not_valid_json() {
        // Formatting a raw NaN into a JSON document yields text that does not parse.
        // This documents the risk that the is_finite() guard in write_hexagons_json
        // protects against.
        let buf = format!("{{\"min_price\":{}}}", f64::NAN);

        let parsed = serde_json::from_str::<serde_json::Value>(&buf);
        assert!(parsed.is_err(), "Raw NaN should produce invalid JSON");
    }

    #[test]
    fn infinity_is_not_valid_json() {
        let buf = format!("{{\"min_price\":{}}}", f64::INFINITY);

        let parsed = serde_json::from_str::<serde_json::Value>(&buf);
        assert!(parsed.is_err(), "Raw Infinity should produce invalid JSON");
    }
}
|
||||
|
||||
#[cfg(test)]
mod enum_encoding_tests {
    use std::collections::{HashMap, HashSet};

    #[test]
    fn u8_cast_wraps_around_beyond_255() {
        // Documents the underlying u8 wrapping behavior that the truncation
        // guard in property.rs now prevents.
        let num_values = 300usize;

        // Casting each position to u8 makes positions 256.. collide with 0..
        let indices: Vec<u8> = (0..num_values).map(|position| position as u8).collect();
        assert_eq!(indices[0], indices[256], "u8 wraps: 0 == 256");
        assert_eq!(indices[1], indices[257], "u8 wraps: 1 == 257");

        let values: Vec<String> = (0..num_values).map(|i| format!("val_{}", i)).collect();
        let mut value_to_idx: HashMap<&str, u8> = HashMap::new();
        for (position, value) in values.iter().enumerate() {
            value_to_idx.insert(value.as_str(), position as u8);
        }

        let unique_indices: HashSet<u8> = value_to_idx.values().copied().collect();
        assert!(
            unique_indices.len() < num_values,
            "Without the truncation guard, {} values produce only {} unique u8 indices",
            num_values,
            unique_indices.len()
        );
    }
}
|
||||
7
server-rs/src/utils.rs
Normal file
7
server-rs/src/utils.rs
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
mod grid_index;
|
||||
mod hash;
|
||||
mod interned_column;
|
||||
|
||||
pub use grid_index::GridIndex;
|
||||
pub use hash::{generate_priorities, splitmix64_hash};
|
||||
pub use interned_column::InternedColumn;
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
use tracing::debug;
|
||||
|
||||
/// Grid-based spatial index for fast rectangle queries over property rows.
|
||||
///
|
||||
/// Divides the bounding box into cells of ~0.01 degrees (~1km).
|
||||
|
|
@ -19,6 +21,18 @@ pub struct GridIndex {
|
|||
|
||||
impl GridIndex {
|
||||
pub fn build(lat: &[f32], lon: &[f32], cell_size: f32) -> Self {
|
||||
if lat.is_empty() {
|
||||
return GridIndex {
|
||||
min_lat: 0.0,
|
||||
min_lon: 0.0,
|
||||
cell_size,
|
||||
cols: 0,
|
||||
rows: 0,
|
||||
values: Vec::new(),
|
||||
offsets: vec![0],
|
||||
};
|
||||
}
|
||||
|
||||
let mut min_lat = f32::INFINITY;
|
||||
let mut max_lat = f32::NEG_INFINITY;
|
||||
let mut min_lon = f32::INFINITY;
|
||||
|
|
@ -48,7 +62,7 @@ impl GridIndex {
|
|||
let cols = ((max_lon - min_lon) / cell_size).ceil() as usize + 1;
|
||||
let num_cells = rows * cols;
|
||||
|
||||
tracing::debug!(
|
||||
debug!(
|
||||
rows_grid = rows,
|
||||
cols_grid = cols,
|
||||
total_cells = num_cells,
|
||||
|
|
@ -86,7 +100,7 @@ impl GridIndex {
|
|||
cursors[cell_index] += 1;
|
||||
}
|
||||
|
||||
tracing::debug!("Grid index built (CSR)");
|
||||
debug!("Grid index built (CSR)");
|
||||
|
||||
GridIndex {
|
||||
min_lat,
|
||||
|
|
@ -184,3 +198,33 @@ impl GridIndex {
|
|||
Some((row_min, row_max, col_min, col_max))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn query_returns_correct_indices() {
        let latitudes = [50.0_f32, 50.5, 51.0];
        let longitudes = [0.0_f32, 0.5, 1.0];
        let grid = GridIndex::build(&latitudes, &longitudes, 0.1);

        // Only the first point (50.0, 0.0) lies inside the query rectangle.
        assert_eq!(grid.query(49.9, -0.1, 50.1, 0.1), vec![0]);
    }

    #[test]
    fn query_outside_bounds_returns_empty() {
        let grid = GridIndex::build(&[50.0_f32], &[0.0_f32], 0.1);

        let hits = grid.query(0.0, 0.0, 1.0, 1.0);
        assert!(hits.is_empty());
    }

    #[test]
    fn empty_input_returns_empty_results() {
        // An index built from no points must answer even a whole-world query with nothing.
        let grid = GridIndex::build(&[], &[], 0.1);

        let hits = grid.query(-90.0, -180.0, 90.0, 180.0);
        assert!(hits.is_empty());
    }
}
|
||||
39
server-rs/src/utils/hash.rs
Normal file
39
server-rs/src/utils/hash.rs
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
/// Map an index to a reproducible, random-looking 32-bit priority using splitmix64-style mixing.
///
/// The index is multiplied (wrapping) by a 64-bit odd constant and then passed
/// through two xor-shift/multiply rounds and a final xor-shift. The low 32 bits
/// of the mixed value are returned. Equal indices always produce equal results,
/// which is what allows rows to be shuffled in a deterministic order.
#[inline]
pub fn splitmix64_hash(index: usize) -> u32 {
    const INDEX_MULTIPLIER: u64 = 0x9E37_79B9_7F4A_7C15;
    const ROUND_ONE_MULTIPLIER: u64 = 0xBF58_476D_1CE4_E5B9;
    const ROUND_TWO_MULTIPLIER: u64 = 0x94D0_49BB_1331_11EB;

    let seeded = (index as u64).wrapping_mul(INDEX_MULTIPLIER);
    let round_one = (seeded ^ (seeded >> 30)).wrapping_mul(ROUND_ONE_MULTIPLIER);
    let round_two = (round_one ^ (round_one >> 27)).wrapping_mul(ROUND_TWO_MULTIPLIER);

    // Keeping only the low 32 bits is intentional: the priority is a u32.
    (round_two ^ (round_two >> 31)) as u32
}
|
||||
|
||||
/// Generate priority values for a range of indices.
|
||||
pub fn generate_priorities(row_count: usize) -> Vec<u32> {
|
||||
(0..row_count).map(splitmix64_hash).collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same index twice must give the same value.
    #[test]
    fn deterministic() {
        for index in [0, 12345] {
            assert_eq!(splitmix64_hash(index), splitmix64_hash(index));
        }
    }

    /// Neighbouring indices must not collide.
    #[test]
    fn different_inputs_differ() {
        for (left, right) in [(0, 1), (100, 101)] {
            assert_ne!(splitmix64_hash(left), splitmix64_hash(right));
        }
    }

    /// The priority table has one entry per requested row.
    #[test]
    fn generate_priorities_length() {
        for row_count in [0, 5, 1000] {
            assert_eq!(generate_priorities(row_count).len(), row_count);
        }
    }
}
|
||||
68
server-rs/src/utils/interned_column.rs
Normal file
68
server-rs/src/utils/interned_column.rs
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
/// Interned string column: a small set of unique values indexed by u16 per row.
|
||||
pub struct InternedColumn {
|
||||
pub values: Vec<String>,
|
||||
pub indices: Vec<u16>,
|
||||
}
|
||||
|
||||
impl InternedColumn {
|
||||
pub fn build(raw: &[String]) -> Self {
|
||||
let mut unique_map: rustc_hash::FxHashMap<&str, u16> = rustc_hash::FxHashMap::default();
|
||||
let mut values: Vec<String> = Vec::new();
|
||||
let mut indices = Vec::with_capacity(raw.len());
|
||||
|
||||
for text in raw {
|
||||
let idx = if let Some(&existing) = unique_map.get(text.as_str()) {
|
||||
existing
|
||||
} else {
|
||||
assert!(
|
||||
values.len() < u16::MAX as usize,
|
||||
"InternedColumn overflow: more than {} unique values",
|
||||
u16::MAX
|
||||
);
|
||||
let idx = values.len() as u16;
|
||||
values.push(text.clone());
|
||||
unique_map.insert(text.as_str(), idx);
|
||||
idx
|
||||
};
|
||||
indices.push(idx);
|
||||
}
|
||||
|
||||
InternedColumn { values, indices }
|
||||
}
|
||||
|
||||
/// Resolve the string for a given row.
|
||||
pub fn get(&self, row: usize) -> &str {
|
||||
&self.values[self.indices[row] as usize]
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn basic_interning() {
        let raw: Vec<String> = ["a", "b", "a", "c", "b"]
            .iter()
            .map(|text| String::from(*text))
            .collect();
        let column = InternedColumn::build(&raw);

        // Distinct values are kept in first-seen order; rows point back into them.
        assert_eq!(column.values, vec!["a", "b", "c"]);
        assert_eq!(column.indices, vec![0, 1, 0, 2, 1]);
        for (row, expected) in [(0, "a"), (2, "a"), (3, "c")] {
            assert_eq!(column.get(row), expected);
        }
    }

    #[test]
    fn empty_input() {
        let column = InternedColumn::build(&[]);

        assert!(column.values.is_empty());
        assert!(column.indices.is_empty());
    }

    #[test]
    #[should_panic(expected = "InternedColumn overflow")]
    fn u16_overflow_panics() {
        // 0..=u16::MAX produces u16::MAX + 1 distinct strings, one more than build accepts.
        let too_many: Vec<String> = (0..=u32::from(u16::MAX))
            .map(|number| number.to_string())
            .collect();
        let _column = InternedColumn::build(&too_many);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue