Optimise
This commit is contained in:
parent
9179acd4cd
commit
2c613dc0d1
14 changed files with 376 additions and 188 deletions
|
|
@ -105,4 +105,9 @@ rightmove links
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
how to handle too many pois
|
how to handle too many pois
|
||||||
|
|
||||||
|
|
||||||
|
fix zoopla links
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -600,6 +600,12 @@ export default function App() {
|
||||||
bounds: boundsStr,
|
bounds: boundsStr,
|
||||||
});
|
});
|
||||||
if (filtersStr) params.set('filters', filtersStr);
|
if (filtersStr) params.set('filters', filtersStr);
|
||||||
|
// Only request data for the actively viewed feature (reduces bandwidth); with no viewed feature, send an empty `fields` so no feature data is requested
|
||||||
|
if (viewFeature) {
|
||||||
|
params.set('fields', viewFeature);
|
||||||
|
} else {
|
||||||
|
params.set('fields', '');
|
||||||
|
}
|
||||||
const res = await fetch(`${getApiBaseUrl()}/api/hexagons?${params}`, {
|
const res = await fetch(`${getApiBaseUrl()}/api/hexagons?${params}`, {
|
||||||
signal: abortControllerRef.current.signal,
|
signal: abortControllerRef.current.signal,
|
||||||
});
|
});
|
||||||
|
|
@ -619,7 +625,7 @@ export default function App() {
|
||||||
clearTimeout(debounceRef.current);
|
clearTimeout(debounceRef.current);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}, [resolution, bounds, filters, buildFilterParam]);
|
}, [resolution, bounds, filters, buildFilterParam, viewFeature]);
|
||||||
|
|
||||||
// During slider drag, use the expanded dataset (without active feature filter)
|
// During slider drag, use the expanded dataset (without active feature filter)
|
||||||
// so both narrowing and expanding are visible. Otherwise use server-filtered data.
|
// so both narrowing and expanding are visible. Otherwise use server-filtered data.
|
||||||
|
|
@ -745,6 +751,8 @@ export default function App() {
|
||||||
const boundsStr = `${bounds.south},${bounds.west},${bounds.north},${bounds.east}`;
|
const boundsStr = `${bounds.south},${bounds.west},${bounds.north},${bounds.east}`;
|
||||||
const params = new URLSearchParams({ resolution: resolution.toString(), bounds: boundsStr });
|
const params = new URLSearchParams({ resolution: resolution.toString(), bounds: boundsStr });
|
||||||
if (filtersStr) params.set('filters', filtersStr);
|
if (filtersStr) params.set('filters', filtersStr);
|
||||||
|
// Only request the dragged feature's data
|
||||||
|
params.set('fields', name);
|
||||||
|
|
||||||
fetch(`${getApiBaseUrl()}/api/hexagons?${params}`, {
|
fetch(`${getApiBaseUrl()}/api/hexagons?${params}`, {
|
||||||
signal: dragAbortRef.current.signal,
|
signal: dragAbortRef.current.signal,
|
||||||
|
|
@ -786,7 +794,7 @@ export default function App() {
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
const fetchHexagonStats = useCallback(
|
const fetchHexagonStats = useCallback(
|
||||||
async (h3: string, res: number, signal?: AbortSignal) => {
|
async (h3: string, res: number, signal?: AbortSignal, fields?: string[]) => {
|
||||||
const params = new URLSearchParams({
|
const params = new URLSearchParams({
|
||||||
h3,
|
h3,
|
||||||
resolution: res.toString(),
|
resolution: res.toString(),
|
||||||
|
|
@ -805,6 +813,9 @@ export default function App() {
|
||||||
.join(',');
|
.join(',');
|
||||||
params.append('filters', filterStr);
|
params.append('filters', filterStr);
|
||||||
}
|
}
|
||||||
|
if (fields) {
|
||||||
|
params.set('fields', fields.join(','));
|
||||||
|
}
|
||||||
const response = await fetch(`${getApiBaseUrl()}/api/hexagon-stats?${params}`, { signal });
|
const response = await fetch(`${getApiBaseUrl()}/api/hexagon-stats?${params}`, { signal });
|
||||||
return (await response.json()) as HexagonStatsResponse;
|
return (await response.json()) as HexagonStatsResponse;
|
||||||
},
|
},
|
||||||
|
|
@ -903,7 +914,9 @@ export default function App() {
|
||||||
try {
|
try {
|
||||||
if (rightPaneTab === 'area') {
|
if (rightPaneTab === 'area') {
|
||||||
setLoadingHoveredAreaStats(true);
|
setLoadingHoveredAreaStats(true);
|
||||||
const stats = await fetchHexagonStats(h3, resolution, signal);
|
// On hover, only fetch stats for features that have active filters; with no active filters, omit `fields` so all features are returned
|
||||||
|
const hoverFields = Object.keys(filters);
|
||||||
|
const stats = await fetchHexagonStats(h3, resolution, signal, hoverFields.length > 0 ? hoverFields : undefined);
|
||||||
if (!signal.aborted) setHoveredAreaStats(stats);
|
if (!signal.aborted) setHoveredAreaStats(stats);
|
||||||
} else if (rightPaneTab === 'properties') {
|
} else if (rightPaneTab === 'properties') {
|
||||||
const params = new URLSearchParams({
|
const params = new URLSearchParams({
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
const DOMAIN = 'narrowit.schmelczer.dev';
|
const DOMAIN = 'narrowit.schmelczer.dev';
|
||||||
const ENDPOINT = '/status';
|
const ENDPOINT = '/status';
|
||||||
|
const IS_DEV = process.env.NODE_ENV !== 'production';
|
||||||
|
|
||||||
type EventOptions = {
|
type EventOptions = {
|
||||||
props?: Record<string, string | number | boolean>;
|
props?: Record<string, string | number | boolean>;
|
||||||
|
|
@ -7,6 +8,8 @@ type EventOptions = {
|
||||||
};
|
};
|
||||||
|
|
||||||
function sendEvent(name: string, options?: EventOptions) {
|
function sendEvent(name: string, options?: EventOptions) {
|
||||||
|
if (IS_DEV) return;
|
||||||
|
|
||||||
const payload: Record<string, unknown> = {
|
const payload: Record<string, unknown> = {
|
||||||
n: name,
|
n: name,
|
||||||
u: window.location.href,
|
u: window.location.href,
|
||||||
|
|
|
||||||
|
|
@ -51,12 +51,6 @@ module.exports = (env, argv) => {
|
||||||
context: ['/api'],
|
context: ['/api'],
|
||||||
target: 'http://localhost:8001',
|
target: 'http://localhost:8001',
|
||||||
},
|
},
|
||||||
{
|
|
||||||
context: ['/status'],
|
|
||||||
target: 'https://stats.schmelczer.dev',
|
|
||||||
changeOrigin: true,
|
|
||||||
pathRewrite: { '^/status': '/api/event' },
|
|
||||||
},
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
pub const HISTOGRAM_BINS: usize = 100;
|
pub const HISTOGRAM_BINS: usize = 100;
|
||||||
|
|
||||||
pub const H3_PRECOMPUTE_MIN: u8 = 7;
|
|
||||||
pub const H3_PRECOMPUTE_MAX: u8 = 12;
|
pub const H3_PRECOMPUTE_MAX: u8 = 12;
|
||||||
pub const H3_REQUEST_MIN: u8 = 4;
|
pub const H3_REQUEST_MIN: u8 = 4;
|
||||||
pub const H3_REQUEST_MAX: u8 = 12;
|
pub const H3_REQUEST_MAX: u8 = 12;
|
||||||
|
|
|
||||||
|
|
@ -74,13 +74,14 @@ pub fn row_passes_filters(
|
||||||
enum_filters: &[ParsedEnumFilter],
|
enum_filters: &[ParsedEnumFilter],
|
||||||
feature_data: &[f32],
|
feature_data: &[f32],
|
||||||
num_features: usize,
|
num_features: usize,
|
||||||
enum_features: &[EnumFeatureData],
|
enum_data: &[u8],
|
||||||
|
num_enums: usize,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
filters.iter().all(|filter| {
|
filters.iter().all(|filter| {
|
||||||
let value = feature_data[row * num_features + filter.feat_idx];
|
let value = feature_data[row * num_features + filter.feat_idx];
|
||||||
value.is_finite() && value >= filter.min && value <= filter.max
|
value.is_finite() && value >= filter.min && value <= filter.max
|
||||||
}) && enum_filters.iter().all(|enum_filter| {
|
}) && enum_filters.iter().all(|enum_filter| {
|
||||||
let value = enum_features[enum_filter.enum_idx].data[row];
|
let value = enum_data[row * num_enums + enum_filter.enum_idx];
|
||||||
value != ENUM_NULL && enum_filter.allowed.contains(&value)
|
value != ENUM_NULL && enum_filter.allowed.contains(&value)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,20 @@
|
||||||
/// Grid-based spatial index for fast rectangle queries over property rows.
|
/// Grid-based spatial index for fast rectangle queries over property rows.
|
||||||
///
|
///
|
||||||
/// Divides the UK bounding box into cells of ~0.01 degrees (~1km),
|
/// Divides the bounding box into cells of ~0.01 degrees (~1km).
|
||||||
/// each storing indices of rows whose lat/lon falls within that cell.
|
/// Uses a Compressed Sparse Row (CSR) layout: a single flat `values` array
|
||||||
|
/// plus an `offsets` array so that cell `i` owns `values[offsets[i]..offsets[i+1]]`.
|
||||||
|
/// This eliminates per-cell Vec overhead (24 bytes each for ptr+len+cap).
|
||||||
pub struct GridIndex {
|
pub struct GridIndex {
|
||||||
min_lat: f32,
|
min_lat: f32,
|
||||||
min_lon: f32,
|
min_lon: f32,
|
||||||
cell_size: f32,
|
cell_size: f32,
|
||||||
cols: usize,
|
cols: usize,
|
||||||
rows: usize,
|
rows: usize,
|
||||||
/// cells[row * cols + col] = vec of row indices
|
/// Flat array of row indices, grouped by cell.
|
||||||
cells: Vec<Vec<u32>>,
|
values: Vec<u32>,
|
||||||
|
/// offsets[i] is the start index in `values` for cell i.
|
||||||
|
/// offsets[num_cells] is values.len() (sentinel).
|
||||||
|
offsets: Vec<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GridIndex {
|
impl GridIndex {
|
||||||
|
|
@ -41,25 +46,47 @@ impl GridIndex {
|
||||||
|
|
||||||
let rows = ((max_lat - min_lat) / cell_size).ceil() as usize + 1;
|
let rows = ((max_lat - min_lat) / cell_size).ceil() as usize + 1;
|
||||||
let cols = ((max_lon - min_lon) / cell_size).ceil() as usize + 1;
|
let cols = ((max_lon - min_lon) / cell_size).ceil() as usize + 1;
|
||||||
|
let num_cells = rows * cols;
|
||||||
|
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
rows_grid = rows,
|
rows_grid = rows,
|
||||||
cols_grid = cols,
|
cols_grid = cols,
|
||||||
total_cells = rows * cols,
|
total_cells = num_cells,
|
||||||
cell_size,
|
cell_size,
|
||||||
"Building grid index"
|
"Building grid index (CSR)"
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut cells: Vec<Vec<u32>> = vec![Vec::new(); rows * cols];
|
// First pass: count items per cell
|
||||||
|
let mut counts = vec![0u32; num_cells];
|
||||||
|
for index in 0..lat.len() {
|
||||||
|
let grid_row = ((lat[index] - min_lat) / cell_size) as usize;
|
||||||
|
let grid_col = ((lon[index] - min_lon) / cell_size) as usize;
|
||||||
|
counts[grid_row * cols + grid_col] += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build offsets from counts (prefix sum)
|
||||||
|
let mut offsets = Vec::with_capacity(num_cells + 1);
|
||||||
|
let mut running = 0u32;
|
||||||
|
for &count in &counts {
|
||||||
|
offsets.push(running);
|
||||||
|
running += count;
|
||||||
|
}
|
||||||
|
offsets.push(running);
|
||||||
|
let total = running as usize;
|
||||||
|
|
||||||
|
// Second pass: fill values using write cursors
|
||||||
|
let mut cursors = offsets[..num_cells].to_vec();
|
||||||
|
let mut values = vec![0u32; total];
|
||||||
for index in 0..lat.len() {
|
for index in 0..lat.len() {
|
||||||
let grid_row = ((lat[index] - min_lat) / cell_size) as usize;
|
let grid_row = ((lat[index] - min_lat) / cell_size) as usize;
|
||||||
let grid_col = ((lon[index] - min_lon) / cell_size) as usize;
|
let grid_col = ((lon[index] - min_lon) / cell_size) as usize;
|
||||||
let cell_index = grid_row * cols + grid_col;
|
let cell_index = grid_row * cols + grid_col;
|
||||||
cells[cell_index].push(index as u32);
|
let pos = cursors[cell_index] as usize;
|
||||||
|
values[pos] = index as u32;
|
||||||
|
cursors[cell_index] += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
tracing::debug!("Grid index built");
|
tracing::debug!("Grid index built (CSR)");
|
||||||
|
|
||||||
GridIndex {
|
GridIndex {
|
||||||
min_lat,
|
min_lat,
|
||||||
|
|
@ -67,7 +94,8 @@ impl GridIndex {
|
||||||
cell_size,
|
cell_size,
|
||||||
cols,
|
cols,
|
||||||
rows,
|
rows,
|
||||||
cells,
|
values,
|
||||||
|
offsets,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -83,7 +111,10 @@ impl GridIndex {
|
||||||
for row in row_min..=row_max {
|
for row in row_min..=row_max {
|
||||||
let row_start = row * self.cols;
|
let row_start = row * self.cols;
|
||||||
for col in col_min..=col_max {
|
for col in col_min..=col_max {
|
||||||
result.extend_from_slice(&self.cells[row_start + col]);
|
let cell_idx = row_start + col;
|
||||||
|
let start = self.offsets[cell_idx] as usize;
|
||||||
|
let end = self.offsets[cell_idx + 1] as usize;
|
||||||
|
result.extend_from_slice(&self.values[start..end]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -108,7 +139,10 @@ impl GridIndex {
|
||||||
for row in row_min..=row_max {
|
for row in row_min..=row_max {
|
||||||
let row_start = row * self.cols;
|
let row_start = row * self.cols;
|
||||||
for col in col_min..=col_max {
|
for col in col_min..=col_max {
|
||||||
for &row_idx in &self.cells[row_start + col] {
|
let cell_idx = row_start + col;
|
||||||
|
let start = self.offsets[cell_idx] as usize;
|
||||||
|
let end = self.offsets[cell_idx + 1] as usize;
|
||||||
|
for &row_idx in &self.values[start..end] {
|
||||||
callback(row_idx);
|
callback(row_idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -72,8 +72,7 @@ async fn main() -> anyhow::Result<()> {
|
||||||
let grid = grid_index::GridIndex::build(&property_data.lat, &property_data.lon, consts::GRID_CELL_SIZE);
|
let grid = grid_index::GridIndex::build(&property_data.lat, &property_data.lon, consts::GRID_CELL_SIZE);
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
"Precomputing H3 cells for resolutions {}-{}",
|
"Precomputing H3 cells at resolution {}",
|
||||||
consts::H3_PRECOMPUTE_MIN,
|
|
||||||
consts::H3_PRECOMPUTE_MAX
|
consts::H3_PRECOMPUTE_MAX
|
||||||
);
|
);
|
||||||
let h3_cells = data::precompute_h3(&property_data.lat, &property_data.lon)?;
|
let h3_cells = data::precompute_h3(&property_data.lat, &property_data.lon)?;
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ use axum::response::IntoResponse;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
|
|
||||||
use crate::consts::{ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
|
use crate::consts::{ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
|
||||||
use crate::filter::{parse_filters, row_passes_filters};
|
use crate::filter::{parse_filters, row_passes_filters};
|
||||||
use crate::state::AppState;
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
|
@ -19,6 +19,10 @@ pub struct HexagonStatsParams {
|
||||||
pub h3: String,
|
pub h3: String,
|
||||||
pub resolution: u8,
|
pub resolution: u8,
|
||||||
pub filters: Option<String>,
|
pub filters: Option<String>,
|
||||||
|
/// Comma-separated feature names to include in stats response.
|
||||||
|
/// When present (even if empty), only listed features are computed.
|
||||||
|
/// When absent, all features are returned (backward compatible).
|
||||||
|
pub fields: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_hexagon_stats(
|
pub async fn get_hexagon_stats(
|
||||||
|
|
@ -45,8 +49,6 @@ pub async fn get_hexagon_stats(
|
||||||
),
|
),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
let resolution_idx = resolution as usize;
|
|
||||||
|
|
||||||
let h3_str = params.h3.clone();
|
let h3_str = params.h3.clone();
|
||||||
let filters_str = params.filters.clone();
|
let filters_str = params.filters.clone();
|
||||||
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||||
|
|
@ -56,42 +58,58 @@ pub async fn get_hexagon_stats(
|
||||||
);
|
);
|
||||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||||
|
|
||||||
|
// Parse the optional `fields` param into a set of feature names (numeric and enum alike).
|
||||||
|
// None = include all, Some = only include listed features.
|
||||||
|
let field_set: Option<std::collections::HashSet<String>> = params.fields.as_ref().map(|fields_str| {
|
||||||
|
fields_str
|
||||||
|
.split(',')
|
||||||
|
.map(|field| field.trim().to_string())
|
||||||
|
.filter(|field| !field.is_empty())
|
||||||
|
.collect()
|
||||||
|
});
|
||||||
|
|
||||||
let result = tokio::task::spawn_blocking(move || {
|
let result = tokio::task::spawn_blocking(move || {
|
||||||
let start_time = std::time::Instant::now();
|
let start_time = std::time::Instant::now();
|
||||||
let precomputed: Option<&[u64]> = state
|
let precomputed = &state.h3_cells;
|
||||||
.h3_cells
|
|
||||||
.get(resolution_idx)
|
|
||||||
.filter(|cells| !cells.is_empty())
|
|
||||||
.map(|cells| cells.as_slice());
|
|
||||||
let h3_res = h3o::Resolution::try_from(resolution)
|
let h3_res = h3o::Resolution::try_from(resolution)
|
||||||
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
|
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
|
||||||
|
let need_parent = resolution < H3_PRECOMPUTE_MAX;
|
||||||
let num_features = state.data.num_features;
|
let num_features = state.data.num_features;
|
||||||
|
let num_enums = state.data.num_enums;
|
||||||
let feature_data = &state.data.feature_data;
|
let feature_data = &state.data.feature_data;
|
||||||
|
let enum_data = &state.data.enum_data;
|
||||||
let enum_features = &state.data.enum_features;
|
let enum_features = &state.data.enum_features;
|
||||||
|
|
||||||
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
||||||
|
|
||||||
|
// Resolve cell at requested resolution from precomputed max-resolution cell
|
||||||
|
let cell_for_row = |row: usize| -> u64 {
|
||||||
|
let max_cell = precomputed[row];
|
||||||
|
if !need_parent || max_cell == 0 {
|
||||||
|
return max_cell;
|
||||||
|
}
|
||||||
|
h3o::CellIndex::try_from(max_cell)
|
||||||
|
.ok()
|
||||||
|
.and_then(|ci| ci.parent(h3_res))
|
||||||
|
.map(u64::from)
|
||||||
|
.unwrap_or(0)
|
||||||
|
};
|
||||||
|
|
||||||
// Collect matching rows
|
// Collect matching rows
|
||||||
let mut matching_rows: Vec<usize> = Vec::new();
|
let mut matching_rows: Vec<usize> = Vec::new();
|
||||||
state
|
state
|
||||||
.grid
|
.grid
|
||||||
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
|
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
|
||||||
let row = row_idx as usize;
|
let row = row_idx as usize;
|
||||||
let row_cell = if let Some(h3_data) = precomputed {
|
if cell_for_row(row) == cell_u64
|
||||||
h3_data[row]
|
|
||||||
} else {
|
|
||||||
h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64)
|
|
||||||
.map(|coord| u64::from(coord.to_cell(h3_res)))
|
|
||||||
.unwrap_or(0)
|
|
||||||
};
|
|
||||||
if row_cell == cell_u64
|
|
||||||
&& row_passes_filters(
|
&& row_passes_filters(
|
||||||
row,
|
row,
|
||||||
&parsed_filters,
|
&parsed_filters,
|
||||||
&parsed_enum_filters,
|
&parsed_enum_filters,
|
||||||
feature_data,
|
feature_data,
|
||||||
num_features,
|
num_features,
|
||||||
enum_features,
|
enum_data,
|
||||||
|
num_enums,
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
matching_rows.push(row);
|
matching_rows.push(row);
|
||||||
|
|
@ -109,6 +127,12 @@ pub async fn get_hexagon_stats(
|
||||||
output.push_str(",\"numeric_features\":[");
|
output.push_str(",\"numeric_features\":[");
|
||||||
let mut first_numeric = true;
|
let mut first_numeric = true;
|
||||||
for (feature_index, feature_name) in state.data.feature_names.iter().enumerate() {
|
for (feature_index, feature_name) in state.data.feature_names.iter().enumerate() {
|
||||||
|
// Skip features not in the requested set (when fields param is present)
|
||||||
|
if let Some(ref set) = field_set {
|
||||||
|
if !set.contains(feature_name.as_str()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
let global_stats = &state.data.feature_stats[feature_index];
|
let global_stats = &state.data.feature_stats[feature_index];
|
||||||
let histogram_min = global_stats.histogram.min;
|
let histogram_min = global_stats.histogram.min;
|
||||||
let histogram_max = global_stats.histogram.max;
|
let histogram_max = global_stats.histogram.max;
|
||||||
|
|
@ -178,15 +202,20 @@ pub async fn get_hexagon_stats(
|
||||||
output.push_str("],\"enum_features\":[");
|
output.push_str("],\"enum_features\":[");
|
||||||
let mut first_enum = true;
|
let mut first_enum = true;
|
||||||
for enum_feature in enum_features {
|
for enum_feature in enum_features {
|
||||||
|
// Skip enum features not in the requested set
|
||||||
|
if let Some(ref set) = field_set {
|
||||||
|
if !set.contains(enum_feature.name.as_str()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
let enum_index = match state.enum_name_to_idx.get(&enum_feature.name) {
|
let enum_index = match state.enum_name_to_idx.get(&enum_feature.name) {
|
||||||
Some(&index) => index,
|
Some(&index) => index,
|
||||||
None => continue,
|
None => continue,
|
||||||
};
|
};
|
||||||
let enum_data = &state.data.enum_features[enum_index];
|
|
||||||
|
|
||||||
let mut value_counts = vec![0u64; enum_data.values.len()];
|
let mut value_counts = vec![0u64; enum_feature.values.len()];
|
||||||
for &row in &matching_rows {
|
for &row in &matching_rows {
|
||||||
let value = enum_data.data[row];
|
let value = enum_data[row * num_enums + enum_index];
|
||||||
if value != ENUM_NULL && (value as usize) < value_counts.len() {
|
if value != ENUM_NULL && (value as usize) < value_counts.len() {
|
||||||
value_counts[value as usize] += 1;
|
value_counts[value as usize] += 1;
|
||||||
}
|
}
|
||||||
|
|
@ -215,7 +244,7 @@ pub async fn get_hexagon_stats(
|
||||||
output.push(',');
|
output.push(',');
|
||||||
}
|
}
|
||||||
first_value = false;
|
first_value = false;
|
||||||
write_json_string(&mut output, &enum_data.values[value_index]);
|
write_json_string(&mut output, &enum_feature.values[value_index]);
|
||||||
write!(output, ":{}", count).unwrap();
|
write!(output, ":{}", count).unwrap();
|
||||||
}
|
}
|
||||||
output.push_str("}}");
|
output.push_str("}}");
|
||||||
|
|
|
||||||
|
|
@ -9,8 +9,8 @@ use serde::Deserialize;
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
|
|
||||||
use crate::consts::{
|
use crate::consts::{
|
||||||
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN,
|
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX,
|
||||||
POSTCODE_MIN_RESOLUTION,
|
H3_REQUEST_MIN, POSTCODE_MIN_RESOLUTION,
|
||||||
};
|
};
|
||||||
use crate::filter::parse_filters;
|
use crate::filter::parse_filters;
|
||||||
use crate::state::AppState;
|
use crate::state::AppState;
|
||||||
|
|
@ -39,16 +39,21 @@ pub struct HexagonParams {
|
||||||
/// Comma-separated filters: `name:min:max,...`
|
/// Comma-separated filters: `name:min:max,...`
|
||||||
/// Rows must have non-NaN values within [min,max] for each filter.
|
/// Rows must have non-NaN values within [min,max] for each filter.
|
||||||
filters: Option<String>,
|
filters: Option<String>,
|
||||||
|
/// Comma-separated feature names to include in min/max aggregation.
|
||||||
|
/// When present (even if empty), only listed features are aggregated and written.
|
||||||
|
/// When absent, all features are included (backward compatible).
|
||||||
|
fields: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Per-cell accumulator for aggregating features
|
/// Per-cell accumulator for aggregating features.
|
||||||
|
/// Uses Box<[T]> instead of Vec<T> to avoid storing capacity (saves 8 bytes per field per cell).
|
||||||
struct CellAgg {
|
struct CellAgg {
|
||||||
count: u32,
|
count: u32,
|
||||||
mins: Vec<f32>,
|
mins: Box<[f32]>,
|
||||||
maxs: Vec<f32>,
|
maxs: Box<[f32]>,
|
||||||
/// Min/max ordinal indices for enum features (255 = no data yet)
|
/// Min/max ordinal indices for enum features (255 = no data yet)
|
||||||
enum_mins: Vec<u8>,
|
enum_mins: Box<[u8]>,
|
||||||
enum_maxs: Vec<u8>,
|
enum_maxs: Box<[u8]>,
|
||||||
/// Most common postcode in this cell (only tracked at high resolutions)
|
/// Most common postcode in this cell (only tracked at high resolutions)
|
||||||
postcode: Option<String>,
|
postcode: Option<String>,
|
||||||
postcode_count: u32,
|
postcode_count: u32,
|
||||||
|
|
@ -60,10 +65,10 @@ impl CellAgg {
|
||||||
fn new(num_features: usize, num_enums: usize) -> Self {
|
fn new(num_features: usize, num_enums: usize) -> Self {
|
||||||
CellAgg {
|
CellAgg {
|
||||||
count: 0,
|
count: 0,
|
||||||
mins: vec![f32::INFINITY; num_features],
|
mins: vec![f32::INFINITY; num_features].into_boxed_slice(),
|
||||||
maxs: vec![f32::NEG_INFINITY; num_features],
|
maxs: vec![f32::NEG_INFINITY; num_features].into_boxed_slice(),
|
||||||
enum_mins: vec![ENUM_NULL; num_enums],
|
enum_mins: vec![ENUM_NULL; num_enums].into_boxed_slice(),
|
||||||
enum_maxs: vec![0; num_enums],
|
enum_maxs: vec![0; num_enums].into_boxed_slice(),
|
||||||
postcode: None,
|
postcode: None,
|
||||||
postcode_count: 0,
|
postcode_count: 0,
|
||||||
lat_sum: 0.0,
|
lat_sum: 0.0,
|
||||||
|
|
@ -93,9 +98,45 @@ impl CellAgg {
|
||||||
|
|
||||||
/// Track min/max ordinal index for each enum feature in this cell.
|
/// Track min/max ordinal index for each enum feature in this cell.
|
||||||
#[inline]
|
#[inline]
|
||||||
fn add_enums(&mut self, enum_features: &[crate::data::EnumFeatureData], row: usize) {
|
fn add_enums(&mut self, enum_data: &[u8], row: usize, num_enums: usize) {
|
||||||
for (enum_index, enum_feature) in enum_features.iter().enumerate() {
|
let base = row * num_enums;
|
||||||
let value = enum_feature.data[row];
|
let row_slice = &enum_data[base..base + num_enums];
|
||||||
|
for (enum_index, &value) in row_slice.iter().enumerate() {
|
||||||
|
if value != ENUM_NULL {
|
||||||
|
if self.enum_mins[enum_index] == ENUM_NULL || value < self.enum_mins[enum_index] {
|
||||||
|
self.enum_mins[enum_index] = value;
|
||||||
|
}
|
||||||
|
if value > self.enum_maxs[enum_index] {
|
||||||
|
self.enum_maxs[enum_index] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a row, only aggregating the features at the given indices.
|
||||||
|
#[inline]
|
||||||
|
fn add_row_selective(&mut self, feature_data: &[f32], row: usize, num_features: usize, indices: &[usize]) {
|
||||||
|
self.count += 1;
|
||||||
|
let base = row * num_features;
|
||||||
|
for &feat_index in indices {
|
||||||
|
let value = feature_data[base + feat_index];
|
||||||
|
if value.is_finite() {
|
||||||
|
if value < self.mins[feat_index] {
|
||||||
|
self.mins[feat_index] = value;
|
||||||
|
}
|
||||||
|
if value > self.maxs[feat_index] {
|
||||||
|
self.maxs[feat_index] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Track min/max ordinal index for selected enum features only.
|
||||||
|
#[inline]
|
||||||
|
fn add_enums_selective(&mut self, enum_data: &[u8], row: usize, num_enums: usize, indices: &[usize]) {
|
||||||
|
let base = row * num_enums;
|
||||||
|
for &enum_index in indices {
|
||||||
|
let value = enum_data[base + enum_index];
|
||||||
if value != ENUM_NULL {
|
if value != ENUM_NULL {
|
||||||
if self.enum_mins[enum_index] == ENUM_NULL || value < self.enum_mins[enum_index] {
|
if self.enum_mins[enum_index] == ENUM_NULL || value < self.enum_mins[enum_index] {
|
||||||
self.enum_mins[enum_index] = value;
|
self.enum_mins[enum_index] = value;
|
||||||
|
|
@ -142,6 +183,7 @@ pub(crate) fn write_json_escaped(buf: &mut String, text: &str) {
|
||||||
|
|
||||||
/// Write the hexagons JSON response directly to a String buffer,
|
/// Write the hexagons JSON response directly to a String buffer,
|
||||||
/// avoiding serde_json::Value allocations entirely.
|
/// avoiding serde_json::Value allocations entirely.
|
||||||
|
/// When `numeric_indices` / `enum_indices` are Some, only those features are written.
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn write_hexagons_json(
|
fn write_hexagons_json(
|
||||||
buf: &mut String,
|
buf: &mut String,
|
||||||
|
|
@ -153,6 +195,8 @@ fn write_hexagons_json(
|
||||||
enum_max_keys: &[String],
|
enum_max_keys: &[String],
|
||||||
num_enums: usize,
|
num_enums: usize,
|
||||||
include_postcode: bool,
|
include_postcode: bool,
|
||||||
|
numeric_indices: Option<&[usize]>,
|
||||||
|
enum_indices: Option<&[usize]>,
|
||||||
) {
|
) {
|
||||||
buf.push_str("{\"features\":[");
|
buf.push_str("{\"features\":[");
|
||||||
let mut first = true;
|
let mut first = true;
|
||||||
|
|
@ -168,24 +212,49 @@ fn write_hexagons_json(
|
||||||
|
|
||||||
let _ = write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, aggregation.count);
|
let _ = write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, aggregation.count);
|
||||||
|
|
||||||
for feat_index in 0..num_features {
|
if let Some(indices) = numeric_indices {
|
||||||
if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() {
|
for &feat_index in indices {
|
||||||
let _ = write!(
|
if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() {
|
||||||
buf,
|
let _ = write!(
|
||||||
",\"{}\":{},\"{}\":{}",
|
buf,
|
||||||
min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index]
|
",\"{}\":{},\"{}\":{}",
|
||||||
);
|
min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for feat_index in 0..num_features {
|
||||||
|
if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() {
|
||||||
|
let _ = write!(
|
||||||
|
buf,
|
||||||
|
",\"{}\":{},\"{}\":{}",
|
||||||
|
min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index]
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for enum_index in 0..num_enums {
|
if let Some(indices) = enum_indices {
|
||||||
if aggregation.enum_mins[enum_index] != ENUM_NULL {
|
for &enum_index in indices {
|
||||||
let _ = write!(
|
if aggregation.enum_mins[enum_index] != ENUM_NULL {
|
||||||
buf,
|
let _ = write!(
|
||||||
",\"{}\":{},\"{}\":{}",
|
buf,
|
||||||
enum_min_keys[enum_index], aggregation.enum_mins[enum_index],
|
",\"{}\":{},\"{}\":{}",
|
||||||
enum_max_keys[enum_index], aggregation.enum_maxs[enum_index]
|
enum_min_keys[enum_index], aggregation.enum_mins[enum_index],
|
||||||
);
|
enum_max_keys[enum_index], aggregation.enum_maxs[enum_index]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for enum_index in 0..num_enums {
|
||||||
|
if aggregation.enum_mins[enum_index] != ENUM_NULL {
|
||||||
|
let _ = write!(
|
||||||
|
buf,
|
||||||
|
",\"{}\":{},\"{}\":{}",
|
||||||
|
enum_min_keys[enum_index], aggregation.enum_mins[enum_index],
|
||||||
|
enum_max_keys[enum_index], aggregation.enum_maxs[enum_index]
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -253,27 +322,48 @@ pub async fn get_hexagons(
|
||||||
);
|
);
|
||||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||||
|
|
||||||
|
// Parse optional `fields` param into numeric and enum index sets.
|
||||||
|
// If `fields` is absent (None), all features are included.
|
||||||
|
// If `fields` is present (even empty string), only listed features are included.
|
||||||
|
let field_indices: Option<(Vec<usize>, Vec<usize>)> = params.fields.as_ref().map(|fields_str| {
|
||||||
|
let mut numeric_indices = Vec::new();
|
||||||
|
let mut enum_indices = Vec::new();
|
||||||
|
if !fields_str.is_empty() {
|
||||||
|
for name in fields_str.split(',') {
|
||||||
|
let name = name.trim();
|
||||||
|
if name.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if let Some(idx) = state.data.feature_names.iter().position(|feat| feat == name) {
|
||||||
|
numeric_indices.push(idx);
|
||||||
|
} else if let Some(&idx) = state.enum_name_to_idx.get(name) {
|
||||||
|
enum_indices.push(idx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(numeric_indices, enum_indices)
|
||||||
|
});
|
||||||
|
|
||||||
let json_body = tokio::task::spawn_blocking(move || -> Result<String, String> {
|
let json_body = tokio::task::spawn_blocking(move || -> Result<String, String> {
|
||||||
let t0 = std::time::Instant::now();
|
let t0 = std::time::Instant::now();
|
||||||
|
|
||||||
let num_features = state.data.num_features;
|
let num_features = state.data.num_features;
|
||||||
let num_enums = state.data.enum_features.len();
|
let num_enums = state.data.num_enums;
|
||||||
let feature_data = &state.data.feature_data;
|
let feature_data = &state.data.feature_data;
|
||||||
|
let enum_data = &state.data.enum_data;
|
||||||
|
|
||||||
let min_keys = &state.min_keys;
|
let min_keys = &state.min_keys;
|
||||||
let max_keys = &state.max_keys;
|
let max_keys = &state.max_keys;
|
||||||
let enum_min_keys = &state.enum_min_keys;
|
let enum_min_keys = &state.enum_min_keys;
|
||||||
let enum_max_keys = &state.enum_max_keys;
|
let enum_max_keys = &state.enum_max_keys;
|
||||||
|
|
||||||
let h3_cells_for_res: Option<&[u64]> = state
|
let h3_res = h3o::Resolution::try_from(resolution)
|
||||||
.h3_cells
|
.map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?;
|
||||||
.get(resolution as usize)
|
let precomputed = &state.h3_cells;
|
||||||
.filter(|cells| !cells.is_empty())
|
let need_parent = resolution < H3_PRECOMPUTE_MAX;
|
||||||
.map(|cells| cells.as_slice());
|
|
||||||
|
|
||||||
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
|
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
|
||||||
|
|
||||||
let enum_features = &state.data.enum_features;
|
|
||||||
let include_postcode = resolution >= POSTCODE_MIN_RESOLUTION;
|
let include_postcode = resolution >= POSTCODE_MIN_RESOLUTION;
|
||||||
|
|
||||||
// Row-level filter check: numeric must be non-NaN and within [min, max],
|
// Row-level filter check: numeric must be non-NaN and within [min, max],
|
||||||
|
|
@ -283,60 +373,58 @@ pub async fn get_hexagons(
|
||||||
let value = feature_data[row * num_features + filter.feat_idx];
|
let value = feature_data[row * num_features + filter.feat_idx];
|
||||||
value.is_finite() && value >= filter.min && value <= filter.max
|
value.is_finite() && value >= filter.min && value <= filter.max
|
||||||
}) && parsed_enum_filters.iter().all(|enum_filter| {
|
}) && parsed_enum_filters.iter().all(|enum_filter| {
|
||||||
let value = enum_features[enum_filter.enum_idx].data[row];
|
let value = enum_data[row * num_enums + enum_filter.enum_idx];
|
||||||
value != ENUM_NULL && enum_filter.allowed.contains(&value)
|
value != ENUM_NULL && enum_filter.allowed.contains(&value)
|
||||||
})
|
})
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(precomputed) = h3_cells_for_res {
|
// Choose aggregation strategy based on whether fields are specified
|
||||||
state
|
let has_selective = field_indices.is_some();
|
||||||
.grid
|
let (sel_numeric, sel_enum) = field_indices.as_ref().map_or((&[][..], &[][..]), |(ni, ei)| (ni.as_slice(), ei.as_slice()));
|
||||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
|
||||||
let row = row_idx as usize;
|
let aggregate_row = |groups: &mut FxHashMap<u64, CellAgg>, cell_id: u64, row: usize| {
|
||||||
if !row_passes(row) {
|
let aggregation = groups
|
||||||
return;
|
.entry(cell_id)
|
||||||
}
|
.or_insert_with(|| CellAgg::new(num_features, num_enums));
|
||||||
let cell_id = precomputed[row];
|
if has_selective {
|
||||||
let aggregation = groups
|
aggregation.add_row_selective(feature_data, row, num_features, sel_numeric);
|
||||||
.entry(cell_id)
|
aggregation.add_enums_selective(enum_data, row, num_enums, sel_enum);
|
||||||
.or_insert_with(|| CellAgg::new(num_features, num_enums));
|
} else {
|
||||||
aggregation.add_row(feature_data, row, num_features);
|
aggregation.add_row(feature_data, row, num_features);
|
||||||
aggregation.add_enums(enum_features, row);
|
aggregation.add_enums(enum_data, row, num_enums);
|
||||||
if include_postcode {
|
}
|
||||||
aggregation.add_postcode(
|
if include_postcode {
|
||||||
state.data.postcode(row),
|
aggregation.add_postcode(
|
||||||
state.data.lat[row],
|
state.data.postcode(row),
|
||||||
state.data.lon[row],
|
state.data.lat[row],
|
||||||
);
|
state.data.lon[row],
|
||||||
}
|
);
|
||||||
});
|
}
|
||||||
} else {
|
};
|
||||||
let h3_res = h3o::Resolution::try_from(resolution)
|
|
||||||
.map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?;
|
// Resolve cell at requested resolution from precomputed max-resolution cell.
|
||||||
state
|
// For max resolution, use directly; for lower resolutions, derive parent.
|
||||||
.grid
|
let cell_for_row = |row: usize| -> u64 {
|
||||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
let max_cell = precomputed[row];
|
||||||
let row = row_idx as usize;
|
if !need_parent || max_cell == 0 {
|
||||||
if !row_passes(row) {
|
return max_cell;
|
||||||
return;
|
}
|
||||||
}
|
h3o::CellIndex::try_from(max_cell)
|
||||||
let cell_id = h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64)
|
.ok()
|
||||||
.map(|coord| u64::from(coord.to_cell(h3_res)))
|
.and_then(|ci| ci.parent(h3_res))
|
||||||
.unwrap_or(0);
|
.map(u64::from)
|
||||||
let aggregation = groups
|
.unwrap_or(0)
|
||||||
.entry(cell_id)
|
};
|
||||||
.or_insert_with(|| CellAgg::new(num_features, num_enums));
|
|
||||||
aggregation.add_row(feature_data, row, num_features);
|
state
|
||||||
aggregation.add_enums(enum_features, row);
|
.grid
|
||||||
if include_postcode {
|
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||||
aggregation.add_postcode(
|
let row = row_idx as usize;
|
||||||
state.data.postcode(row),
|
if !row_passes(row) {
|
||||||
state.data.lat[row],
|
return;
|
||||||
state.data.lon[row],
|
}
|
||||||
);
|
aggregate_row(&mut groups, cell_for_row(row), row);
|
||||||
}
|
});
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
let t_agg = t0.elapsed();
|
let t_agg = t0.elapsed();
|
||||||
|
|
||||||
|
|
@ -351,6 +439,8 @@ pub async fn get_hexagons(
|
||||||
enum_max_keys,
|
enum_max_keys,
|
||||||
num_enums,
|
num_enums,
|
||||||
include_postcode,
|
include_postcode,
|
||||||
|
field_indices.as_ref().map(|(ni, _)| ni.as_slice()),
|
||||||
|
field_indices.as_ref().map(|(_, ei)| ei.as_slice()),
|
||||||
);
|
);
|
||||||
|
|
||||||
let t_total = t0.elapsed();
|
let t_total = t0.elapsed();
|
||||||
|
|
|
||||||
|
|
@ -2,14 +2,14 @@ use std::sync::Arc;
|
||||||
|
|
||||||
use axum::extract::Query;
|
use axum::extract::Query;
|
||||||
use axum::http::StatusCode;
|
use axum::http::StatusCode;
|
||||||
use axum::response::Json;
|
use axum::response::{IntoResponse, Json};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
use crate::consts::MAX_POIS_PER_REQUEST;
|
use crate::consts::MAX_POIS_PER_REQUEST;
|
||||||
use crate::data::POI;
|
|
||||||
use crate::state::{AppState, POICategoryGroup};
|
use crate::state::{AppState, POICategoryGroup};
|
||||||
|
|
||||||
|
use super::hexagons::write_json_escaped;
|
||||||
use super::parse::parse_bounds;
|
use super::parse::parse_bounds;
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
|
|
@ -19,15 +19,10 @@ pub struct POIParams {
|
||||||
categories: Option<String>,
|
categories: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
|
||||||
pub struct POIsResponse {
|
|
||||||
pois: Vec<POI>,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn get_pois(
|
pub async fn get_pois(
|
||||||
state: Arc<AppState>,
|
state: Arc<AppState>,
|
||||||
Query(params): Query<POIParams>,
|
Query(params): Query<POIParams>,
|
||||||
) -> Result<Json<POIsResponse>, (StatusCode, String)> {
|
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
||||||
let bounds_str = params.bounds.ok_or((
|
let bounds_str = params.bounds.ok_or((
|
||||||
StatusCode::BAD_REQUEST,
|
StatusCode::BAD_REQUEST,
|
||||||
"bounds parameter is required".into(),
|
"bounds parameter is required".into(),
|
||||||
|
|
@ -44,7 +39,7 @@ pub async fn get_pois(
|
||||||
|
|
||||||
let num_categories = category_filter.as_ref().map(|cats| cats.len()).unwrap_or(0);
|
let num_categories = category_filter.as_ref().map(|cats| cats.len()).unwrap_or(0);
|
||||||
|
|
||||||
let result = tokio::task::spawn_blocking(move || {
|
let json_body = tokio::task::spawn_blocking(move || {
|
||||||
let t0 = std::time::Instant::now();
|
let t0 = std::time::Instant::now();
|
||||||
let row_indices = state.poi_grid.query(south, west, north, east);
|
let row_indices = state.poi_grid.query(south, west, north, east);
|
||||||
|
|
||||||
|
|
@ -64,36 +59,46 @@ pub async fn get_pois(
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
if matching_rows.len() > MAX_POIS_PER_REQUEST {
|
if matching_rows.len() > MAX_POIS_PER_REQUEST {
|
||||||
// Use a power-of-2 sampling step so each POI's inclusion depends
|
|
||||||
// only on its own priority hash, not on what other POIs are in
|
|
||||||
// the viewport. This prevents visible reshuffling when panning.
|
|
||||||
let ratio = (matching_rows.len() / MAX_POIS_PER_REQUEST) as u32;
|
let ratio = (matching_rows.len() / MAX_POIS_PER_REQUEST) as u32;
|
||||||
let step = ratio.next_power_of_two();
|
let step = ratio.next_power_of_two();
|
||||||
let mask = step - 1;
|
let mask = step - 1;
|
||||||
matching_rows.retain(|&row| state.poi_data.priority[row] & mask == 0);
|
matching_rows.retain(|&row| state.poi_data.priority[row] & mask == 0);
|
||||||
// Statistical noise may leave us slightly over the limit
|
|
||||||
if matching_rows.len() > MAX_POIS_PER_REQUEST {
|
if matching_rows.len() > MAX_POIS_PER_REQUEST {
|
||||||
matching_rows.sort_unstable_by_key(|&row| state.poi_data.priority[row]);
|
matching_rows.sort_unstable_by_key(|&row| state.poi_data.priority[row]);
|
||||||
matching_rows.truncate(MAX_POIS_PER_REQUEST);
|
matching_rows.truncate(MAX_POIS_PER_REQUEST);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let pois: Vec<POI> = matching_rows
|
// Write JSON directly to string buffer, avoiding intermediate POI allocations
|
||||||
.iter()
|
let mut buf = String::with_capacity(matching_rows.len() * 128);
|
||||||
.map(|&row| POI {
|
buf.push_str("{\"pois\":[");
|
||||||
id: state.poi_data.id[row].clone(),
|
|
||||||
name: state.poi_data.name[row].clone(),
|
for (i, &row) in matching_rows.iter().enumerate() {
|
||||||
category: state.poi_data.category.get(row).to_string(),
|
if i > 0 {
|
||||||
group: state.poi_data.group.get(row).to_string(),
|
buf.push(',');
|
||||||
lat: state.poi_data.lat[row],
|
}
|
||||||
lng: state.poi_data.lng[row],
|
buf.push_str("{\"id\":\"");
|
||||||
emoji: state.poi_data.emoji.get(row).to_string(),
|
write_json_escaped(&mut buf, &state.poi_data.id[row]);
|
||||||
})
|
buf.push_str("\",\"name\":\"");
|
||||||
.collect();
|
write_json_escaped(&mut buf, &state.poi_data.name[row]);
|
||||||
|
buf.push_str("\",\"category\":\"");
|
||||||
|
write_json_escaped(&mut buf, state.poi_data.category.get(row));
|
||||||
|
buf.push_str("\",\"group\":\"");
|
||||||
|
write_json_escaped(&mut buf, state.poi_data.group.get(row));
|
||||||
|
buf.push_str("\",\"lat\":");
|
||||||
|
buf.push_str(&state.poi_data.lat[row].to_string());
|
||||||
|
buf.push_str(",\"lng\":");
|
||||||
|
buf.push_str(&state.poi_data.lng[row].to_string());
|
||||||
|
buf.push_str(",\"emoji\":\"");
|
||||||
|
write_json_escaped(&mut buf, state.poi_data.emoji.get(row));
|
||||||
|
buf.push_str("\"}");
|
||||||
|
}
|
||||||
|
|
||||||
|
buf.push_str("]}");
|
||||||
|
|
||||||
let elapsed = t0.elapsed();
|
let elapsed = t0.elapsed();
|
||||||
info!(
|
info!(
|
||||||
results = pois.len(),
|
results = matching_rows.len(),
|
||||||
candidates = row_indices.len(),
|
candidates = row_indices.len(),
|
||||||
categories = num_categories,
|
categories = num_categories,
|
||||||
categories_raw = categories_str.as_deref().unwrap_or("-"),
|
categories_raw = categories_str.as_deref().unwrap_or("-"),
|
||||||
|
|
@ -101,12 +106,12 @@ pub async fn get_pois(
|
||||||
"GET /api/pois"
|
"GET /api/pois"
|
||||||
);
|
);
|
||||||
|
|
||||||
POIsResponse { pois }
|
buf
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
|
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
|
||||||
|
|
||||||
Ok(Json(result))
|
Ok(([("content-type", "application/json")], json_body))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ use rustc_hash::FxHashMap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
|
|
||||||
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN, MAX_PROPERTIES_LIMIT};
|
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, MAX_PROPERTIES_LIMIT};
|
||||||
use crate::data::EnumFeatureData;
|
use crate::data::EnumFeatureData;
|
||||||
use crate::filter::{parse_filters, row_passes_filters};
|
use crate::filter::{parse_filters, row_passes_filters};
|
||||||
use crate::state::AppState;
|
use crate::state::AppState;
|
||||||
|
|
@ -65,6 +65,8 @@ fn non_empty_string(text: &str) -> Option<String> {
|
||||||
|
|
||||||
fn lookup_enum_value(
|
fn lookup_enum_value(
|
||||||
enum_features: &[EnumFeatureData],
|
enum_features: &[EnumFeatureData],
|
||||||
|
enum_data: &[u8],
|
||||||
|
num_enums: usize,
|
||||||
enum_idx: &FxHashMap<String, usize>,
|
enum_idx: &FxHashMap<String, usize>,
|
||||||
row: usize,
|
row: usize,
|
||||||
names: &[&str],
|
names: &[&str],
|
||||||
|
|
@ -72,7 +74,7 @@ fn lookup_enum_value(
|
||||||
for name in names {
|
for name in names {
|
||||||
if let Some(&feature_index) = enum_idx.get(*name) {
|
if let Some(&feature_index) = enum_idx.get(*name) {
|
||||||
let enum_feature = &enum_features[feature_index];
|
let enum_feature = &enum_features[feature_index];
|
||||||
let data_index = enum_feature.data[row];
|
let data_index = enum_data[row * num_enums + feature_index];
|
||||||
if data_index != ENUM_NULL {
|
if data_index != ENUM_NULL {
|
||||||
if let Some(value) = enum_feature.values.get(data_index as usize) {
|
if let Some(value) = enum_feature.values.get(data_index as usize) {
|
||||||
return Some(value.clone());
|
return Some(value.clone());
|
||||||
|
|
@ -107,8 +109,6 @@ pub async fn get_hexagon_properties(
|
||||||
),
|
),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
let resolution_idx = resolution as usize;
|
|
||||||
|
|
||||||
let h3_str = params.h3.clone();
|
let h3_str = params.h3.clone();
|
||||||
let filters_str = params.filters.clone();
|
let filters_str = params.filters.clone();
|
||||||
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||||
|
|
@ -120,39 +120,44 @@ pub async fn get_hexagon_properties(
|
||||||
|
|
||||||
let result = tokio::task::spawn_blocking(move || {
|
let result = tokio::task::spawn_blocking(move || {
|
||||||
let t0 = std::time::Instant::now();
|
let t0 = std::time::Instant::now();
|
||||||
let precomputed: Option<&[u64]> = state
|
let precomputed = &state.h3_cells;
|
||||||
.h3_cells
|
|
||||||
.get(resolution_idx)
|
|
||||||
.filter(|cells| !cells.is_empty())
|
|
||||||
.map(|cells| cells.as_slice());
|
|
||||||
let h3_res = h3o::Resolution::try_from(resolution)
|
let h3_res = h3o::Resolution::try_from(resolution)
|
||||||
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
|
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
|
||||||
|
let need_parent = resolution < H3_PRECOMPUTE_MAX;
|
||||||
let num_features = state.data.num_features;
|
let num_features = state.data.num_features;
|
||||||
|
let num_enums = state.data.num_enums;
|
||||||
let feature_data = &state.data.feature_data;
|
let feature_data = &state.data.feature_data;
|
||||||
|
let enum_data_flat = &state.data.enum_data;
|
||||||
let enum_features = &state.data.enum_features;
|
let enum_features = &state.data.enum_features;
|
||||||
|
|
||||||
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
||||||
|
|
||||||
|
let cell_for_row = |row: usize| -> u64 {
|
||||||
|
let max_cell = precomputed[row];
|
||||||
|
if !need_parent || max_cell == 0 {
|
||||||
|
return max_cell;
|
||||||
|
}
|
||||||
|
h3o::CellIndex::try_from(max_cell)
|
||||||
|
.ok()
|
||||||
|
.and_then(|ci| ci.parent(h3_res))
|
||||||
|
.map(u64::from)
|
||||||
|
.unwrap_or(0)
|
||||||
|
};
|
||||||
|
|
||||||
let mut matching_rows: Vec<usize> = Vec::new();
|
let mut matching_rows: Vec<usize> = Vec::new();
|
||||||
state
|
state
|
||||||
.grid
|
.grid
|
||||||
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
|
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
|
||||||
let row = row_idx as usize;
|
let row = row_idx as usize;
|
||||||
let row_cell = if let Some(h3_data) = precomputed {
|
if cell_for_row(row) == cell_u64
|
||||||
h3_data[row]
|
|
||||||
} else {
|
|
||||||
h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64)
|
|
||||||
.map(|coord| u64::from(coord.to_cell(h3_res)))
|
|
||||||
.unwrap_or(0)
|
|
||||||
};
|
|
||||||
if row_cell == cell_u64
|
|
||||||
&& row_passes_filters(
|
&& row_passes_filters(
|
||||||
row,
|
row,
|
||||||
&parsed_filters,
|
&parsed_filters,
|
||||||
&parsed_enum_filters,
|
&parsed_enum_filters,
|
||||||
feature_data,
|
feature_data,
|
||||||
num_features,
|
num_features,
|
||||||
enum_features,
|
enum_data_flat,
|
||||||
|
num_enums,
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
matching_rows.push(row);
|
matching_rows.push(row);
|
||||||
|
|
@ -181,33 +186,43 @@ pub async fn get_hexagon_properties(
|
||||||
Property {
|
Property {
|
||||||
address: non_empty_string(state.data.address(row)),
|
address: non_empty_string(state.data.address(row)),
|
||||||
postcode: non_empty_string(state.data.postcode(row)),
|
postcode: non_empty_string(state.data.postcode(row)),
|
||||||
is_construction_date_approximate: Some(state.data.is_approx_build_date[row]),
|
is_construction_date_approximate: Some(state.data.is_approx_build_date(row)),
|
||||||
property_type: lookup_enum_value(
|
property_type: lookup_enum_value(
|
||||||
enum_features,
|
enum_features,
|
||||||
|
enum_data_flat,
|
||||||
|
num_enums,
|
||||||
&state.enum_name_to_idx,
|
&state.enum_name_to_idx,
|
||||||
row,
|
row,
|
||||||
&["Property type", "epc_property_type", "pp_property_type"],
|
&["Property type", "epc_property_type", "pp_property_type"],
|
||||||
),
|
),
|
||||||
built_form: lookup_enum_value(
|
built_form: lookup_enum_value(
|
||||||
enum_features,
|
enum_features,
|
||||||
|
enum_data_flat,
|
||||||
|
num_enums,
|
||||||
&state.enum_name_to_idx,
|
&state.enum_name_to_idx,
|
||||||
row,
|
row,
|
||||||
&["Property type/built form", "built_form"],
|
&["Property type/built form", "built_form"],
|
||||||
),
|
),
|
||||||
duration: lookup_enum_value(
|
duration: lookup_enum_value(
|
||||||
enum_features,
|
enum_features,
|
||||||
|
enum_data_flat,
|
||||||
|
num_enums,
|
||||||
&state.enum_name_to_idx,
|
&state.enum_name_to_idx,
|
||||||
row,
|
row,
|
||||||
&["Leashold/Freehold", "duration"],
|
&["Leashold/Freehold", "duration"],
|
||||||
),
|
),
|
||||||
current_energy_rating: lookup_enum_value(
|
current_energy_rating: lookup_enum_value(
|
||||||
enum_features,
|
enum_features,
|
||||||
|
enum_data_flat,
|
||||||
|
num_enums,
|
||||||
&state.enum_name_to_idx,
|
&state.enum_name_to_idx,
|
||||||
row,
|
row,
|
||||||
&["Current energy rating", "current_energy_rating"],
|
&["Current energy rating", "current_energy_rating"],
|
||||||
),
|
),
|
||||||
potential_energy_rating: lookup_enum_value(
|
potential_energy_rating: lookup_enum_value(
|
||||||
enum_features,
|
enum_features,
|
||||||
|
enum_data_flat,
|
||||||
|
num_enums,
|
||||||
&state.enum_name_to_idx,
|
&state.enum_name_to_idx,
|
||||||
row,
|
row,
|
||||||
&["Potential energy rating", "potential_energy_rating"],
|
&["Potential energy rating", "potential_energy_rating"],
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,9 @@ pub struct POICategoryGroup {
|
||||||
pub struct AppState {
|
pub struct AppState {
|
||||||
pub data: PropertyData,
|
pub data: PropertyData,
|
||||||
pub grid: GridIndex,
|
pub grid: GridIndex,
|
||||||
/// h3_cells[resolution][row_idx] = precomputed H3 cell ID.
|
/// h3_cells[row_idx] = precomputed H3 cell ID at max resolution (12).
|
||||||
/// Empty Vec for resolutions not precomputed.
|
/// Parent cells for lower resolutions derived via CellIndex::parent().
|
||||||
pub h3_cells: Vec<Vec<u64>>,
|
pub h3_cells: Vec<u64>,
|
||||||
pub poi_data: POIData,
|
pub poi_data: POIData,
|
||||||
pub poi_grid: GridIndex,
|
pub poi_grid: GridIndex,
|
||||||
/// Precomputed JSON key names: "min_{feature_name}" for each numeric feature
|
/// Precomputed JSON key names: "min_{feature_name}" for each numeric feature
|
||||||
|
|
|
||||||
|
|
@ -102,12 +102,13 @@ mod filter_tests {
|
||||||
let feature_names = vec!["price".to_string()];
|
let feature_names = vec!["price".to_string()];
|
||||||
let feature_data = vec![f32::NAN];
|
let feature_data = vec![f32::NAN];
|
||||||
let enum_features: Vec<EnumFeatureData> = vec![];
|
let enum_features: Vec<EnumFeatureData> = vec![];
|
||||||
|
let enum_data: Vec<u8> = vec![];
|
||||||
|
|
||||||
let (numeric, enums) =
|
let (numeric, enums) =
|
||||||
parse_filters(Some("price:-inf:inf"), &feature_names, &enum_features);
|
parse_filters(Some("price:-inf:inf"), &feature_names, &enum_features);
|
||||||
assert_eq!(numeric.len(), 1, "Should parse -inf:inf as valid filter");
|
assert_eq!(numeric.len(), 1, "Should parse -inf:inf as valid filter");
|
||||||
|
|
||||||
let passes = row_passes_filters(0, &numeric, &enums, &feature_data, 1, &enum_features);
|
let passes = row_passes_filters(0, &numeric, &enums, &feature_data, 1, &enum_data, 0);
|
||||||
assert!(!passes, "NaN should fail filter even with infinite range");
|
assert!(!passes, "NaN should fail filter even with infinite range");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -116,15 +117,16 @@ mod filter_tests {
|
||||||
let enum_features = vec![EnumFeatureData {
|
let enum_features = vec![EnumFeatureData {
|
||||||
name: "rating".to_string(),
|
name: "rating".to_string(),
|
||||||
values: vec!["A".to_string(), "B".to_string()],
|
values: vec!["A".to_string(), "B".to_string()],
|
||||||
data: vec![0],
|
|
||||||
}];
|
}];
|
||||||
let feature_names: Vec<String> = vec![];
|
let feature_names: Vec<String> = vec![];
|
||||||
|
// Row-major enum data: 1 row, 1 enum, value=0 (index into "A")
|
||||||
|
let enum_data: Vec<u8> = vec![0];
|
||||||
|
|
||||||
let (numeric, enums) = parse_filters(Some("rating:"), &feature_names, &enum_features);
|
let (numeric, enums) = parse_filters(Some("rating:"), &feature_names, &enum_features);
|
||||||
assert_eq!(enums.len(), 1);
|
assert_eq!(enums.len(), 1);
|
||||||
assert!(enums[0].allowed.is_empty());
|
assert!(enums[0].allowed.is_empty());
|
||||||
|
|
||||||
let passes = row_passes_filters(0, &numeric, &enums, &[], 0, &enum_features);
|
let passes = row_passes_filters(0, &numeric, &enums, &[], 0, &enum_data, 1);
|
||||||
assert!(!passes, "Empty allowed set should reject all rows");
|
assert!(!passes, "Empty allowed set should reject all rows");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -133,7 +135,6 @@ mod filter_tests {
|
||||||
let enum_features = vec![EnumFeatureData {
|
let enum_features = vec![EnumFeatureData {
|
||||||
name: "rating".to_string(),
|
name: "rating".to_string(),
|
||||||
values: vec!["A".to_string(), "B".to_string()],
|
values: vec!["A".to_string(), "B".to_string()],
|
||||||
data: vec![0],
|
|
||||||
}];
|
}];
|
||||||
let feature_names: Vec<String> = vec![];
|
let feature_names: Vec<String> = vec![];
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue