perfect-postcode/server-rs/src/utils/grid_index.rs

230 lines
6.9 KiB
Rust

use tracing::debug;
/// Grid-based spatial index for fast rectangle queries over property rows.
///
/// The bounding box of the indexed points, padded by one cell on every side,
/// is divided into square cells of `cell_size` degrees (~0.01 degrees is ~1km).
/// Uses a Compressed Sparse Row (CSR) layout: a single flat `values` array
/// plus an `offsets` array so that cell `i` owns `values[offsets[i]..offsets[i+1]]`.
/// This eliminates per-cell Vec overhead (24 bytes each for ptr+len+cap).
///
/// Cells are numbered row-major (`row * cols + col`); rows run along latitude
/// and columns along longitude.
pub struct GridIndex {
    /// Latitude of the lower edge of row 0 (minimum indexed latitude minus one cell).
    min_lat: f32,
    /// Longitude of the lower edge of column 0 (minimum indexed longitude minus one cell).
    min_lon: f32,
    /// Edge length of one cell, in the same unit as the coordinates.
    cell_size: f32,
    /// Number of cells along the longitude axis (0 for an empty index).
    cols: usize,
    /// Number of cells along the latitude axis (0 for an empty index).
    rows: usize,
    /// Flat array of row indices, grouped by cell.
    values: Vec<u32>,
    /// offsets[i] is the start index in `values` for cell i.
    /// offsets[num_cells] is values.len() (sentinel).
    offsets: Vec<u32>,
}

impl GridIndex {
    /// Builds the index over the points `(lat[i], lon[i])`.
    ///
    /// The value stored for point `i` is `i` itself, so query results are
    /// indices into the input slices. Entries of `lon` beyond `lat.len()` are
    /// ignored. Empty input yields an index for which every query is empty.
    ///
    /// # Panics
    ///
    /// For non-empty input, panics if `cell_size` is not positive and finite,
    /// if `lon` is shorter than `lat`, if there are more than `u32::MAX`
    /// points, or if the coordinate extent needs more cells than can be
    /// addressed or allocated (e.g. an infinite coordinate).
    pub fn build(lat: &[f32], lon: &[f32], cell_size: f32) -> Self {
        if lat.is_empty() {
            return GridIndex {
                min_lat: 0.0,
                min_lon: 0.0,
                cell_size,
                cols: 0,
                rows: 0,
                values: Vec::new(),
                offsets: vec![0],
            };
        }

        assert!(
            cell_size.is_finite() && cell_size > 0.0,
            "GridIndex::build: cell_size must be positive and finite, got {}",
            cell_size
        );
        assert!(
            lon.len() >= lat.len(),
            "GridIndex::build: {} latitudes but only {} longitudes",
            lat.len(),
            lon.len()
        );
        assert!(
            lat.len() <= u32::MAX as usize,
            "GridIndex::build: {} rows do not fit in u32 row indices",
            lat.len()
        );
        // Checked by the assert above, so the cast cannot truncate.
        let point_count = lat.len() as u32;

        // Bounding box of the input. `f32::min`/`f32::max` skip NaN, so NaN
        // coordinates do not influence the extent.
        let mut min_lat = f32::INFINITY;
        let mut max_lat = f32::NEG_INFINITY;
        let mut min_lon = f32::INFINITY;
        let mut max_lon = f32::NEG_INFINITY;
        for (&point_lat, &point_lon) in lat.iter().zip(lon) {
            min_lat = min_lat.min(point_lat);
            max_lat = max_lat.max(point_lat);
            min_lon = min_lon.min(point_lon);
            max_lon = max_lon.max(point_lon);
        }

        // Pad by one cell on every side.
        let min_lat = min_lat - cell_size;
        let min_lon = min_lon - cell_size;
        let max_lat = max_lat + cell_size;
        let max_lon = max_lon + cell_size;

        // The padded maximum is >= every point and the cell mapping is
        // monotonic, so every point's cell is strictly below these counts.
        let rows = Self::point_cell(max_lat, min_lat, cell_size) + 1;
        let cols = Self::point_cell(max_lon, min_lon, cell_size) + 1;
        let num_cells = rows
            .checked_mul(cols)
            .expect("GridIndex::build: grid dimensions overflow usize");

        debug!(
            rows_grid = rows,
            cols_grid = cols,
            total_cells = num_cells,
            cell_size,
            "Building grid index (CSR)"
        );

        // Flat cell number of a point. Uses the same f64 mapping as queries
        // (see `cell_coord`) so that build and query always agree on the cell.
        let cell_index = |point_lat: f32, point_lon: f32| -> usize {
            let row = Self::point_cell(point_lat, min_lat, cell_size);
            let col = Self::point_cell(point_lon, min_lon, cell_size);
            row * cols + col
        };

        // First pass: count items per cell.
        let mut counts = vec![0u32; num_cells];
        for (&point_lat, &point_lon) in lat.iter().zip(lon) {
            counts[cell_index(point_lat, point_lon)] += 1;
        }

        // Build offsets from counts (prefix sum). The running total never
        // exceeds `point_count`, so it cannot overflow u32.
        let mut offsets = Vec::with_capacity(num_cells + 1);
        let mut running = 0u32;
        for &count in &counts {
            offsets.push(running);
            running += count;
        }
        offsets.push(running);

        // Second pass: fill values using per-cell write cursors. The counts
        // buffer is no longer needed, so it is reused for the cursors.
        let mut cursors = counts;
        cursors.copy_from_slice(&offsets[..num_cells]);
        let mut values = vec![0u32; running as usize];
        for (row_index, (&point_lat, &point_lon)) in (0..point_count).zip(lat.iter().zip(lon)) {
            let cursor = &mut cursors[cell_index(point_lat, point_lon)];
            values[*cursor as usize] = row_index;
            *cursor += 1;
        }

        debug!("Grid index built (CSR)");

        GridIndex {
            min_lat,
            min_lon,
            cell_size,
            cols,
            rows,
            values,
            offsets,
        }
    }

    /// Returns the row indices stored in every cell that overlaps the rectangle.
    ///
    /// Query accepts f64 bounds (from HTTP parsing). The result is a candidate
    /// set at cell granularity: it contains every indexed point inside the
    /// rectangle and may also contain points from the same cells that lie
    /// just outside it. Results are ordered by cell (row-major), then by
    /// input order within a cell.
    pub fn query(&self, south: f64, west: f64, north: f64, east: f64) -> Vec<u32> {
        let mut result = Vec::new();
        self.for_each_row_slice(south, west, north, east, |cell_run| {
            result.extend_from_slice(cell_run);
        });
        result
    }

    /// Calls `callback` with each row index that [`GridIndex::query`] would
    /// return for the same bounds, in the same order, without allocating.
    #[inline]
    pub fn for_each_in_bounds(
        &self,
        south: f64,
        west: f64,
        north: f64,
        east: f64,
        mut callback: impl FnMut(u32),
    ) {
        self.for_each_row_slice(south, west, north, east, |cell_run| {
            for &row_idx in cell_run {
                callback(row_idx);
            }
        });
    }

    /// Visits, for each grid row overlapping the rectangle, the slice of
    /// `values` covering that row's overlapping columns.
    ///
    /// Cells of one grid row are adjacent in the CSR layout, so the columns
    /// `col_min..=col_max` of a row form a single contiguous slice.
    fn for_each_row_slice(
        &self,
        south: f64,
        west: f64,
        north: f64,
        east: f64,
        mut visit: impl FnMut(&[u32]),
    ) {
        let Some((row_min, row_max, col_min, col_max)) =
            self.clamp_bounds(south, west, north, east)
        else {
            return;
        };
        for row in row_min..=row_max {
            let row_start = row * self.cols;
            let start = self.offsets[row_start + col_min] as usize;
            let end = self.offsets[row_start + col_max + 1] as usize;
            visit(&self.values[start..end]);
        }
    }

    /// Converts a rectangle into inclusive cell ranges
    /// `(row_min, row_max, col_min, col_max)` clamped to the grid, or `None`
    /// when the rectangle does not overlap the grid (including an empty grid
    /// and inverted bounds).
    fn clamp_bounds(
        &self,
        south: f64,
        west: f64,
        north: f64,
        east: f64,
    ) -> Option<(usize, usize, usize, usize)> {
        // An empty index has no rows/columns and therefore no last cell.
        let last_row = self.rows.checked_sub(1)? as isize;
        let last_col = self.cols.checked_sub(1)? as isize;

        let row_min = Self::cell_coord(south, self.min_lat, self.cell_size).max(0);
        let row_max = Self::cell_coord(north, self.min_lat, self.cell_size).min(last_row);
        let col_min = Self::cell_coord(west, self.min_lon, self.cell_size).max(0);
        let col_max = Self::cell_coord(east, self.min_lon, self.cell_size).min(last_col);

        // The minimums are >= 0, so this also rejects negative maximums.
        if row_min > row_max || col_min > col_max {
            return None;
        }
        Some((
            row_min as usize,
            row_max as usize,
            col_min as usize,
            col_max as usize,
        ))
    }

    /// Unclamped cell number of `value` along one axis.
    ///
    /// This is the single source of truth for the coordinate-to-cell mapping.
    /// Both indexing and querying go through it, computed in f64, so a point
    /// and a query bound with the same coordinate always land in the same
    /// cell. The mapping is monotonic in `value`, which guarantees that a
    /// point inside a query rectangle lies in a scanned cell. NaN maps to 0
    /// and out-of-range values saturate (Rust float-to-int cast semantics).
    #[inline]
    fn cell_coord(value: f64, origin: f32, cell_size: f32) -> isize {
        ((value - f64::from(origin)) / f64::from(cell_size)).floor() as isize
    }

    /// Cell number of an indexed coordinate along one axis, never negative.
    #[inline]
    fn point_cell(value: f32, origin: f32, cell_size: f32) -> usize {
        Self::cell_coord(f64::from(value), origin, cell_size).max(0) as usize
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Cell edge length shared by every test grid.
    const CELL: f32 = 0.1;

    /// A small rectangle around the first of three well-separated points
    /// must yield only that point's index.
    #[test]
    fn query_returns_correct_indices() {
        let latitudes = [50.0_f32, 50.5, 51.0];
        let longitudes = [0.0_f32, 0.5, 1.0];
        let index = GridIndex::build(&latitudes, &longitudes, CELL);

        let hits = index.query(49.9, -0.1, 50.1, 0.1);

        assert_eq!(hits, vec![0]);
    }

    /// A rectangle far away from the only indexed point matches nothing.
    #[test]
    fn query_outside_bounds_returns_empty() {
        let index = GridIndex::build(&[50.0_f32], &[0.0_f32], CELL);

        let hits = index.query(0.0, 0.0, 1.0, 1.0);

        assert!(hits.is_empty());
    }

    /// An index built from no points answers even a whole-world query
    /// with an empty result.
    #[test]
    fn empty_input_returns_empty_results() {
        let index = GridIndex::build(&[], &[], CELL);

        let hits = index.query(-90.0, -180.0, 90.0, 180.0);

        assert!(hits.is_empty());
    }
}