Refactor and improve
This commit is contained in:
parent
1f148b2185
commit
242acff987
22 changed files with 754 additions and 1053 deletions
230
server-rs/src/utils/grid_index.rs
Normal file
230
server-rs/src/utils/grid_index.rs
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
use tracing::debug;
|
||||
|
||||
/// Uniform-grid spatial index answering rectangle queries over property rows.
///
/// The indexed bounding box is cut into square cells whose side is `cell_size`
/// degrees (a value around 0.01 degrees corresponds to roughly 1 km).
///
/// Storage follows a Compressed Sparse Row (CSR) scheme: all row indices live in
/// one flat `values` array and `offsets` marks the cell boundaries, so cell `i`
/// owns `values[offsets[i]..offsets[i + 1]]`. Compared with one `Vec` per cell
/// this avoids the per-cell header (ptr + len + cap, 24 bytes each).
pub struct GridIndex {
    /// Latitude at which grid row 0 starts.
    min_lat: f32,
    /// Longitude at which grid column 0 starts.
    min_lon: f32,
    /// Side length of one cell, in the same unit as the coordinates.
    cell_size: f32,
    /// Number of cell columns (longitude direction).
    cols: usize,
    /// Number of cell rows (latitude direction).
    rows: usize,
    /// Row indices of the indexed points, laid out cell by cell.
    values: Vec<u32>,
    /// `offsets[i]` is where cell `i` begins inside `values`; the final entry,
    /// `offsets[num_cells]`, equals `values.len()` and acts as a sentinel.
    offsets: Vec<u32>,
}
|
||||
|
||||
impl GridIndex {
|
||||
pub fn build(lat: &[f32], lon: &[f32], cell_size: f32) -> Self {
|
||||
if lat.is_empty() {
|
||||
return GridIndex {
|
||||
min_lat: 0.0,
|
||||
min_lon: 0.0,
|
||||
cell_size,
|
||||
cols: 0,
|
||||
rows: 0,
|
||||
values: Vec::new(),
|
||||
offsets: vec![0],
|
||||
};
|
||||
}
|
||||
|
||||
let mut min_lat = f32::INFINITY;
|
||||
let mut max_lat = f32::NEG_INFINITY;
|
||||
let mut min_lon = f32::INFINITY;
|
||||
let mut max_lon = f32::NEG_INFINITY;
|
||||
|
||||
for index in 0..lat.len() {
|
||||
if lat[index] < min_lat {
|
||||
min_lat = lat[index];
|
||||
}
|
||||
if lat[index] > max_lat {
|
||||
max_lat = lat[index];
|
||||
}
|
||||
if lon[index] < min_lon {
|
||||
min_lon = lon[index];
|
||||
}
|
||||
if lon[index] > max_lon {
|
||||
max_lon = lon[index];
|
||||
}
|
||||
}
|
||||
|
||||
min_lat -= cell_size;
|
||||
min_lon -= cell_size;
|
||||
max_lat += cell_size;
|
||||
max_lon += cell_size;
|
||||
|
||||
let rows = ((max_lat - min_lat) / cell_size).ceil() as usize + 1;
|
||||
let cols = ((max_lon - min_lon) / cell_size).ceil() as usize + 1;
|
||||
let num_cells = rows * cols;
|
||||
|
||||
debug!(
|
||||
rows_grid = rows,
|
||||
cols_grid = cols,
|
||||
total_cells = num_cells,
|
||||
cell_size,
|
||||
"Building grid index (CSR)"
|
||||
);
|
||||
|
||||
// First pass: count items per cell
|
||||
let mut counts = vec![0u32; num_cells];
|
||||
for index in 0..lat.len() {
|
||||
let grid_row = ((lat[index] - min_lat) / cell_size) as usize;
|
||||
let grid_col = ((lon[index] - min_lon) / cell_size) as usize;
|
||||
counts[grid_row * cols + grid_col] += 1;
|
||||
}
|
||||
|
||||
// Build offsets from counts (prefix sum)
|
||||
let mut offsets = Vec::with_capacity(num_cells + 1);
|
||||
let mut running = 0u32;
|
||||
for &count in &counts {
|
||||
offsets.push(running);
|
||||
running += count;
|
||||
}
|
||||
offsets.push(running);
|
||||
let total = running as usize;
|
||||
|
||||
// Second pass: fill values using write cursors
|
||||
let mut cursors = offsets[..num_cells].to_vec();
|
||||
let mut values = vec![0u32; total];
|
||||
for index in 0..lat.len() {
|
||||
let grid_row = ((lat[index] - min_lat) / cell_size) as usize;
|
||||
let grid_col = ((lon[index] - min_lon) / cell_size) as usize;
|
||||
let cell_index = grid_row * cols + grid_col;
|
||||
let pos = cursors[cell_index] as usize;
|
||||
values[pos] = index as u32;
|
||||
cursors[cell_index] += 1;
|
||||
}
|
||||
|
||||
debug!("Grid index built (CSR)");
|
||||
|
||||
GridIndex {
|
||||
min_lat,
|
||||
min_lon,
|
||||
cell_size,
|
||||
cols,
|
||||
rows,
|
||||
values,
|
||||
offsets,
|
||||
}
|
||||
}
|
||||
|
||||
/// Query accepts f64 bounds (from HTTP parsing) and casts internally.
|
||||
pub fn query(&self, south: f64, west: f64, north: f64, east: f64) -> Vec<u32> {
|
||||
let Some((row_min, row_max, col_min, col_max)) =
|
||||
self.clamp_bounds(south, west, north, east)
|
||||
else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
let mut result = Vec::new();
|
||||
for row in row_min..=row_max {
|
||||
let row_start = row * self.cols;
|
||||
for col in col_min..=col_max {
|
||||
let cell_idx = row_start + col;
|
||||
let start = self.offsets[cell_idx] as usize;
|
||||
let end = self.offsets[cell_idx + 1] as usize;
|
||||
result.extend_from_slice(&self.values[start..end]);
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn for_each_in_bounds(
|
||||
&self,
|
||||
south: f64,
|
||||
west: f64,
|
||||
north: f64,
|
||||
east: f64,
|
||||
mut callback: impl FnMut(u32),
|
||||
) {
|
||||
let Some((row_min, row_max, col_min, col_max)) =
|
||||
self.clamp_bounds(south, west, north, east)
|
||||
else {
|
||||
return;
|
||||
};
|
||||
|
||||
for row in row_min..=row_max {
|
||||
let row_start = row * self.cols;
|
||||
for col in col_min..=col_max {
|
||||
let cell_idx = row_start + col;
|
||||
let start = self.offsets[cell_idx] as usize;
|
||||
let end = self.offsets[cell_idx + 1] as usize;
|
||||
for &row_idx in &self.values[start..end] {
|
||||
callback(row_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn clamp_bounds(
|
||||
&self,
|
||||
south: f64,
|
||||
west: f64,
|
||||
north: f64,
|
||||
east: f64,
|
||||
) -> Option<(usize, usize, usize, usize)> {
|
||||
let min_lat = self.min_lat as f64;
|
||||
let min_lon = self.min_lon as f64;
|
||||
let cell_size = self.cell_size as f64;
|
||||
|
||||
let row_min_raw = ((south - min_lat) / cell_size) as isize;
|
||||
let row_max_raw = ((north - min_lat) / cell_size) as isize;
|
||||
let col_min_raw = ((west - min_lon) / cell_size) as isize;
|
||||
let col_max_raw = ((east - min_lon) / cell_size) as isize;
|
||||
|
||||
let row_min = row_min_raw.max(0) as usize;
|
||||
let row_max_clamped = row_max_raw.min(self.rows as isize - 1);
|
||||
let col_min = col_min_raw.max(0) as usize;
|
||||
let col_max_clamped = col_max_raw.min(self.cols as isize - 1);
|
||||
|
||||
if row_max_clamped < 0 || col_max_clamped < 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let row_max = row_max_clamped as usize;
|
||||
let col_max = col_max_clamped as usize;
|
||||
|
||||
if row_min > row_max || col_min > col_max {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some((row_min, row_max, col_min, col_max))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A window around the first of three diagonal points yields only that row.
    #[test]
    fn query_returns_correct_indices() {
        let latitudes = [50.0_f32, 50.5, 51.0];
        let longitudes = [0.0_f32, 0.5, 1.0];
        let index = GridIndex::build(&latitudes, &longitudes, 0.1);

        let hits = index.query(49.9, -0.1, 50.1, 0.1);

        assert_eq!(hits, vec![0]);
    }

    /// A rectangle far away from the only indexed point matches nothing.
    #[test]
    fn query_outside_bounds_returns_empty() {
        let index = GridIndex::build(&[50.0_f32], &[0.0_f32], 0.1);

        let hits = index.query(0.0, 0.0, 1.0, 1.0);

        assert!(hits.is_empty());
    }

    /// An index built from no points answers every query with nothing.
    #[test]
    fn empty_input_returns_empty_results() {
        let index = GridIndex::build(&[], &[], 0.1);

        let hits = index.query(-90.0, -180.0, 90.0, 180.0);

        assert!(hits.is_empty());
    }
}
|
||||
39
server-rs/src/utils/hash.rs
Normal file
39
server-rs/src/utils/hash.rs
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
/// Derives a deterministic, pseudo-random-looking priority from a row index.
///
/// The index is multiplied by the splitmix64 increment and then passed through
/// the splitmix64 finalizer (two xor-shift-multiply rounds and a final
/// xor-shift); the low 32 bits of the mixed value are returned. Used to shuffle
/// rows in an order that looks random but is reproducible.
#[inline]
pub fn splitmix64_hash(index: usize) -> u32 {
    const INCREMENT: u64 = 0x9E37_79B9_7F4A_7C15;
    const MIX_ONE: u64 = 0xBF58_476D_1CE4_E5B9;
    const MIX_TWO: u64 = 0x94D0_49BB_1331_11EB;

    let seeded = (index as u64).wrapping_mul(INCREMENT);
    let first_round = (seeded ^ (seeded >> 30)).wrapping_mul(MIX_ONE);
    let second_round = (first_round ^ (first_round >> 27)).wrapping_mul(MIX_TWO);
    let mixed = second_round ^ (second_round >> 31);

    // Keep only the low 32 bits.
    mixed as u32
}
|
||||
|
||||
/// Generate priority values for a range of indices.
|
||||
pub fn generate_priorities(row_count: usize) -> Vec<u32> {
|
||||
(0..row_count).map(splitmix64_hash).collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same index twice gives the same value.
    #[test]
    fn deterministic() {
        for index in [0usize, 12345] {
            assert_eq!(splitmix64_hash(index), splitmix64_hash(index));
        }
    }

    /// Neighbouring indices do not collide.
    #[test]
    fn different_inputs_differ() {
        for (left, right) in [(0usize, 1usize), (100, 101)] {
            assert_ne!(splitmix64_hash(left), splitmix64_hash(right));
        }
    }

    /// The priority vector has exactly one entry per requested row.
    #[test]
    fn generate_priorities_length() {
        for row_count in [0usize, 5, 1000] {
            assert_eq!(generate_priorities(row_count).len(), row_count);
        }
    }
}
|
||||
68
server-rs/src/utils/interned_column.rs
Normal file
68
server-rs/src/utils/interned_column.rs
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
/// String column stored in interned form: each distinct string is kept once and
/// every row refers to it through a compact `u16` index.
pub struct InternedColumn {
    /// The distinct strings; `indices` entries point into this list.
    pub values: Vec<String>,
    /// One entry per row: the position in `values` of that row's string.
    pub indices: Vec<u16>,
}
|
||||
|
||||
impl InternedColumn {
|
||||
pub fn build(raw: &[String]) -> Self {
|
||||
let mut unique_map: rustc_hash::FxHashMap<&str, u16> = rustc_hash::FxHashMap::default();
|
||||
let mut values: Vec<String> = Vec::new();
|
||||
let mut indices = Vec::with_capacity(raw.len());
|
||||
|
||||
for text in raw {
|
||||
let idx = if let Some(&existing) = unique_map.get(text.as_str()) {
|
||||
existing
|
||||
} else {
|
||||
assert!(
|
||||
values.len() < u16::MAX as usize,
|
||||
"InternedColumn overflow: more than {} unique values",
|
||||
u16::MAX
|
||||
);
|
||||
let idx = values.len() as u16;
|
||||
values.push(text.clone());
|
||||
unique_map.insert(text.as_str(), idx);
|
||||
idx
|
||||
};
|
||||
indices.push(idx);
|
||||
}
|
||||
|
||||
InternedColumn { values, indices }
|
||||
}
|
||||
|
||||
/// Resolve the string for a given row.
|
||||
pub fn get(&self, row: usize) -> &str {
|
||||
&self.values[self.indices[row] as usize]
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Repeated strings share one slot; slots are numbered by first appearance.
    #[test]
    fn basic_interning() {
        let raw: Vec<String> = ["a", "b", "a", "c", "b"]
            .iter()
            .copied()
            .map(String::from)
            .collect();

        let column = InternedColumn::build(&raw);

        assert_eq!(column.values, vec!["a", "b", "c"]);
        assert_eq!(column.indices, vec![0, 1, 0, 2, 1]);
        assert_eq!(column.get(0), "a");
        assert_eq!(column.get(2), "a");
        assert_eq!(column.get(3), "c");
    }

    /// Interning nothing produces an empty column.
    #[test]
    fn empty_input() {
        let column = InternedColumn::build(&[]);

        assert!(column.values.is_empty());
        assert!(column.indices.is_empty());
    }

    /// One distinct string more than `u16::MAX` triggers the overflow assertion.
    #[test]
    #[should_panic(expected = "InternedColumn overflow")]
    fn u16_overflow_panics() {
        let distinct_count = u32::from(u16::MAX);
        let raw: Vec<String> = (0..=distinct_count)
            .map(|number| number.to_string())
            .collect();

        let _col = InternedColumn::build(&raw);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue