From 85f5770e09a0f8c62975257bcce991a5aaed629c Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sat, 31 Jan 2026 12:50:01 +0000 Subject: [PATCH] Add property listing --- server-rs/src/data.rs | 93 +++++++++++++++++++++-- server-rs/src/main.rs | 5 ++ server-rs/src/routes.rs | 163 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 254 insertions(+), 7 deletions(-) diff --git a/server-rs/src/data.rs b/server-rs/src/data.rs index 16928c3..6a3cb48 100644 --- a/server-rs/src/data.rs +++ b/server-rs/src/data.rs @@ -6,7 +6,7 @@ use std::path::Path; use crate::consts::{FEATURE_PERCENTILE_LOW, FEATURE_PERCENTILE_HIGH, HISTOGRAM_BINS, H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX}; -/// Columns to exclude from feature discovery (not numeric features) +/// Columns to exclude from feature discovery const EXCLUDED_COLUMNS: &[&str] = &["lat", "lon"]; /// H3 valid resolution range (0-15) @@ -65,6 +65,13 @@ pub struct PropertyData { pub feature_data: Vec, /// Precomputed stats (percentiles + histogram) for each feature pub feature_stats: Vec, + /// String fields for property details + pub address: Vec, + pub postcode: Vec, + pub property_type: Vec, + pub built_form: Vec, + pub current_energy_rating: Vec, + pub potential_energy_rating: Vec, } /// Approximate a percentile from a histogram using linear interpolation. 
@@ -213,14 +220,34 @@ impl PropertyData { let mut cols_needed: Vec = vec!["lat".into(), "lon".into()]; cols_needed.extend(feature_names.iter().cloned()); + // Add string columns (using actual column names from parquet) + let string_cols = vec![ + "pp_address", "postcode", "pp_property_type", "built_form", + "current_energy_rating", "potential_energy_rating" + ]; + + // Build selection with proper casting + let mut select_exprs: Vec = vec![]; + + // lat/lon as f64 + select_exprs.push(col("lat").cast(DataType::Float64)); + select_exprs.push(col("lon").cast(DataType::Float64)); + + // numeric features as f64 + for name in &feature_names { + select_exprs.push(col(name.as_str()).cast(DataType::Float64)); + } + + // string columns as string (check if they exist in schema) + for &s_col in &string_cols { + if schema.get(s_col).is_some() { + select_exprs.push(col(s_col).cast(DataType::String)); + } + } + let df = LazyFrame::scan_parquet(parquet_path, Default::default()) .expect("Failed to scan parquet") - .select( - cols_needed - .iter() - .map(|c| col(c.as_str()).cast(DataType::Float64)) - .collect::>(), - ) + .select(select_exprs) .collect() .expect("Failed to read parquet"); @@ -262,6 +289,44 @@ impl PropertyData { }) .collect(); + // Extract string columns (before permutation) + eprintln!("Extracting string columns..."); + let address_raw: Vec = if let Ok(col) = df.column("pp_address") { + col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect() + } else { + vec![String::new(); row_count] + }; + + let postcode_raw: Vec = if let Ok(col) = df.column("postcode") { + col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect() + } else { + vec![String::new(); row_count] + }; + + let property_type_raw: Vec = if let Ok(col) = df.column("pp_property_type") { + col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect() + } else { + vec![String::new(); row_count] + }; + + let built_form_raw: Vec = if let Ok(col) = 
df.column("built_form") { + col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect() + } else { + vec![String::new(); row_count] + }; + + let current_energy_rating_raw: Vec = if let Ok(col) = df.column("current_energy_rating") { + col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect() + } else { + vec![String::new(); row_count] + }; + + let potential_energy_rating_raw: Vec = if let Ok(col) = df.column("potential_energy_rating") { + col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect() + } else { + vec![String::new(); row_count] + }; + // Sort all rows by spatial locality so that grid queries access // contiguous memory (sequential reads instead of random DRAM accesses). // Uses the same 0.01° grid cell as the spatial index for the sort key. @@ -283,6 +348,14 @@ impl PropertyData { let lat: Vec = perm.iter().map(|&i| lat[i as usize]).collect(); let lon: Vec = perm.iter().map(|&i| lon[i as usize]).collect(); + // Apply permutation to string columns + let address: Vec = perm.iter().map(|&i| address_raw[i as usize].clone()).collect(); + let postcode: Vec = perm.iter().map(|&i| postcode_raw[i as usize].clone()).collect(); + let property_type: Vec = perm.iter().map(|&i| property_type_raw[i as usize].clone()).collect(); + let built_form: Vec = perm.iter().map(|&i| built_form_raw[i as usize].clone()).collect(); + let current_energy_rating: Vec = perm.iter().map(|&i| current_energy_rating_raw[i as usize].clone()).collect(); + let potential_energy_rating: Vec = perm.iter().map(|&i| potential_energy_rating_raw[i as usize].clone()).collect(); + // Transpose to row-major AND apply spatial permutation in one pass. // Result: all features for one row are contiguous, and spatially // nearby rows are adjacent in memory. 
@@ -305,6 +378,12 @@ impl PropertyData { num_features, feature_data, feature_stats, + address, + postcode, + property_type, + built_form, + current_energy_rating, + potential_energy_rating, } } } diff --git a/server-rs/src/main.rs b/server-rs/src/main.rs index a3bc3ea..4756e66 100644 --- a/server-rs/src/main.rs +++ b/server-rs/src/main.rs @@ -72,6 +72,7 @@ async fn main() { let state_hexagons = state.clone(); let state_pois = state.clone(); let state_poi_categories = state.clone(); + let state_hexagon_properties = state.clone(); let api = Router::new() .route( @@ -89,6 +90,10 @@ async fn main() { .route( "/api/poi-categories", get(move || routes::get_poi_categories(state_poi_categories.clone())), + ) + .route( + "/api/hexagon-properties", + get(move |query| routes::get_hexagon_properties(state_hexagon_properties.clone(), query)), ); // Static file serving for frontend diff --git a/server-rs/src/routes.rs b/server-rs/src/routes.rs index baa3099..9f3d251 100644 --- a/server-rs/src/routes.rs +++ b/server-rs/src/routes.rs @@ -1,4 +1,5 @@ use std::fmt::Write; +use std::str::FromStr; use std::sync::Arc; use axum::extract::Query; @@ -459,3 +460,165 @@ pub async fn get_poi_categories(state: Arc) -> Json, + pub limit: Option, + pub offset: Option, +} + +#[derive(Serialize)] +pub struct Property { + // String fields + pub address: Option, + pub postcode: Option, + pub property_type: Option, + pub built_form: Option, + pub current_energy_rating: Option, + pub potential_energy_rating: Option, + + // Numeric fields + pub lat: f64, + pub lon: f64, + + // All other numeric features stored as dynamic map + #[serde(flatten)] + pub features: FxHashMap, +} + +#[derive(Serialize)] +pub struct HexagonPropertiesResponse { + pub properties: Vec, + pub total: usize, + pub limit: usize, + pub offset: usize, + pub truncated: bool, +} + +/// Helper function to check if a row passes all filters +fn row_passes_filters(row: usize, filters: &[ParsedFilter], feature_data: &[f64], num_features: 
usize) -> bool { + filters.iter().all(|f| { + let v = feature_data[row * num_features + f.feat_idx]; + v.is_finite() && v >= f.min && v <= f.max + }) +} + +pub async fn get_hexagon_properties( + state: Arc, + Query(params): Query, +) -> Result, (StatusCode, String)> { + // 1. Parse H3 cell ID + let cell = h3o::CellIndex::from_str(&params.h3) + .map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", e)))?; + let cell_u64: u64 = cell.into(); + + // 2. Validate resolution + let resolution = params.resolution as usize; + if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() { + return Err((StatusCode::BAD_REQUEST, "Invalid or non-precomputed resolution".to_string())); + } + + // 3. Parse filters (reuse existing filter parsing logic from get_hexagons) + let parsed_filters: Vec = params + .filters + .as_deref() + .filter(|s| !s.is_empty()) + .map(|s| { + s.split(',') + .filter_map(|entry| { + let parts: Vec<&str> = entry.splitn(3, ':').collect(); + if parts.len() != 3 { + return None; + } + let name = parts[0].trim(); + let min = parts[1].trim().parse::().ok()?; + let max = parts[2].trim().parse::().ok()?; + let feat_idx = state.data.feature_names.iter().position(|n| n == name)?; + Some(ParsedFilter { feat_idx, min, max }) + }) + .collect() + }) + .unwrap_or_default(); + + // Move CPU-heavy work off the async executor + let result = tokio::task::spawn_blocking(move || { + let h3_data = &state.h3_cells[resolution]; + let num_features = state.data.num_features; + let feature_data = &state.data.feature_data; + + // 4. 
Find all rows with matching H3 cell + let matching_rows: Vec = h3_data + .iter() + .enumerate() + .filter_map(|(idx, &h3_cell)| { + if h3_cell == cell_u64 { + // Apply feature filters + if row_passes_filters(idx, &parsed_filters, feature_data, num_features) { + Some(idx) + } else { + None + } + } else { + None + } + }) + .collect(); + + let total = matching_rows.len(); + let limit = params.limit.unwrap_or(100).min(500); + let offset = params.offset.unwrap_or(0); + let truncated = total > offset + limit; + + // 5. Extract properties for paginated subset + let properties: Vec = matching_rows + .iter() + .skip(offset) + .take(limit) + .map(|&row| { + // Build dynamic features map + let mut features = FxHashMap::default(); + let base = row * num_features; + for (feat_idx, feat_name) in state.data.feature_names.iter().enumerate() { + let v = feature_data[base + feat_idx]; + if v.is_finite() { + features.insert(feat_name.clone(), v); + } + } + + // Helper to get non-empty string + let get_string = |s: &str| -> Option { + if s.is_empty() { None } else { Some(s.to_string()) } + }; + + Property { + address: get_string(&state.data.address[row]), + postcode: get_string(&state.data.postcode[row]), + property_type: get_string(&state.data.property_type[row]), + built_form: get_string(&state.data.built_form[row]), + current_energy_rating: get_string(&state.data.current_energy_rating[row]), + potential_energy_rating: get_string(&state.data.potential_energy_rating[row]), + lat: state.data.lat[row], + lon: state.data.lon[row], + features, + } + }) + .collect(); + + HexagonPropertiesResponse { + properties, + total, + limit, + offset, + truncated, + } + }) + .await + .unwrap(); + + Ok(Json(result)) +}