Add property listing

This commit is contained in:
Andras Schmelczer 2026-01-31 12:50:01 +00:00
parent 51967fa880
commit 85f5770e09
3 changed files with 254 additions and 7 deletions

View file

@ -6,7 +6,7 @@ use std::path::Path;
use crate::consts::{FEATURE_PERCENTILE_LOW, FEATURE_PERCENTILE_HIGH, HISTOGRAM_BINS, H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX};
/// Columns to exclude from feature discovery (not numeric features)
/// Columns to exclude from feature discovery
const EXCLUDED_COLUMNS: &[&str] = &["lat", "lon"];
/// H3 valid resolution range (0-15)
@ -65,6 +65,13 @@ pub struct PropertyData {
pub feature_data: Vec<f64>,
/// Precomputed stats (percentiles + histogram) for each feature
pub feature_stats: Vec<FeatureStats>,
/// String fields for property details
pub address: Vec<String>,
pub postcode: Vec<String>,
pub property_type: Vec<String>,
pub built_form: Vec<String>,
pub current_energy_rating: Vec<String>,
pub potential_energy_rating: Vec<String>,
}
/// Approximate a percentile from a histogram using linear interpolation.
@ -213,14 +220,34 @@ impl PropertyData {
let mut cols_needed: Vec<String> = vec!["lat".into(), "lon".into()];
cols_needed.extend(feature_names.iter().cloned());
// Add string columns (using actual column names from parquet)
let string_cols = vec![
"pp_address", "postcode", "pp_property_type", "built_form",
"current_energy_rating", "potential_energy_rating"
];
// Build selection with proper casting
let mut select_exprs: Vec<polars::prelude::Expr> = vec![];
// lat/lon as f64
select_exprs.push(col("lat").cast(DataType::Float64));
select_exprs.push(col("lon").cast(DataType::Float64));
// numeric features as f64
for name in &feature_names {
select_exprs.push(col(name.as_str()).cast(DataType::Float64));
}
// string columns as string (check if they exist in schema)
for &s_col in &string_cols {
if schema.get(s_col).is_some() {
select_exprs.push(col(s_col).cast(DataType::String));
}
}
let df = LazyFrame::scan_parquet(parquet_path, Default::default())
.expect("Failed to scan parquet")
.select(
cols_needed
.iter()
.map(|c| col(c.as_str()).cast(DataType::Float64))
.collect::<Vec<_>>(),
)
.select(select_exprs)
.collect()
.expect("Failed to read parquet");
@ -262,6 +289,44 @@ impl PropertyData {
})
.collect();
// Extract string columns (before permutation)
eprintln!("Extracting string columns...");
let address_raw: Vec<String> = if let Ok(col) = df.column("pp_address") {
col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect()
} else {
vec![String::new(); row_count]
};
let postcode_raw: Vec<String> = if let Ok(col) = df.column("postcode") {
col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect()
} else {
vec![String::new(); row_count]
};
let property_type_raw: Vec<String> = if let Ok(col) = df.column("pp_property_type") {
col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect()
} else {
vec![String::new(); row_count]
};
let built_form_raw: Vec<String> = if let Ok(col) = df.column("built_form") {
col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect()
} else {
vec![String::new(); row_count]
};
let current_energy_rating_raw: Vec<String> = if let Ok(col) = df.column("current_energy_rating") {
col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect()
} else {
vec![String::new(); row_count]
};
let potential_energy_rating_raw: Vec<String> = if let Ok(col) = df.column("potential_energy_rating") {
col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect()
} else {
vec![String::new(); row_count]
};
// Sort all rows by spatial locality so that grid queries access
// contiguous memory (sequential reads instead of random DRAM accesses).
// Uses the same 0.01° grid cell as the spatial index for the sort key.
@ -283,6 +348,14 @@ impl PropertyData {
let lat: Vec<f64> = perm.iter().map(|&i| lat[i as usize]).collect();
let lon: Vec<f64> = perm.iter().map(|&i| lon[i as usize]).collect();
// Apply permutation to string columns
let address: Vec<String> = perm.iter().map(|&i| address_raw[i as usize].clone()).collect();
let postcode: Vec<String> = perm.iter().map(|&i| postcode_raw[i as usize].clone()).collect();
let property_type: Vec<String> = perm.iter().map(|&i| property_type_raw[i as usize].clone()).collect();
let built_form: Vec<String> = perm.iter().map(|&i| built_form_raw[i as usize].clone()).collect();
let current_energy_rating: Vec<String> = perm.iter().map(|&i| current_energy_rating_raw[i as usize].clone()).collect();
let potential_energy_rating: Vec<String> = perm.iter().map(|&i| potential_energy_rating_raw[i as usize].clone()).collect();
// Transpose to row-major AND apply spatial permutation in one pass.
// Result: all features for one row are contiguous, and spatially
// nearby rows are adjacent in memory.
@ -305,6 +378,12 @@ impl PropertyData {
num_features,
feature_data,
feature_stats,
address,
postcode,
property_type,
built_form,
current_energy_rating,
potential_energy_rating,
}
}
}

View file

@ -72,6 +72,7 @@ async fn main() {
let state_hexagons = state.clone();
let state_pois = state.clone();
let state_poi_categories = state.clone();
let state_hexagon_properties = state.clone();
let api = Router::new()
.route(
@ -89,6 +90,10 @@ async fn main() {
.route(
"/api/poi-categories",
get(move || routes::get_poi_categories(state_poi_categories.clone())),
)
.route(
"/api/hexagon-properties",
get(move |query| routes::get_hexagon_properties(state_hexagon_properties.clone(), query)),
);
// Static file serving for frontend

View file

@ -1,4 +1,5 @@
use std::fmt::Write;
use std::str::FromStr;
use std::sync::Arc;
use axum::extract::Query;
@ -459,3 +460,165 @@ pub async fn get_poi_categories(state: Arc<AppState>) -> Json<POICategoriesRespo
Json(result)
}
// ── /api/hexagon-properties ──
/// Query-string parameters accepted by the `/api/hexagon-properties` handler.
#[derive(Deserialize)]
pub struct HexagonPropertiesParams {
/// H3 cell identifier in string form; the handler parses it with
/// `h3o::CellIndex::from_str` and rejects the request if that fails.
pub h3: String,
/// H3 resolution used as an index into the precomputed per-resolution
/// cell table; the handler rejects resolutions that are out of range or
/// have no precomputed cells.
pub resolution: u8,
/// Optional comma-separated list of `name:min:max` range filters.
/// Entries that do not have three parts, whose bounds do not parse as
/// `f64`, or whose name is not a known feature are skipped by the handler.
pub filters: Option<String>,
/// Page size; the handler defaults this to 100 and caps it at 500.
pub limit: Option<usize>,
/// Number of matching rows to skip; the handler defaults this to 0.
pub offset: Option<usize>,
}
/// One property row as returned by `/api/hexagon-properties`.
#[derive(Serialize)]
pub struct Property {
// String fields
// The handler in this file sets each of these to `None` when the stored
// string for the row is empty.
pub address: Option<String>,
pub postcode: Option<String>,
pub property_type: Option<String>,
pub built_form: Option<String>,
pub current_energy_rating: Option<String>,
pub potential_energy_rating: Option<String>,
// Numeric fields
pub lat: f64,
pub lon: f64,
// All other numeric features stored as dynamic map
// Keys are feature names; the handler only inserts finite values.
// `#[serde(flatten)]` emits the map entries as sibling keys of the fixed
// fields above rather than as a nested object.
// NOTE(review): a feature named like one of the fixed fields would produce
// a duplicate key in the output — confirm feature names cannot collide.
#[serde(flatten)]
pub features: FxHashMap<String, f64>,
}
/// Response body of `/api/hexagon-properties`: one page of properties plus
/// the pagination metadata describing that page.
#[derive(Serialize)]
pub struct HexagonPropertiesResponse {
/// The properties on the requested page (at most `limit` entries).
pub properties: Vec<Property>,
/// Number of rows that matched the cell and all filters, before pagination.
pub total: usize,
/// Page size actually applied by the handler (after its default and cap).
pub limit: usize,
/// Number of matching rows skipped before this page.
pub offset: usize,
/// `true` when more matching rows exist beyond `offset + limit`.
pub truncated: bool,
}
/// Returns `true` when the given row satisfies every filter.
///
/// `feature_data` is read as a flat table with `num_features` values per row,
/// so the value for a filter lives at `row * num_features + feat_idx`.
/// A value passes a filter only if it is finite and lies within the filter's
/// inclusive `[min, max]` range. An empty filter list accepts every row.
///
/// # Panics
///
/// Panics if a computed index falls outside `feature_data`.
fn row_passes_filters(row: usize, filters: &[ParsedFilter], feature_data: &[f64], num_features: usize) -> bool {
    for filter in filters {
        let value = feature_data[row * num_features + filter.feat_idx];
        // Non-finite values (NaN / ±inf) never satisfy a range filter.
        if !value.is_finite() {
            return false;
        }
        let within_bounds = filter.min <= value && value <= filter.max;
        if !within_bounds {
            return false;
        }
    }
    true
}
/// Handler for `/api/hexagon-properties`: returns one page of the properties
/// that fall inside a single H3 cell and pass the optional feature filters.
///
/// The cell id is parsed and the resolution validated on the async task; the
/// linear scan over the per-resolution cell table and the construction of the
/// response run inside `spawn_blocking`.
///
/// # Errors
///
/// * `400 Bad Request` if `h3` is not a valid H3 cell index.
/// * `400 Bad Request` if `resolution` is out of range or has no precomputed
///   cells.
/// * `500 Internal Server Error` if the blocking worker task fails to
///   complete (for example because it panicked).
pub async fn get_hexagon_properties(
    state: Arc<AppState>,
    Query(params): Query<HexagonPropertiesParams>,
) -> Result<Json<HexagonPropertiesResponse>, (StatusCode, String)> {
    /// Maps an empty string to `None` and anything else to an owned copy.
    fn non_empty(s: &str) -> Option<String> {
        if s.is_empty() {
            None
        } else {
            Some(s.to_string())
        }
    }

    // 1. Parse H3 cell ID
    let cell = h3o::CellIndex::from_str(&params.h3)
        .map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", e)))?;
    let cell_u64: u64 = cell.into();

    // 2. Validate resolution
    let resolution = usize::from(params.resolution);
    if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
        return Err((
            StatusCode::BAD_REQUEST,
            "Invalid or non-precomputed resolution".to_string(),
        ));
    }

    // 3. Parse filters (reuse existing filter parsing logic from get_hexagons).
    //    Entries that are malformed or reference an unknown feature are skipped.
    let parsed_filters: Vec<ParsedFilter> = params
        .filters
        .as_deref()
        .filter(|s| !s.is_empty())
        .map(|s| {
            s.split(',')
                .filter_map(|entry| {
                    let parts: Vec<&str> = entry.splitn(3, ':').collect();
                    if parts.len() != 3 {
                        return None;
                    }
                    let name = parts[0].trim();
                    let min = parts[1].trim().parse::<f64>().ok()?;
                    let max = parts[2].trim().parse::<f64>().ok()?;
                    let feat_idx = state.data.feature_names.iter().position(|n| n == name)?;
                    Some(ParsedFilter { feat_idx, min, max })
                })
                .collect()
        })
        .unwrap_or_default();

    // Move CPU-heavy work off the async executor
    let result = tokio::task::spawn_blocking(move || {
        let h3_data = &state.h3_cells[resolution];
        let data = &state.data;
        let num_features = data.num_features;
        let feature_data = &data.feature_data;

        // 4. Find all rows in the requested cell that also pass the filters.
        //    All matches are collected so `total` reflects the full result set.
        let matching_rows: Vec<usize> = h3_data
            .iter()
            .enumerate()
            .filter_map(|(idx, &h3_cell)| {
                let keep = h3_cell == cell_u64
                    && row_passes_filters(idx, &parsed_filters, feature_data, num_features);
                keep.then_some(idx)
            })
            .collect();

        let total = matching_rows.len();
        let limit = params.limit.unwrap_or(100).min(500);
        let offset = params.offset.unwrap_or(0);
        // `offset` comes straight from the client, so a plain `+` could
        // overflow; saturate instead of panicking or wrapping.
        let truncated = total > offset.saturating_add(limit);

        // 5. Extract properties for paginated subset
        let properties: Vec<Property> = matching_rows
            .iter()
            .skip(offset)
            .take(limit)
            .map(|&row| {
                // Build dynamic features map, omitting non-finite values.
                let mut features = FxHashMap::default();
                let base = row * num_features;
                for (feat_idx, feat_name) in data.feature_names.iter().enumerate() {
                    let v = feature_data[base + feat_idx];
                    if v.is_finite() {
                        features.insert(feat_name.clone(), v);
                    }
                }

                Property {
                    address: non_empty(&data.address[row]),
                    postcode: non_empty(&data.postcode[row]),
                    property_type: non_empty(&data.property_type[row]),
                    built_form: non_empty(&data.built_form[row]),
                    current_energy_rating: non_empty(&data.current_energy_rating[row]),
                    potential_energy_rating: non_empty(&data.potential_energy_rating[row]),
                    lat: data.lat[row],
                    lon: data.lon[row],
                    features,
                }
            })
            .collect();

        HexagonPropertiesResponse {
            properties,
            total,
            limit,
            offset,
            truncated,
        }
    })
    .await
    // A failed worker (e.g. a panic) becomes a 500 instead of panicking here.
    .map_err(|e| {
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("Property lookup task failed: {}", e),
        )
    })?;

    Ok(Json(result))
}