Add property listing
This commit is contained in:
parent
51967fa880
commit
85f5770e09
3 changed files with 254 additions and 7 deletions
|
|
@ -6,7 +6,7 @@ use std::path::Path;
|
|||
|
||||
use crate::consts::{FEATURE_PERCENTILE_LOW, FEATURE_PERCENTILE_HIGH, HISTOGRAM_BINS, H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX};
|
||||
|
||||
/// Columns to exclude from feature discovery (not numeric features)
|
||||
/// Columns to exclude from feature discovery
|
||||
const EXCLUDED_COLUMNS: &[&str] = &["lat", "lon"];
|
||||
|
||||
/// H3 valid resolution range (0-15)
|
||||
|
|
@ -65,6 +65,13 @@ pub struct PropertyData {
|
|||
pub feature_data: Vec<f64>,
|
||||
/// Precomputed stats (percentiles + histogram) for each feature
|
||||
pub feature_stats: Vec<FeatureStats>,
|
||||
/// String fields for property details
|
||||
pub address: Vec<String>,
|
||||
pub postcode: Vec<String>,
|
||||
pub property_type: Vec<String>,
|
||||
pub built_form: Vec<String>,
|
||||
pub current_energy_rating: Vec<String>,
|
||||
pub potential_energy_rating: Vec<String>,
|
||||
}
|
||||
|
||||
/// Approximate a percentile from a histogram using linear interpolation.
|
||||
|
|
@ -213,14 +220,34 @@ impl PropertyData {
|
|||
let mut cols_needed: Vec<String> = vec!["lat".into(), "lon".into()];
|
||||
cols_needed.extend(feature_names.iter().cloned());
|
||||
|
||||
// Add string columns (using actual column names from parquet)
|
||||
let string_cols = vec![
|
||||
"pp_address", "postcode", "pp_property_type", "built_form",
|
||||
"current_energy_rating", "potential_energy_rating"
|
||||
];
|
||||
|
||||
// Build selection with proper casting
|
||||
let mut select_exprs: Vec<polars::prelude::Expr> = vec![];
|
||||
|
||||
// lat/lon as f64
|
||||
select_exprs.push(col("lat").cast(DataType::Float64));
|
||||
select_exprs.push(col("lon").cast(DataType::Float64));
|
||||
|
||||
// numeric features as f64
|
||||
for name in &feature_names {
|
||||
select_exprs.push(col(name.as_str()).cast(DataType::Float64));
|
||||
}
|
||||
|
||||
// string columns as string (check if they exist in schema)
|
||||
for &s_col in &string_cols {
|
||||
if schema.get(s_col).is_some() {
|
||||
select_exprs.push(col(s_col).cast(DataType::String));
|
||||
}
|
||||
}
|
||||
|
||||
let df = LazyFrame::scan_parquet(parquet_path, Default::default())
|
||||
.expect("Failed to scan parquet")
|
||||
.select(
|
||||
cols_needed
|
||||
.iter()
|
||||
.map(|c| col(c.as_str()).cast(DataType::Float64))
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.select(select_exprs)
|
||||
.collect()
|
||||
.expect("Failed to read parquet");
|
||||
|
||||
|
|
@ -262,6 +289,44 @@ impl PropertyData {
|
|||
})
|
||||
.collect();
|
||||
|
||||
// Extract string columns (before permutation)
|
||||
eprintln!("Extracting string columns...");
|
||||
let address_raw: Vec<String> = if let Ok(col) = df.column("pp_address") {
|
||||
col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect()
|
||||
} else {
|
||||
vec![String::new(); row_count]
|
||||
};
|
||||
|
||||
let postcode_raw: Vec<String> = if let Ok(col) = df.column("postcode") {
|
||||
col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect()
|
||||
} else {
|
||||
vec![String::new(); row_count]
|
||||
};
|
||||
|
||||
let property_type_raw: Vec<String> = if let Ok(col) = df.column("pp_property_type") {
|
||||
col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect()
|
||||
} else {
|
||||
vec![String::new(); row_count]
|
||||
};
|
||||
|
||||
let built_form_raw: Vec<String> = if let Ok(col) = df.column("built_form") {
|
||||
col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect()
|
||||
} else {
|
||||
vec![String::new(); row_count]
|
||||
};
|
||||
|
||||
let current_energy_rating_raw: Vec<String> = if let Ok(col) = df.column("current_energy_rating") {
|
||||
col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect()
|
||||
} else {
|
||||
vec![String::new(); row_count]
|
||||
};
|
||||
|
||||
let potential_energy_rating_raw: Vec<String> = if let Ok(col) = df.column("potential_energy_rating") {
|
||||
col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect()
|
||||
} else {
|
||||
vec![String::new(); row_count]
|
||||
};
|
||||
|
||||
// Sort all rows by spatial locality so that grid queries access
|
||||
// contiguous memory (sequential reads instead of random DRAM accesses).
|
||||
// Uses the same 0.01° grid cell as the spatial index for the sort key.
|
||||
|
|
@ -283,6 +348,14 @@ impl PropertyData {
|
|||
let lat: Vec<f64> = perm.iter().map(|&i| lat[i as usize]).collect();
|
||||
let lon: Vec<f64> = perm.iter().map(|&i| lon[i as usize]).collect();
|
||||
|
||||
// Apply permutation to string columns
|
||||
let address: Vec<String> = perm.iter().map(|&i| address_raw[i as usize].clone()).collect();
|
||||
let postcode: Vec<String> = perm.iter().map(|&i| postcode_raw[i as usize].clone()).collect();
|
||||
let property_type: Vec<String> = perm.iter().map(|&i| property_type_raw[i as usize].clone()).collect();
|
||||
let built_form: Vec<String> = perm.iter().map(|&i| built_form_raw[i as usize].clone()).collect();
|
||||
let current_energy_rating: Vec<String> = perm.iter().map(|&i| current_energy_rating_raw[i as usize].clone()).collect();
|
||||
let potential_energy_rating: Vec<String> = perm.iter().map(|&i| potential_energy_rating_raw[i as usize].clone()).collect();
|
||||
|
||||
// Transpose to row-major AND apply spatial permutation in one pass.
|
||||
// Result: all features for one row are contiguous, and spatially
|
||||
// nearby rows are adjacent in memory.
|
||||
|
|
@ -305,6 +378,12 @@ impl PropertyData {
|
|||
num_features,
|
||||
feature_data,
|
||||
feature_stats,
|
||||
address,
|
||||
postcode,
|
||||
property_type,
|
||||
built_form,
|
||||
current_energy_rating,
|
||||
potential_energy_rating,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,6 +72,7 @@ async fn main() {
|
|||
let state_hexagons = state.clone();
|
||||
let state_pois = state.clone();
|
||||
let state_poi_categories = state.clone();
|
||||
let state_hexagon_properties = state.clone();
|
||||
|
||||
let api = Router::new()
|
||||
.route(
|
||||
|
|
@ -89,6 +90,10 @@ async fn main() {
|
|||
.route(
|
||||
"/api/poi-categories",
|
||||
get(move || routes::get_poi_categories(state_poi_categories.clone())),
|
||||
)
|
||||
.route(
|
||||
"/api/hexagon-properties",
|
||||
get(move |query| routes::get_hexagon_properties(state_hexagon_properties.clone(), query)),
|
||||
);
|
||||
|
||||
// Static file serving for frontend
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use std::fmt::Write;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::Query;
|
||||
|
|
@ -459,3 +460,165 @@ pub async fn get_poi_categories(state: Arc<AppState>) -> Json<POICategoriesRespo
|
|||
|
||||
Json(result)
|
||||
}
|
||||
|
||||
// ── /api/hexagon-properties ──
|
||||
|
||||
/// Query-string parameters accepted by the `/api/hexagon-properties` handler.
#[derive(Deserialize)]
|
||||
pub struct HexagonPropertiesParams {
|
||||
/// H3 cell identifier as text; the handler parses it with `h3o::CellIndex::from_str`.
pub h3: String,
|
||||
/// Index into the precomputed per-resolution cell table (`state.h3_cells`).
/// The handler rejects values that are out of range or whose table is empty.
pub resolution: u8,
|
||||
/// Optional comma-separated list of `name:min:max` range filters.
/// Entries that do not parse, or that name an unknown feature, are ignored by the handler.
pub filters: Option<String>,
|
||||
/// Maximum number of properties to return; the handler defaults to 100 and caps at 500.
pub limit: Option<usize>,
|
||||
/// Number of matching rows to skip before the returned page; the handler defaults to 0.
pub offset: Option<usize>,
|
||||
}
|
||||
|
||||
/// One property row as serialized in the `/api/hexagon-properties` response.
#[derive(Serialize)]
|
||||
pub struct Property {
|
||||
// String fields
// The handler sets each of these to `None` when the stored string is empty.
|
||||
pub address: Option<String>,
|
||||
pub postcode: Option<String>,
|
||||
pub property_type: Option<String>,
|
||||
pub built_form: Option<String>,
|
||||
pub current_energy_rating: Option<String>,
|
||||
pub potential_energy_rating: Option<String>,
|
||||

||||
// Numeric fields
|
||||
pub lat: f64,
|
||||
pub lon: f64,
|
||||

||||
// All other numeric features stored as dynamic map
// Keyed by feature name; the handler only inserts finite values, and
// `#[serde(flatten)]` serializes the entries inline with the fields above.
|
||||
#[serde(flatten)]
|
||||
pub features: FxHashMap<String, f64>,
|
||||
}
|
||||
|
||||
/// Paginated response body of the `/api/hexagon-properties` endpoint.
#[derive(Serialize)]
|
||||
pub struct HexagonPropertiesResponse {
|
||||
/// The requested page of matching properties (at most `limit` entries).
pub properties: Vec<Property>,
|
||||
/// Number of rows that matched the cell and filters, counted before pagination.
pub total: usize,
|
||||
/// Page size actually applied by the handler (after its default and cap).
pub limit: usize,
|
||||
/// Number of matching rows skipped before this page.
pub offset: usize,
|
||||
/// `true` when matching rows remain beyond `offset + limit`.
pub truncated: bool,
|
||||
}
|
||||
|
||||
/// Helper function to check if a row passes all filters
|
||||
fn row_passes_filters(row: usize, filters: &[ParsedFilter], feature_data: &[f64], num_features: usize) -> bool {
|
||||
filters.iter().all(|f| {
|
||||
let v = feature_data[row * num_features + f.feat_idx];
|
||||
v.is_finite() && v >= f.min && v <= f.max
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn get_hexagon_properties(
|
||||
state: Arc<AppState>,
|
||||
Query(params): Query<HexagonPropertiesParams>,
|
||||
) -> Result<Json<HexagonPropertiesResponse>, (StatusCode, String)> {
|
||||
// 1. Parse H3 cell ID
|
||||
let cell = h3o::CellIndex::from_str(¶ms.h3)
|
||||
.map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", e)))?;
|
||||
let cell_u64: u64 = cell.into();
|
||||
|
||||
// 2. Validate resolution
|
||||
let resolution = params.resolution as usize;
|
||||
if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
|
||||
return Err((StatusCode::BAD_REQUEST, "Invalid or non-precomputed resolution".to_string()));
|
||||
}
|
||||
|
||||
// 3. Parse filters (reuse existing filter parsing logic from get_hexagons)
|
||||
let parsed_filters: Vec<ParsedFilter> = params
|
||||
.filters
|
||||
.as_deref()
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|s| {
|
||||
s.split(',')
|
||||
.filter_map(|entry| {
|
||||
let parts: Vec<&str> = entry.splitn(3, ':').collect();
|
||||
if parts.len() != 3 {
|
||||
return None;
|
||||
}
|
||||
let name = parts[0].trim();
|
||||
let min = parts[1].trim().parse::<f64>().ok()?;
|
||||
let max = parts[2].trim().parse::<f64>().ok()?;
|
||||
let feat_idx = state.data.feature_names.iter().position(|n| n == name)?;
|
||||
Some(ParsedFilter { feat_idx, min, max })
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
// Move CPU-heavy work off the async executor
|
||||
let result = tokio::task::spawn_blocking(move || {
|
||||
let h3_data = &state.h3_cells[resolution];
|
||||
let num_features = state.data.num_features;
|
||||
let feature_data = &state.data.feature_data;
|
||||
|
||||
// 4. Find all rows with matching H3 cell
|
||||
let matching_rows: Vec<usize> = h3_data
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, &h3_cell)| {
|
||||
if h3_cell == cell_u64 {
|
||||
// Apply feature filters
|
||||
if row_passes_filters(idx, &parsed_filters, feature_data, num_features) {
|
||||
Some(idx)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let total = matching_rows.len();
|
||||
let limit = params.limit.unwrap_or(100).min(500);
|
||||
let offset = params.offset.unwrap_or(0);
|
||||
let truncated = total > offset + limit;
|
||||
|
||||
// 5. Extract properties for paginated subset
|
||||
let properties: Vec<Property> = matching_rows
|
||||
.iter()
|
||||
.skip(offset)
|
||||
.take(limit)
|
||||
.map(|&row| {
|
||||
// Build dynamic features map
|
||||
let mut features = FxHashMap::default();
|
||||
let base = row * num_features;
|
||||
for (feat_idx, feat_name) in state.data.feature_names.iter().enumerate() {
|
||||
let v = feature_data[base + feat_idx];
|
||||
if v.is_finite() {
|
||||
features.insert(feat_name.clone(), v);
|
||||
}
|
||||
}
|
||||
|
||||
// Helper to get non-empty string
|
||||
let get_string = |s: &str| -> Option<String> {
|
||||
if s.is_empty() { None } else { Some(s.to_string()) }
|
||||
};
|
||||
|
||||
Property {
|
||||
address: get_string(&state.data.address[row]),
|
||||
postcode: get_string(&state.data.postcode[row]),
|
||||
property_type: get_string(&state.data.property_type[row]),
|
||||
built_form: get_string(&state.data.built_form[row]),
|
||||
current_energy_rating: get_string(&state.data.current_energy_rating[row]),
|
||||
potential_energy_rating: get_string(&state.data.potential_energy_rating[row]),
|
||||
lat: state.data.lat[row],
|
||||
lon: state.data.lon[row],
|
||||
features,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
HexagonPropertiesResponse {
|
||||
properties,
|
||||
total,
|
||||
limit,
|
||||
offset,
|
||||
truncated,
|
||||
}
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
Ok(Json(result))
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue