has issues
This commit is contained in:
parent
2e112d7398
commit
c645b0f1d4
96 changed files with 2147083 additions and 5787 deletions
|
|
@ -1,4 +1,5 @@
|
|||
mod actual_listings;
|
||||
pub mod crime_by_year;
|
||||
mod places;
|
||||
mod poi;
|
||||
mod postcodes;
|
||||
|
|
@ -35,11 +36,12 @@ where
|
|||
}
|
||||
|
||||
pub use actual_listings::{ActualListing, ActualListingData};
|
||||
pub use crime_by_year::CrimeByYearData;
|
||||
pub use places::{normalize_search_text, PlaceData};
|
||||
pub use poi::{resolve_poi_category_filter, POICategoryGroup, POIData};
|
||||
pub use poi::{resolve_poi_category_filter, POICategoryGroup, POIData, SchoolMetadata};
|
||||
pub use postcodes::{OutcodeData, PostcodeData};
|
||||
pub use property::{
|
||||
precompute_h3, FeatureStats, Histogram, PostcodePoiMetrics, PropertyData, QuantRef,
|
||||
RenovationEvent,
|
||||
precompute_h3, FeatureStats, Histogram, HistoricalPrice, PostcodePoiMetrics, PropertyData,
|
||||
QuantRef, RenovationEvent,
|
||||
};
|
||||
pub use travel_time::{slugify, TravelTimeStore};
|
||||
|
|
|
|||
187
server-rs/src/data/crime_by_year.rs
Normal file
187
server-rs/src/data/crime_by_year.rs
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
//! Per-LSOA per-crime-type per-year crime counts, loaded from a side parquet
|
||||
//! and used by the right pane to plot crime-over-time. Filtering is not
|
||||
//! supported — this data is display-only.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use polars::lazy::frame::LazyFrame;
|
||||
use polars::prelude::PlRefPath;
|
||||
use polars::prelude::*;
|
||||
use rustc_hash::FxHashMap;
|
||||
use tracing::info;
|
||||
|
||||
use super::run_polars_io;
|
||||
|
||||
/// Marker that ends the name of every per-year crime column in the parquet,
/// for example `"Burglary (by year)"`. Stripping it from a column name leaves
/// the crime type's display name (`"Burglary"`).
pub const BY_YEAR_SUFFIX: &str = " (by year)";
|
||||
|
||||
/// One observation in a crime time series: the count recorded for a year.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct YearPoint {
    /// Year the count belongs to.
    pub year: i32,
    /// Crime count for that year, stored as `f32` (the loader accepts both
    /// integer and floating-point source values).
    pub count: f32,
}

/// One per crime type: ordered list of (year, count) for a single LSOA.
#[derive(Debug, Clone, PartialEq)]
pub struct LsoaCrimeSeries {
    /// Index into `crime_types`.
    pub type_idx: u16,
    /// Observations for this crime type; the loader sorts them by ascending year.
    pub points: Vec<YearPoint>,
}
|
||||
|
||||
pub struct CrimeByYearData {
|
||||
/// All crime type names in stable insertion order.
|
||||
pub crime_types: Vec<String>,
|
||||
/// All years available for each crime type, same order as `crime_types`.
|
||||
pub years_by_type: Vec<Vec<i32>>,
|
||||
/// LSOA code → all available per-type series for that LSOA.
|
||||
pub series_by_lsoa: FxHashMap<String, Vec<LsoaCrimeSeries>>,
|
||||
}
|
||||
|
||||
impl CrimeByYearData {
|
||||
pub fn empty() -> Self {
|
||||
Self {
|
||||
crime_types: Vec::new(),
|
||||
years_by_type: Vec::new(),
|
||||
series_by_lsoa: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load(path: &Path) -> anyhow::Result<Self> {
|
||||
run_polars_io(|| Self::load_inner(path))
|
||||
}
|
||||
|
||||
fn load_inner(path: &Path) -> anyhow::Result<Self> {
|
||||
info!("Loading crime-by-year from {}", path.display());
|
||||
let pl_path = PlRefPath::try_from_path(path).with_context(|| {
|
||||
format!(
|
||||
"Failed to normalize crime-by-year parquet path {}",
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
let df = LazyFrame::scan_parquet(pl_path, Default::default())
|
||||
.with_context(|| format!("Failed to scan crime-by-year parquet at {}", path.display()))?
|
||||
.collect()
|
||||
.with_context(|| {
|
||||
format!("Failed to read crime-by-year parquet at {}", path.display())
|
||||
})?;
|
||||
|
||||
let lsoa_col = df
|
||||
.column("LSOA code")
|
||||
.context("crime-by-year parquet missing 'LSOA code' column")?
|
||||
.str()
|
||||
.context("'LSOA code' column is not a string")?;
|
||||
let lsoa_values: Vec<String> = lsoa_col
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(row, value)| {
|
||||
let value =
|
||||
value.with_context(|| format!("crime-by-year row {row} has null LSOA code"))?;
|
||||
let trimmed = value.trim();
|
||||
if trimmed.is_empty() {
|
||||
bail!("crime-by-year row {row} has blank LSOA code");
|
||||
}
|
||||
Ok(trimmed.to_string())
|
||||
})
|
||||
.collect::<anyhow::Result<Vec<_>>>()?;
|
||||
|
||||
// Discover crime-type columns (anything with the by-year suffix).
|
||||
let crime_type_cols: Vec<(String, String)> = df
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.filter_map(|name| {
|
||||
let name = name.as_str();
|
||||
name.strip_suffix(BY_YEAR_SUFFIX)
|
||||
.map(|stripped| (stripped.to_string(), name.to_string()))
|
||||
})
|
||||
.collect();
|
||||
|
||||
if crime_type_cols.is_empty() {
|
||||
bail!(
|
||||
"crime-by-year parquet at {} has no '* (by year)' columns",
|
||||
path.display()
|
||||
);
|
||||
}
|
||||
|
||||
let crime_types: Vec<String> = crime_type_cols.iter().map(|(t, _)| t.clone()).collect();
|
||||
|
||||
let mut series_by_lsoa: FxHashMap<String, Vec<LsoaCrimeSeries>> = FxHashMap::default();
|
||||
let mut years_by_type: Vec<Vec<i32>> = Vec::with_capacity(crime_type_cols.len());
|
||||
let row_count = df.height();
|
||||
|
||||
for (type_idx, (_, col_name)) in crime_type_cols.iter().enumerate() {
|
||||
let mut years_for_type = std::collections::BTreeSet::new();
|
||||
let col = df
|
||||
.column(col_name)
|
||||
.with_context(|| format!("Missing crime-by-year column '{col_name}'"))?;
|
||||
let list_ca = col
|
||||
.list()
|
||||
.with_context(|| format!("Column '{col_name}' is not a list"))?;
|
||||
|
||||
for row in 0..row_count {
|
||||
let Some(inner) = list_ca.get_as_series(row) else {
|
||||
continue;
|
||||
};
|
||||
if inner.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let structs = inner
|
||||
.struct_()
|
||||
.with_context(|| format!("Inner of '{col_name}' is not a struct"))?;
|
||||
let years = structs
|
||||
.field_by_name("year")
|
||||
.with_context(|| format!("Missing 'year' field in '{col_name}'"))?;
|
||||
let counts = structs
|
||||
.field_by_name("count")
|
||||
.with_context(|| format!("Missing 'count' field in '{col_name}'"))?;
|
||||
|
||||
let mut points: Vec<YearPoint> = Vec::with_capacity(inner.len());
|
||||
for idx in 0..inner.len() {
|
||||
let yr = match years.get(idx).ok() {
|
||||
Some(AnyValue::Int32(y)) => y,
|
||||
Some(AnyValue::Int64(y)) => y as i32,
|
||||
_ => continue,
|
||||
};
|
||||
let cnt = match counts.get(idx).ok() {
|
||||
Some(AnyValue::Float32(c)) => c,
|
||||
Some(AnyValue::Float64(c)) => c as f32,
|
||||
Some(AnyValue::Int32(c)) => c as f32,
|
||||
Some(AnyValue::Int64(c)) => c as f32,
|
||||
_ => continue,
|
||||
};
|
||||
points.push(YearPoint {
|
||||
year: yr,
|
||||
count: cnt,
|
||||
});
|
||||
years_for_type.insert(yr);
|
||||
}
|
||||
if points.is_empty() {
|
||||
continue;
|
||||
}
|
||||
points.sort_by_key(|p| p.year);
|
||||
|
||||
series_by_lsoa
|
||||
.entry(lsoa_values[row].clone())
|
||||
.or_default()
|
||||
.push(LsoaCrimeSeries {
|
||||
type_idx: type_idx as u16,
|
||||
points,
|
||||
});
|
||||
}
|
||||
years_by_type.push(years_for_type.into_iter().collect());
|
||||
}
|
||||
|
||||
info!(
|
||||
lsoas = series_by_lsoa.len(),
|
||||
crime_types = crime_types.len(),
|
||||
"Crime-by-year data loaded"
|
||||
);
|
||||
|
||||
Ok(Self {
|
||||
crime_types,
|
||||
years_by_type,
|
||||
series_by_lsoa,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -131,6 +131,51 @@ pub fn resolve_poi_category_filter(category_values: &[String], categories: &str)
|
|||
selected
|
||||
}
|
||||
|
||||
/// Metadata for state-funded school POIs (sourced from the DfE GIAS register).
|
||||
/// Every field is optional because GIAS does not populate every column for every
|
||||
/// establishment type (e.g. nurseries have no sixth form, FE colleges no FSM).
|
||||
#[derive(Serialize, Clone, Default)]
|
||||
pub struct SchoolMetadata {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub phase: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub r#type: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub type_group: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub age_range: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub gender: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub religious_character: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub admissions_policy: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub nursery_provision: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub sixth_form: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub capacity: Option<u32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub pupils: Option<u32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub fsm_percent: Option<f32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub trust: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub address: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub postcode: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub local_authority: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub website: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub telephone: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub head_name: Option<String>,
|
||||
}
|
||||
|
||||
pub struct POIData {
|
||||
/// Contiguous buffer holding all POI ID strings end-to-end.
|
||||
id_buffer: String,
|
||||
|
|
@ -149,6 +194,11 @@ pub struct POIData {
|
|||
/// uniform subset when the POI count exceeds the per-request limit.
|
||||
/// Computed once at load time so the same POIs are always chosen for a given viewport.
|
||||
pub priority: Vec<u32>,
|
||||
/// Indirection table: row idx → index into `school_meta`, or u32::MAX when
|
||||
/// the POI is not a school. Keeps the per-row overhead at 4 bytes regardless
|
||||
/// of how many school metadata fields we carry.
|
||||
school_meta_idx: Vec<u32>,
|
||||
school_meta: Vec<SchoolMetadata>,
|
||||
}
|
||||
|
||||
impl POIData {
|
||||
|
|
@ -158,6 +208,16 @@ impl POIData {
|
|||
let length = self.id_lengths[row] as usize;
|
||||
&self.id_buffer[offset..offset + length]
|
||||
}
|
||||
|
||||
/// Get the school metadata for a given row, or None if not a school.
|
||||
pub fn school(&self, row: usize) -> Option<&SchoolMetadata> {
|
||||
let idx = self.school_meta_idx[row];
|
||||
if idx == u32::MAX {
|
||||
None
|
||||
} else {
|
||||
Some(&self.school_meta[idx as usize])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_str_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<String>> {
|
||||
|
|
@ -195,6 +255,146 @@ fn extract_f32_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<f32>> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// Read an optional string column. Returns None when the column itself is missing
|
||||
/// (older POI parquets without the school_* extension); returns Some(vec) of
|
||||
/// length row_count where each entry is None for null cells.
|
||||
fn extract_optional_str_col(
|
||||
df: &DataFrame,
|
||||
name: &str,
|
||||
) -> anyhow::Result<Option<Vec<Option<String>>>> {
|
||||
let column = match df.column(name) {
|
||||
Ok(column) => column,
|
||||
Err(_) => return Ok(None),
|
||||
};
|
||||
let string_column = column
|
||||
.str()
|
||||
.with_context(|| format!("Column '{name}' is not a string column"))?;
|
||||
Ok(Some(
|
||||
string_column
|
||||
.into_iter()
|
||||
.map(|value| value.map(ToString::to_string))
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
|
||||
fn extract_optional_u32_col(
|
||||
df: &DataFrame,
|
||||
name: &str,
|
||||
) -> anyhow::Result<Option<Vec<Option<u32>>>> {
|
||||
let column = match df.column(name) {
|
||||
Ok(column) => column,
|
||||
Err(_) => return Ok(None),
|
||||
};
|
||||
let cast = column
|
||||
.cast(&DataType::Int64)
|
||||
.with_context(|| format!("Failed to cast column '{name}' to Int64"))?;
|
||||
let int_column = cast
|
||||
.i64()
|
||||
.with_context(|| format!("Column '{name}' is not an integer column"))?;
|
||||
Ok(Some(
|
||||
int_column
|
||||
.into_iter()
|
||||
.map(|value| value.and_then(|v| if v < 0 { None } else { Some(v as u32) }))
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
|
||||
fn extract_optional_f32_col(
|
||||
df: &DataFrame,
|
||||
name: &str,
|
||||
) -> anyhow::Result<Option<Vec<Option<f32>>>> {
|
||||
let column = match df.column(name) {
|
||||
Ok(column) => column,
|
||||
Err(_) => return Ok(None),
|
||||
};
|
||||
let cast = column
|
||||
.cast(&DataType::Float32)
|
||||
.with_context(|| format!("Failed to cast column '{name}' to Float32"))?;
|
||||
let float_column = cast
|
||||
.f32()
|
||||
.with_context(|| format!("Column '{name}' is not a float32 column"))?;
|
||||
Ok(Some(float_column.into_iter().collect()))
|
||||
}
|
||||
|
||||
fn build_school_meta(
|
||||
row_count: usize,
|
||||
df: &DataFrame,
|
||||
) -> anyhow::Result<(Vec<u32>, Vec<SchoolMetadata>)> {
|
||||
let phase = extract_optional_str_col(df, "school_phase")?;
|
||||
if phase.is_none() {
|
||||
// POI parquet predates the school metadata extension — record an empty
|
||||
// table and a sentinel-filled index, so callers transparently see None.
|
||||
return Ok((vec![u32::MAX; row_count], Vec::new()));
|
||||
}
|
||||
|
||||
let phase = phase.unwrap();
|
||||
let r#type = extract_optional_str_col(df, "school_type")?.unwrap_or_default();
|
||||
let type_group = extract_optional_str_col(df, "school_type_group")?.unwrap_or_default();
|
||||
let age_range = extract_optional_str_col(df, "school_age_range")?.unwrap_or_default();
|
||||
let gender = extract_optional_str_col(df, "school_gender")?.unwrap_or_default();
|
||||
let religious_character =
|
||||
extract_optional_str_col(df, "school_religious_character")?.unwrap_or_default();
|
||||
let admissions_policy =
|
||||
extract_optional_str_col(df, "school_admissions_policy")?.unwrap_or_default();
|
||||
let nursery_provision =
|
||||
extract_optional_str_col(df, "school_nursery_provision")?.unwrap_or_default();
|
||||
let sixth_form = extract_optional_str_col(df, "school_sixth_form")?.unwrap_or_default();
|
||||
let capacity = extract_optional_u32_col(df, "school_capacity")?.unwrap_or_default();
|
||||
let pupils = extract_optional_u32_col(df, "school_pupils")?.unwrap_or_default();
|
||||
let fsm_percent = extract_optional_f32_col(df, "school_fsm_percent")?.unwrap_or_default();
|
||||
let trust = extract_optional_str_col(df, "school_trust")?.unwrap_or_default();
|
||||
let address = extract_optional_str_col(df, "school_address")?.unwrap_or_default();
|
||||
let postcode = extract_optional_str_col(df, "school_postcode")?.unwrap_or_default();
|
||||
let local_authority =
|
||||
extract_optional_str_col(df, "school_local_authority")?.unwrap_or_default();
|
||||
let website = extract_optional_str_col(df, "school_website")?.unwrap_or_default();
|
||||
let telephone = extract_optional_str_col(df, "school_telephone")?.unwrap_or_default();
|
||||
let head_name = extract_optional_str_col(df, "school_head_name")?.unwrap_or_default();
|
||||
|
||||
let fetch_str = |col: &Vec<Option<String>>, row: usize| -> Option<String> {
|
||||
col.get(row).cloned().flatten()
|
||||
};
|
||||
let fetch_u32 =
|
||||
|col: &Vec<Option<u32>>, row: usize| -> Option<u32> { col.get(row).copied().flatten() };
|
||||
let fetch_f32 =
|
||||
|col: &Vec<Option<f32>>, row: usize| -> Option<f32> { col.get(row).copied().flatten() };
|
||||
|
||||
let mut idx = vec![u32::MAX; row_count];
|
||||
let mut meta = Vec::new();
|
||||
for row in 0..row_count {
|
||||
let type_group_val = fetch_str(&type_group, row);
|
||||
let type_val = fetch_str(&r#type, row);
|
||||
// type_group is present for every GIAS row, so use it as the sentinel
|
||||
// for "this POI is a school" — matches the pipeline guarantee.
|
||||
if type_group_val.is_none() && type_val.is_none() {
|
||||
continue;
|
||||
}
|
||||
idx[row] = meta.len() as u32;
|
||||
meta.push(SchoolMetadata {
|
||||
phase: fetch_str(&phase, row),
|
||||
r#type: type_val,
|
||||
type_group: type_group_val,
|
||||
age_range: fetch_str(&age_range, row),
|
||||
gender: fetch_str(&gender, row),
|
||||
religious_character: fetch_str(&religious_character, row),
|
||||
admissions_policy: fetch_str(&admissions_policy, row),
|
||||
nursery_provision: fetch_str(&nursery_provision, row),
|
||||
sixth_form: fetch_str(&sixth_form, row),
|
||||
capacity: fetch_u32(&capacity, row),
|
||||
pupils: fetch_u32(&pupils, row),
|
||||
fsm_percent: fetch_f32(&fsm_percent, row),
|
||||
trust: fetch_str(&trust, row),
|
||||
address: fetch_str(&address, row),
|
||||
postcode: fetch_str(&postcode, row),
|
||||
local_authority: fetch_str(&local_authority, row),
|
||||
website: fetch_str(&website, row),
|
||||
telephone: fetch_str(&telephone, row),
|
||||
head_name: fetch_str(&head_name, row),
|
||||
});
|
||||
}
|
||||
Ok((idx, meta))
|
||||
}
|
||||
|
||||
impl POIData {
|
||||
pub fn load(parquet_path: &Path) -> anyhow::Result<Self> {
|
||||
super::run_polars_io(|| Self::load_inner(parquet_path))
|
||||
|
|
@ -259,6 +459,9 @@ impl POIData {
|
|||
// preventing visual "shuffling" when panning the map.
|
||||
let priority = generate_priorities(row_count);
|
||||
|
||||
let (school_meta_idx, school_meta) = build_school_meta(row_count, &df)?;
|
||||
info!(schools = school_meta.len(), "Loaded GIAS school metadata");
|
||||
|
||||
info!("POI data loading complete.");
|
||||
|
||||
Ok(POIData {
|
||||
|
|
@ -273,6 +476,8 @@ impl POIData {
|
|||
lng,
|
||||
emoji,
|
||||
priority,
|
||||
school_meta_idx,
|
||||
school_meta,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -569,6 +569,13 @@ pub struct RenovationEvent {
|
|||
pub event: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub struct HistoricalPrice {
|
||||
pub year: i32,
|
||||
pub month: u8,
|
||||
pub price: i64,
|
||||
}
|
||||
|
||||
/// Lightweight reference to quantization parameters for decoding u16 feature data.
|
||||
pub struct QuantRef<'a> {
|
||||
pub dequant_a: &'a [f32],
|
||||
|
|
@ -824,6 +831,10 @@ pub struct PropertyData {
|
|||
/// Interned postcodes: reader is thread-safe, keys index into it.
|
||||
postcode_interner: lasso::RodeoReader,
|
||||
postcode_keys: Vec<lasso::Spur>,
|
||||
/// Interned LSOA (2021) codes per row.
|
||||
/// Used to look up per-LSOA side tables (e.g. crime time series).
|
||||
lsoa_interner: lasso::RodeoReader,
|
||||
lsoa_keys: Vec<lasso::Spur>,
|
||||
/// Rows for each postcode, keyed by the interned postcode key.
|
||||
postcode_row_index: FxHashMap<lasso::Spur, Vec<u32>>,
|
||||
/// Inverted index from address tokens to property rows.
|
||||
|
|
@ -850,6 +861,9 @@ pub struct PropertyData {
|
|||
/// Per-row renovation events. Keyed by (permuted) row index.
|
||||
/// Only rows with events are present in the map.
|
||||
renovation_history: FxHashMap<u32, Vec<RenovationEvent>>,
|
||||
/// Per-row historical sale transactions (Land Registry price-paid).
|
||||
/// Keyed by (permuted) row index. Only rows with prices are present.
|
||||
historical_prices: FxHashMap<u32, Vec<HistoricalPrice>>,
|
||||
property_sub_type: FxHashMap<u32, String>,
|
||||
price_qualifier: FxHashMap<u32, String>,
|
||||
}
|
||||
|
|
@ -867,6 +881,11 @@ impl PropertyData {
|
|||
self.postcode_interner.resolve(&self.postcode_keys[row])
|
||||
}
|
||||
|
||||
/// Get the LSOA (2021) code for a given row.
|
||||
pub fn lsoa(&self, row: usize) -> &str {
|
||||
self.lsoa_interner.resolve(&self.lsoa_keys[row])
|
||||
}
|
||||
|
||||
/// Get postcode components for field-level borrowing (avoids conflicting borrows with feature_data).
|
||||
pub fn postcode_parts(&self) -> (&lasso::RodeoReader, &[lasso::Spur]) {
|
||||
(&self.postcode_interner, &self.postcode_keys)
|
||||
|
|
@ -1044,6 +1063,14 @@ impl PropertyData {
|
|||
.unwrap_or(&[])
|
||||
}
|
||||
|
||||
/// Get historical sale transactions for a given row (empty slice if none).
|
||||
pub fn historical_prices(&self, row: usize) -> &[HistoricalPrice] {
|
||||
self.historical_prices
|
||||
.get(&(row as u32))
|
||||
.map(|v| v.as_slice())
|
||||
.unwrap_or(&[])
|
||||
}
|
||||
|
||||
/// Get property sub-type for a given row.
|
||||
pub fn property_sub_type(&self, row: usize) -> Option<&str> {
|
||||
self.property_sub_type
|
||||
|
|
@ -1505,6 +1532,15 @@ impl PropertyData {
|
|||
}
|
||||
}
|
||||
|
||||
// LSOA (2021) code per row, brought in via the postcode join. Used as a
|
||||
// lookup key into per-LSOA side tables (e.g. crime time series).
|
||||
match schema.get("lsoa21") {
|
||||
Some(dtype) if matches!(dtype, DataType::String) || dtype.is_categorical() => {}
|
||||
Some(dtype) => bail!("Column 'lsoa21' has unexpected type {:?}", dtype),
|
||||
None => bail!("Required column 'lsoa21' not found in joined property data"),
|
||||
}
|
||||
select_exprs.push(col("lsoa21").cast(DataType::String));
|
||||
|
||||
// Enum features as String
|
||||
for &name in &enum_names {
|
||||
select_exprs.push(col(name).cast(DataType::String));
|
||||
|
|
@ -1519,6 +1555,10 @@ impl PropertyData {
|
|||
if has_renovation_history {
|
||||
select_exprs.push(col("renovation_history"));
|
||||
}
|
||||
let has_historical_prices = schema.get("historical_prices").is_some();
|
||||
if has_historical_prices {
|
||||
select_exprs.push(col("historical_prices"));
|
||||
}
|
||||
let df = combined_lf
|
||||
.filter(col("lat").is_not_null().and(col("lon").is_not_null()))
|
||||
.select(select_exprs)
|
||||
|
|
@ -1655,9 +1695,33 @@ impl PropertyData {
|
|||
Ok(vec![None; row_count])
|
||||
}
|
||||
};
|
||||
let extract_required_trimmed_string_col =
|
||||
|df: &DataFrame, name: &str| -> anyhow::Result<Vec<String>> {
|
||||
let column = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Required column '{name}' not found in parquet"))?;
|
||||
let string_column = column
|
||||
.str()
|
||||
.with_context(|| format!("Column '{name}' is not a string column"))?;
|
||||
string_column
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(row, value)| {
|
||||
let value = value.with_context(|| {
|
||||
format!("Required column '{name}' has null at row {row}")
|
||||
})?;
|
||||
let trimmed = value.trim();
|
||||
if trimmed.is_empty() {
|
||||
bail!("Required column '{name}' has blank value at row {row}");
|
||||
}
|
||||
Ok(trimmed.to_string())
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
|
||||
let property_sub_type_raw = extract_optional_string_col(&df, "Property sub-type")?;
|
||||
let price_qualifier_raw = extract_optional_string_col(&df, "Price qualifier")?;
|
||||
let lsoa_raw = extract_required_trimmed_string_col(&df, "lsoa21")?;
|
||||
|
||||
tracing::info!("Building enum features");
|
||||
// enum_col_major: Vec<(values_list, encoded_as_f32)>
|
||||
|
|
@ -1801,6 +1865,70 @@ impl PropertyData {
|
|||
FxHashMap::default()
|
||||
};
|
||||
|
||||
// Extract historical_prices: List<Struct{year: i32, month: u8, price: i64}>
|
||||
let mut historical_prices_raw: FxHashMap<u32, Vec<HistoricalPrice>> =
|
||||
if has_historical_prices {
|
||||
tracing::info!("Extracting historical prices");
|
||||
let prices_col = df
|
||||
.column("historical_prices")
|
||||
.context("Missing historical_prices column")?;
|
||||
let list_ca = prices_col
|
||||
.list()
|
||||
.context("historical_prices is not a list column")?;
|
||||
|
||||
let mut history: FxHashMap<u32, Vec<HistoricalPrice>> = FxHashMap::default();
|
||||
for old_row in 0..row_count {
|
||||
if let Some(inner) = list_ca.get_as_series(old_row) {
|
||||
if inner.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let structs = inner
|
||||
.struct_()
|
||||
.context("historical_prices inner is not a struct")?;
|
||||
let years = structs
|
||||
.field_by_name("year")
|
||||
.context("Missing 'year' field in historical_prices struct")?;
|
||||
let months = structs
|
||||
.field_by_name("month")
|
||||
.context("Missing 'month' field in historical_prices struct")?;
|
||||
let prices = structs
|
||||
.field_by_name("price")
|
||||
.context("Missing 'price' field in historical_prices struct")?;
|
||||
|
||||
let mut row_prices = Vec::new();
|
||||
for idx in 0..inner.len() {
|
||||
let year = years.get(idx).context("Failed to get year value")?;
|
||||
let month = months.get(idx).context("Failed to get month value")?;
|
||||
let price = prices.get(idx).context("Failed to get price value")?;
|
||||
let AnyValue::Int32(year_i32) = year else {
|
||||
bail!("historical_prices.year is not Int32 at row {old_row}, got {year:?}");
|
||||
};
|
||||
let AnyValue::UInt8(month_u8) = month else {
|
||||
bail!("historical_prices.month is not UInt8 at row {old_row}, got {month:?}");
|
||||
};
|
||||
let AnyValue::Int64(price_i64) = price else {
|
||||
bail!("historical_prices.price is not Int64 at row {old_row}, got {price:?}");
|
||||
};
|
||||
row_prices.push(HistoricalPrice {
|
||||
year: year_i32,
|
||||
month: month_u8,
|
||||
price: price_i64,
|
||||
});
|
||||
}
|
||||
if !row_prices.is_empty() {
|
||||
history.insert(old_row as u32, row_prices);
|
||||
}
|
||||
}
|
||||
}
|
||||
tracing::info!(
|
||||
properties_with_prices = history.len(),
|
||||
"Historical prices extracted"
|
||||
);
|
||||
history
|
||||
} else {
|
||||
FxHashMap::default()
|
||||
};
|
||||
|
||||
// Free the projected joined frame before building the row-major matrix.
|
||||
drop(df);
|
||||
|
||||
|
|
@ -1904,6 +2032,14 @@ impl PropertyData {
|
|||
}
|
||||
let postcode_interner = postcode_rodeo.into_reader();
|
||||
|
||||
// Intern LSOA codes (permuted).
|
||||
let mut lsoa_rodeo = lasso::Rodeo::default();
|
||||
let mut lsoa_keys: Vec<lasso::Spur> = Vec::with_capacity(row_count);
|
||||
for &perm_index in perm.iter() {
|
||||
lsoa_keys.push(lsoa_rodeo.get_or_intern(&lsoa_raw[perm_index as usize]));
|
||||
}
|
||||
let lsoa_interner = lsoa_rodeo.into_reader();
|
||||
|
||||
let row_to_poi_metric_idx: Vec<u32> = if poi_metrics.is_empty() {
|
||||
vec![NO_POI_METRIC_ROW; row_count]
|
||||
} else {
|
||||
|
|
@ -1939,6 +2075,20 @@ impl PropertyData {
|
|||
map
|
||||
};
|
||||
|
||||
// Re-key historical_prices by permuted row index
|
||||
let historical_prices: FxHashMap<u32, Vec<HistoricalPrice>> = {
|
||||
let mut map = FxHashMap::with_capacity_and_hasher(
|
||||
historical_prices_raw.len(),
|
||||
Default::default(),
|
||||
);
|
||||
for (new_row, &old_row) in perm.iter().enumerate() {
|
||||
if let Some(prices) = historical_prices_raw.remove(&old_row) {
|
||||
map.insert(new_row as u32, prices);
|
||||
}
|
||||
}
|
||||
map
|
||||
};
|
||||
|
||||
// Permute optional string columns into sparse HashMaps
|
||||
let property_sub_type: FxHashMap<u32, String> = {
|
||||
let mut map = FxHashMap::default();
|
||||
|
|
@ -2061,6 +2211,8 @@ impl PropertyData {
|
|||
address_lengths,
|
||||
postcode_interner,
|
||||
postcode_keys,
|
||||
lsoa_interner,
|
||||
lsoa_keys,
|
||||
postcode_row_index,
|
||||
address_token_index,
|
||||
address_prefix_index,
|
||||
|
|
@ -2072,6 +2224,7 @@ impl PropertyData {
|
|||
enum_counts,
|
||||
approx_build_date_bits,
|
||||
renovation_history,
|
||||
historical_prices,
|
||||
property_sub_type,
|
||||
price_qualifier,
|
||||
})
|
||||
|
|
|
|||
|
|
@ -180,6 +180,20 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
raw: false,
|
||||
absolute: true,
|
||||
}),
|
||||
Feature::Enum(EnumFeatureConfig {
|
||||
name: "Within conservation area",
|
||||
order: Some(&["Yes", "No"]),
|
||||
description: "Whether the postcode point falls inside a designated conservation area",
|
||||
detail: "Historic England conservation area boundaries, matched to the postcode representative point. The national dataset is indicative rather than definitive, so boundary-sensitive decisions should be checked with the local planning authority.",
|
||||
source: "conservation-areas",
|
||||
}),
|
||||
Feature::Enum(EnumFeatureConfig {
|
||||
name: "Listed building",
|
||||
order: Some(&["Yes", "No"]),
|
||||
description: "Whether this property appears to match a Historic England listed building entry",
|
||||
detail: "Historic England National Heritage List for England listed-building points, matched conservatively to property addresses using the listed-entry name and nearby postcode candidates. Treat this as a screening signal, not a legal determination: verify any specific property on the NHLE and with the local planning authority.",
|
||||
source: "listed-buildings",
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Noise (dB)",
|
||||
bounds: Bounds::Fixed {
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ mod routes;
|
|||
mod state;
|
||||
pub mod utils;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
|
|
@ -167,6 +167,18 @@ struct Cli {
|
|||
#[arg(long)]
|
||||
tiles: PathBuf,
|
||||
|
||||
/// Optional PMTiles raster overlay for high-resolution strategic noise.
|
||||
#[arg(long, env = "NOISE_OVERLAY_TILES")]
|
||||
noise_overlay_tiles: Option<PathBuf>,
|
||||
|
||||
/// Optional PMTiles vector overlay for crime heatmap points.
|
||||
#[arg(long, env = "CRIME_HOTSPOT_TILES")]
|
||||
crime_hotspot_tiles: Option<PathBuf>,
|
||||
|
||||
/// Optional PMTiles vector overlay for Trees Outside Woodland polygons.
|
||||
#[arg(long, env = "TREE_OVERLAY_TILES")]
|
||||
tree_overlay_tiles: Option<PathBuf>,
|
||||
|
||||
/// Path to the frontend dist directory (optional; disables static serving and OG injection when omitted)
|
||||
#[arg(long)]
|
||||
dist: Option<PathBuf>,
|
||||
|
|
@ -207,6 +219,10 @@ struct Cli {
|
|||
#[arg(long, env = "ACTUAL_LISTINGS_PATH")]
|
||||
actual_listings_path: Option<PathBuf>,
|
||||
|
||||
/// Optional path to the per-LSOA per-year crime parquet (display-only side table for the right pane).
|
||||
#[arg(long, env = "CRIME_BY_YEAR_PATH")]
|
||||
crime_by_year_path: Option<PathBuf>,
|
||||
|
||||
/// Google Maps API key for Street View metadata lookups
|
||||
#[arg(long, env = "GOOGLE_MAPS_API_KEY")]
|
||||
google_maps_api_key: String,
|
||||
|
|
@ -280,6 +296,36 @@ async fn capture_server_error_responses(
|
|||
response
|
||||
}
|
||||
|
||||
async fn init_optional_tile_reader(
|
||||
label: &'static str,
|
||||
path: Option<&PathBuf>,
|
||||
) -> anyhow::Result<Option<Arc<routes::TileReader>>> {
|
||||
let Some(path) = path else {
|
||||
info!("{label} overlay tiles not configured");
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
if !path.exists() {
|
||||
bail!("{label} overlay PMTiles not found: {}", path.display());
|
||||
}
|
||||
|
||||
info!("Loading {label} overlay PMTiles from {}", path.display());
|
||||
Ok(Some(Arc::new(routes::init_tile_reader(path).await?)))
|
||||
}
|
||||
|
||||
/// Decide which overlay file to use.
///
/// An explicitly configured path always wins and is returned as-is, without
/// checking that it exists. Otherwise the function looks for `file_name` in
/// the directory that contains `tiles_path` and returns it only if it exists.
/// Returns `None` when nothing is configured and either `tiles_path` has no
/// parent directory or the sibling file is absent.
fn configured_or_default_overlay_path(
    configured: &Option<PathBuf>,
    tiles_path: &Path,
    file_name: &str,
) -> Option<PathBuf> {
    match configured {
        Some(explicit) => Some(explicit.clone()),
        None => {
            let sibling = tiles_path.parent()?.join(file_name);
            if sibling.exists() {
                Some(sibling)
            } else {
                None
            }
        }
    }
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
|
@ -424,6 +470,29 @@ async fn main() -> anyhow::Result<()> {
|
|||
let tile_reader = Arc::new(routes::init_tile_reader(tiles_path).await?);
|
||||
info!("PMTiles loaded successfully");
|
||||
|
||||
let noise_overlay_tiles = configured_or_default_overlay_path(
|
||||
&cli.noise_overlay_tiles,
|
||||
tiles_path,
|
||||
"noise_lden_10m.pmtiles",
|
||||
);
|
||||
let crime_hotspot_tiles = configured_or_default_overlay_path(
|
||||
&cli.crime_hotspot_tiles,
|
||||
tiles_path,
|
||||
"crime_hotspots.pmtiles",
|
||||
);
|
||||
let tree_overlay_tiles = configured_or_default_overlay_path(
|
||||
&cli.tree_overlay_tiles,
|
||||
tiles_path,
|
||||
"trees_outside_woodlands.pmtiles",
|
||||
);
|
||||
|
||||
let noise_overlay_reader =
|
||||
init_optional_tile_reader("Noise", noise_overlay_tiles.as_ref()).await?;
|
||||
let crime_hotspot_reader =
|
||||
init_optional_tile_reader("Crime hotspots", crime_hotspot_tiles.as_ref()).await?;
|
||||
let tree_overlay_reader =
|
||||
init_optional_tile_reader("Trees outside woodland", tree_overlay_tiles.as_ref()).await?;
|
||||
|
||||
let feature_name_to_index: rustc_hash::FxHashMap<String, usize> = property_data
|
||||
.feature_names
|
||||
.iter()
|
||||
|
|
@ -550,6 +619,18 @@ async fn main() -> anyhow::Result<()> {
|
|||
None
|
||||
};
|
||||
|
||||
let crime_by_year = if let Some(path) = cli.crime_by_year_path.as_ref() {
|
||||
if !path.exists() {
|
||||
bail!("Crime-by-year parquet not found: {}", path.display());
|
||||
}
|
||||
let data = data::CrimeByYearData::load(path)?;
|
||||
trim_allocator("crime-by-year load");
|
||||
Arc::new(data)
|
||||
} else {
|
||||
info!("CRIME_BY_YEAR_PATH not set; crime-over-time chart disabled");
|
||||
Arc::new(data::CrimeByYearData::empty())
|
||||
};
|
||||
|
||||
let app_state = AppState {
|
||||
data: property_data,
|
||||
grid,
|
||||
|
|
@ -576,6 +657,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
gemini_model: cli.gemini_model,
|
||||
travel_time_store,
|
||||
actual_listings,
|
||||
crime_by_year,
|
||||
token_cache,
|
||||
superuser_token_cache,
|
||||
share_cache,
|
||||
|
|
@ -610,6 +692,9 @@ async fn main() -> anyhow::Result<()> {
|
|||
|
||||
let reader_tile = tile_reader.clone();
|
||||
let reader_style = tile_reader.clone();
|
||||
let reader_noise_overlay = noise_overlay_reader.clone();
|
||||
let reader_crime_hotspot = crime_hotspot_reader.clone();
|
||||
let reader_tree_overlay = tree_overlay_reader.clone();
|
||||
let public_url_tiles = initial_state.public_url.clone();
|
||||
|
||||
let api = Router::new()
|
||||
|
|
@ -773,6 +858,42 @@ async fn main() -> anyhow::Result<()> {
|
|||
})
|
||||
.layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route(
|
||||
"/api/overlays/noise/{z}/{x}/{y}",
|
||||
get(move |path| {
|
||||
routes::get_overlay_tile(
|
||||
reader_noise_overlay.clone(),
|
||||
routes::OverlayTileFormat::RasterPng,
|
||||
"noise",
|
||||
path,
|
||||
)
|
||||
})
|
||||
.layer(ConcurrencyLimitLayer::new(30)),
|
||||
)
|
||||
.route(
|
||||
"/api/overlays/crime-hotspots/{z}/{x}/{y}",
|
||||
get(move |path| {
|
||||
routes::get_overlay_tile(
|
||||
reader_crime_hotspot.clone(),
|
||||
routes::OverlayTileFormat::VectorMvtGzip,
|
||||
"crime-hotspots",
|
||||
path,
|
||||
)
|
||||
})
|
||||
.layer(ConcurrencyLimitLayer::new(30)),
|
||||
)
|
||||
.route(
|
||||
"/api/overlays/trees-outside-woodlands/{z}/{x}/{y}",
|
||||
get(move |path| {
|
||||
routes::get_overlay_tile(
|
||||
reader_tree_overlay.clone(),
|
||||
routes::OverlayTileFormat::VectorMvtGzip,
|
||||
"trees-outside-woodlands",
|
||||
path,
|
||||
)
|
||||
})
|
||||
.layer(ConcurrencyLimitLayer::new(30)),
|
||||
)
|
||||
.route("/health", get(|| async { "ok" }))
|
||||
.route(
|
||||
"/metrics",
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ mod invites;
|
|||
mod journey;
|
||||
mod me;
|
||||
mod newsletter;
|
||||
mod overlays;
|
||||
mod pb_proxy;
|
||||
mod places;
|
||||
mod pois;
|
||||
|
|
@ -42,6 +43,7 @@ pub use invites::{get_invite, get_invites, post_invites, post_redeem_invite};
|
|||
pub use journey::get_journey;
|
||||
pub use me::get_me;
|
||||
pub use newsletter::patch_newsletter;
|
||||
pub use overlays::{get_overlay_tile, OverlayTileFormat};
|
||||
pub use pb_proxy::proxy_to_pocketbase;
|
||||
pub use places::get_places;
|
||||
pub use pois::{get_poi_categories, get_pois};
|
||||
|
|
@ -56,6 +58,6 @@ pub use shorten::{get_share_links, get_short_url, post_shorten};
|
|||
pub use streetview::get_streetview;
|
||||
pub use stripe_webhook::post_stripe_webhook;
|
||||
pub use telemetry::post_telemetry;
|
||||
pub use tiles::{get_style, get_tile, init_tile_reader};
|
||||
pub use tiles::{get_style, get_tile, init_tile_reader, TileReader};
|
||||
pub use travel_destinations::get_travel_destinations;
|
||||
pub use travel_modes::get_travel_modes;
|
||||
|
|
|
|||
|
|
@ -408,6 +408,9 @@ pub fn build_system_prompt(
|
|||
- \"cycle\" / \"bike\" / \"cycling\" = bicycle mode\n\
|
||||
- \"walk\" / \"walking\" / \"on foot\" = walking mode\n\
|
||||
- \"train\" / \"tube\" / \"bus\" / \"public transport\" / \"commute\" = transit mode\n\
|
||||
- \"without buses\" / \"no bus\" / \"rail only\" = transit-no-bus mode\n\
|
||||
- \"no change\" / \"no transfer\" / \"direct\" / \"single bus/train\" = transit-no-change mode\n\
|
||||
- \"no change and no bus\" / \"direct rail/tube\" = transit-no-change-no-bus mode\n\
|
||||
- If a mode appears in the available mode list but is not named above, you may still \
|
||||
use the exact mode string from the list.\n\
|
||||
\n\
|
||||
|
|
@ -417,7 +420,7 @@ pub fn build_system_prompt(
|
|||
mention it in \"notes\" (e.g. \"No travel data for: Gatwick Airport\") and do NOT \
|
||||
include a travel_time_filter for it.\n\
|
||||
\n\
|
||||
Travel time values are in MINUTES (0-120 range).\n\
|
||||
Travel time values are in MINUTES (0-90 range; data is capped at 90 min).\n\
|
||||
- \"within 30 minutes\" = set \"max\": 30\n\
|
||||
- \"at least 10 minutes\" = set \"min\": 10\n\
|
||||
- \"30-45 minute commute\" = set \"min\": 30 and \"max\": 45 on the same travel_time_filter\n\
|
||||
|
|
@ -1256,11 +1259,15 @@ pub async fn post_ai_filters(
|
|||
))
|
||||
}
|
||||
|
||||
/// Maximum travel-time minutes the data can contain. Matches the Java pipeline's
|
||||
/// MAX_TRIP_DURATION_MINUTES and the frontend's MAX_TRAVEL_MINUTES.
|
||||
const TRAVEL_TIME_MAX_MINUTES: f64 = 90.0;
|
||||
|
||||
fn travel_time_minute_field(item: &Value, key: &str) -> Option<f32> {
|
||||
item.get(key)
|
||||
.and_then(|val| val.as_f64())
|
||||
.filter(|val| val.is_finite())
|
||||
.map(|val| val.clamp(0.0, 120.0) as f32)
|
||||
.map(|val| val.clamp(0.0, TRAVEL_TIME_MAX_MINUTES) as f32)
|
||||
}
|
||||
|
||||
fn parse_travel_time_bounds(item: &Value) -> (Option<f32>, Option<f32>) {
|
||||
|
|
@ -1527,7 +1534,9 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn travel_time_bounds_clamp_and_order_range() {
|
||||
// Data ceiling is 90 (matches Java MAX_TRIP_DURATION_MINUTES).
|
||||
// Inputs outside [0, 90] clamp; a pair that is inverted after clamping (min > max) comes back reordered.
|
||||
let item = json!({ "min": 150, "max": -10 });
|
||||
assert_eq!(parse_travel_time_bounds(&item), (Some(0.0), Some(120.0)));
|
||||
assert_eq!(parse_travel_time_bounds(&item), (Some(0.0), Some(90.0)));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -138,6 +138,7 @@ fn build_frontend_params(
|
|||
zoom: f64,
|
||||
filters_str: Option<&str>,
|
||||
travel_params: &[String],
|
||||
overlay_params: &[String],
|
||||
share: Option<&str>,
|
||||
) -> String {
|
||||
let mut parts = vec![
|
||||
|
|
@ -159,18 +160,23 @@ fn build_frontend_params(
|
|||
parts.push(format!("tt={}", urlencoding::encode(entry.trim())));
|
||||
}
|
||||
}
|
||||
for entry in overlay_params {
|
||||
if !entry.is_empty() {
|
||||
parts.push(format!("overlay={}", urlencoding::encode(entry.trim())));
|
||||
}
|
||||
}
|
||||
if let Some(share) = share.filter(|value| !value.is_empty()) {
|
||||
parts.push(format!("share={}", urlencoding::encode(share)));
|
||||
}
|
||||
parts.join("&")
|
||||
}
|
||||
|
||||
fn collect_travel_state_params(query: Option<&str>) -> Vec<String> {
|
||||
fn collect_repeated_state_params(query: Option<&str>, target_key: &str) -> Vec<String> {
|
||||
query
|
||||
.into_iter()
|
||||
.flat_map(|qs| url::form_urlencoded::parse(qs.as_bytes()))
|
||||
.filter_map(|(key, value)| {
|
||||
if key == "tt" && !value.is_empty() {
|
||||
if key == target_key && !value.is_empty() {
|
||||
Some(value.into_owned())
|
||||
} else {
|
||||
None
|
||||
|
|
@ -179,6 +185,14 @@ fn collect_travel_state_params(query: Option<&str>) -> Vec<String> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn collect_travel_state_params(query: Option<&str>) -> Vec<String> {
|
||||
collect_repeated_state_params(query, "tt")
|
||||
}
|
||||
|
||||
fn collect_overlay_state_params(query: Option<&str>) -> Vec<String> {
|
||||
collect_repeated_state_params(query, "overlay")
|
||||
}
|
||||
|
||||
pub async fn get_export(
|
||||
State(shared): State<Arc<SharedState>>,
|
||||
headers: HeaderMap,
|
||||
|
|
@ -221,6 +235,7 @@ pub async fn get_export(
|
|||
.iter()
|
||||
.any(|entry| entry.filter_min.is_some() && entry.filter_max.is_some());
|
||||
let travel_state_params = collect_travel_state_params(uri.query());
|
||||
let overlay_state_params = collect_overlay_state_params(uri.query());
|
||||
let fields_str = params.fields;
|
||||
let share_code = params.share;
|
||||
|
||||
|
|
@ -241,6 +256,7 @@ pub async fn get_export(
|
|||
zoom,
|
||||
filters_str.as_deref(),
|
||||
&travel_state_params,
|
||||
&overlay_state_params,
|
||||
share_code.as_deref(),
|
||||
);
|
||||
|
||||
|
|
@ -776,6 +792,16 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn collect_overlay_state_params_preserves_repeated_overlay_params() {
    // Both `overlay` entries must come back, in query order; the unrelated
    // `bounds` parameter must not leak into the result.
    let collected =
        collect_overlay_state_params(Some("bounds=1,2,3,4&overlay=noise&overlay=crime-hotspots"));
    let expected = vec!["noise", "crime-hotspots"];

    assert_eq!(collected, expected);
}
|
||||
|
||||
#[test]
|
||||
fn export_query_deserializes_when_tt_is_a_single_string() {
|
||||
let uri: Uri = "/api/export?bounds=1,2,3,4&tt=transit%3Abank%3ABank%2520station%3A0%3A52"
|
||||
|
|
|
|||
|
|
@ -66,6 +66,20 @@ pub struct PricePoint {
|
|||
pub price: f32,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct CrimeYearPoint {
|
||||
pub year: i32,
|
||||
pub count: f32,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct CrimeYearStats {
|
||||
/// Underlying crime type (e.g. "Burglary"). Matches existing crime feature
|
||||
/// names with the `" (avg/yr)"` suffix stripped.
|
||||
pub name: String,
|
||||
pub points: Vec<CrimeYearPoint>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct FilterExclusion {
|
||||
pub name: String,
|
||||
|
|
@ -114,6 +128,8 @@ pub struct HexagonStatsResponse {
|
|||
pub enum_features: Vec<EnumFeatureStats>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub price_history: Vec<PricePoint>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub crime_by_year: Vec<CrimeYearStats>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub central_postcode: Option<String>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
|
|
@ -593,6 +609,14 @@ pub async fn get_hexagon_stats(
|
|||
let price_history =
|
||||
stats::extract_price_history(&matching_rows, &state.data, &state.feature_name_to_index);
|
||||
|
||||
let crime_by_year = stats::compute_crime_by_year(
|
||||
&matching_rows,
|
||||
&state.data,
|
||||
&state.crime_by_year,
|
||||
fields_specified,
|
||||
&field_set,
|
||||
);
|
||||
|
||||
let (mut numeric_features, enum_features_out) = stats::compute_feature_stats(
|
||||
&matching_rows,
|
||||
&state.data,
|
||||
|
|
@ -626,6 +650,7 @@ pub async fn get_hexagon_stats(
|
|||
numeric_features,
|
||||
enum_features: enum_features_out,
|
||||
price_history,
|
||||
crime_by_year,
|
||||
central_postcode,
|
||||
filter_exclusions,
|
||||
})
|
||||
|
|
|
|||
85
server-rs/src/routes/overlays.rs
Normal file
85
server-rs/src/routes/overlays.rs
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::Path;
|
||||
use axum::http::{header, HeaderMap, HeaderValue, StatusCode};
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use pmtiles::TileCoord;
|
||||
use tracing::warn;
|
||||
|
||||
use super::TileReader;
|
||||
|
||||
/// Wire format of the tiles stored in an overlay archive.
///
/// The format decides which `Content-Type` (and, for gzip-compressed vector
/// tiles, which `Content-Encoding`) the overlay endpoint advertises.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OverlayTileFormat {
    /// Mapbox Vector Tile protobuf served with a gzip content encoding.
    VectorMvtGzip,
    /// PNG raster tile served without a content encoding.
    RasterPng,
}

impl OverlayTileFormat {
    /// MIME type sent in the `Content-Type` header for this format.
    const fn content_type(self) -> &'static str {
        match self {
            Self::VectorMvtGzip => "application/x-protobuf",
            Self::RasterPng => "image/png",
        }
    }

    /// Whether responses in this format must carry `Content-Encoding: gzip`.
    const fn is_gzip_encoded(self) -> bool {
        matches!(self, Self::VectorMvtGzip)
    }
}
|
||||
|
||||
pub async fn get_overlay_tile(
|
||||
reader: Option<Arc<TileReader>>,
|
||||
format: OverlayTileFormat,
|
||||
overlay_name: &'static str,
|
||||
Path((zoom, col, row)): Path<(u8, u32, u32)>,
|
||||
) -> Response {
|
||||
let Some(reader) = reader else {
|
||||
return StatusCode::NOT_FOUND.into_response();
|
||||
};
|
||||
|
||||
let tile_coord = match TileCoord::new(zoom, col, row) {
|
||||
Ok(tile_coord) => tile_coord,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
overlay = overlay_name,
|
||||
zoom,
|
||||
col,
|
||||
row,
|
||||
error = %err,
|
||||
"Invalid overlay tile coordinate"
|
||||
);
|
||||
return StatusCode::BAD_REQUEST.into_response();
|
||||
}
|
||||
};
|
||||
|
||||
match reader.get_tile(tile_coord).await {
|
||||
Ok(Some(tile_bytes)) => {
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(
|
||||
header::CONTENT_TYPE,
|
||||
HeaderValue::from_static(format.content_type()),
|
||||
);
|
||||
headers.insert(
|
||||
header::CACHE_CONTROL,
|
||||
HeaderValue::from_static("public, max-age=86400"),
|
||||
);
|
||||
if format.is_gzip_encoded() {
|
||||
headers.insert(header::CONTENT_ENCODING, HeaderValue::from_static("gzip"));
|
||||
}
|
||||
|
||||
(StatusCode::OK, headers, tile_bytes.to_vec()).into_response()
|
||||
}
|
||||
Ok(None) => StatusCode::NO_CONTENT.into_response(),
|
||||
Err(err) => {
|
||||
warn!(
|
||||
overlay = overlay_name,
|
||||
zoom,
|
||||
col,
|
||||
row,
|
||||
error = %err,
|
||||
"Failed to get overlay tile"
|
||||
);
|
||||
StatusCode::INTERNAL_SERVER_ERROR.into_response()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -7,7 +7,7 @@ use tracing::info;
|
|||
|
||||
use crate::api_error::ApiError;
|
||||
use crate::consts::MAX_POIS_PER_REQUEST;
|
||||
use crate::data::{resolve_poi_category_filter, POICategoryGroup};
|
||||
use crate::data::{resolve_poi_category_filter, POICategoryGroup, SchoolMetadata};
|
||||
use crate::parsing::require_bounds;
|
||||
use crate::state::SharedState;
|
||||
|
||||
|
|
@ -22,6 +22,8 @@ pub struct POI {
|
|||
lat: f32,
|
||||
lng: f32,
|
||||
emoji: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
school: Option<SchoolMetadata>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
@ -93,6 +95,7 @@ pub async fn get_pois(
|
|||
lat: state.poi_data.lat[row],
|
||||
lng: state.poi_data.lng[row],
|
||||
emoji: state.poi_data.emoji.get(row).to_string(),
|
||||
school: state.poi_data.school(row).cloned(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
|
|||
|
|
@ -149,6 +149,14 @@ pub async fn get_postcode_stats(
|
|||
let price_history =
|
||||
stats::extract_price_history(&matching_rows, &state.data, &state.feature_name_to_index);
|
||||
|
||||
let crime_by_year = stats::compute_crime_by_year(
|
||||
&matching_rows,
|
||||
&state.data,
|
||||
&state.crime_by_year,
|
||||
fields_specified,
|
||||
&field_set,
|
||||
);
|
||||
|
||||
let (mut numeric_features, enum_features_out) = stats::compute_feature_stats(
|
||||
&matching_rows,
|
||||
&state.data,
|
||||
|
|
@ -181,6 +189,7 @@ pub async fn get_postcode_stats(
|
|||
numeric_features,
|
||||
enum_features: enum_features_out,
|
||||
price_history,
|
||||
crime_by_year,
|
||||
central_postcode: None,
|
||||
filter_exclusions,
|
||||
})
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ use tracing::{info, warn};
|
|||
|
||||
use crate::auth::OptionalUser;
|
||||
use crate::consts::PROPERTIES_LIMIT;
|
||||
use crate::data::RenovationEvent;
|
||||
use crate::data::{HistoricalPrice, RenovationEvent};
|
||||
use crate::licensing::{check_license_bounds, resolve_share_code};
|
||||
use crate::parsing::{
|
||||
cell_for_row_cached, h3_cell_bounds, needs_parent, parse_filters_with_poi, row_passes_filters,
|
||||
|
|
@ -47,6 +47,8 @@ pub struct Property {
|
|||
pub property_sub_type: Option<String>,
|
||||
pub price_qualifier: Option<String>,
|
||||
pub former_council_house: Option<String>,
|
||||
pub within_conservation_area: Option<String>,
|
||||
pub listed_building: Option<String>,
|
||||
|
||||
// Numeric fields
|
||||
pub lat: f32,
|
||||
|
|
@ -57,6 +59,9 @@ pub struct Property {
|
|||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub renovation_history: Vec<RenovationEvent>,
|
||||
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub historical_prices: Vec<HistoricalPrice>,
|
||||
|
||||
#[serde(flatten)]
|
||||
pub features: FxHashMap<String, f32>,
|
||||
}
|
||||
|
|
@ -167,6 +172,7 @@ pub fn build_property(
|
|||
lat: state.data.lat[row],
|
||||
lon: state.data.lon[row],
|
||||
renovation_history: state.data.renovation_history(row).to_vec(),
|
||||
historical_prices: state.data.historical_prices(row).to_vec(),
|
||||
property_sub_type: state.data.property_sub_type(row).map(String::from),
|
||||
price_qualifier: state.data.price_qualifier(row).map(String::from),
|
||||
former_council_house: lookup_enum_value(
|
||||
|
|
@ -176,6 +182,20 @@ pub fn build_property(
|
|||
row,
|
||||
"Former council house",
|
||||
),
|
||||
within_conservation_area: lookup_enum_value(
|
||||
feature_name_to_index,
|
||||
&state.data,
|
||||
enum_values,
|
||||
row,
|
||||
"Within conservation area",
|
||||
),
|
||||
listed_building: lookup_enum_value(
|
||||
feature_name_to_index,
|
||||
&state.data,
|
||||
enum_values,
|
||||
row,
|
||||
"Listed building",
|
||||
),
|
||||
features,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -134,6 +134,7 @@ fn is_allowed_param_key(key: &str) -> bool {
|
|||
| "amenityCount2km"
|
||||
| "amenityCount5km"
|
||||
| "poi"
|
||||
| "overlay"
|
||||
| "tab"
|
||||
| "pc"
|
||||
| "tt"
|
||||
|
|
@ -570,6 +571,20 @@ mod tests {
|
|||
assert_eq!(params, "lat=51.5&lon=-0.1&zoom=12&share=oldcode");
|
||||
}
|
||||
|
||||
#[test]
fn preserves_overlay_params_for_share_links() {
    // Repeated `overlay` entries are allow-listed, so the sanitized query must
    // equal the input exactly.
    let query = "lat=51.5&lon=-0.1&zoom=12&overlay=noise&overlay=crime-hotspots";

    let sanitized = sanitized_query_params(query, false).unwrap();

    assert_eq!(sanitized, query);
}
|
||||
|
||||
#[test]
|
||||
fn escapes_html_attributes() {
|
||||
assert_eq!(escape_attr(r#""'><&"#), ""'><&");
|
||||
|
|
|
|||
|
|
@ -5,9 +5,13 @@ use rustc_hash::FxHashMap;
|
|||
use tracing::error;
|
||||
|
||||
use crate::consts::PRICE_HISTORY_POINTS_LIMIT;
|
||||
use crate::data::crime_by_year::CrimeByYearData;
|
||||
use crate::data::{FeatureStats, PostcodePoiMetrics, PropertyData};
|
||||
|
||||
use super::hexagon_stats::{EnumFeatureStats, HistogramStats, NumericFeatureStats, PricePoint};
|
||||
use super::hexagon_stats::{
|
||||
CrimeYearPoint, CrimeYearStats, EnumFeatureStats, HistogramStats, NumericFeatureStats,
|
||||
PricePoint,
|
||||
};
|
||||
|
||||
/// Extract price history (year, price) pairs from matching rows, downsampled if needed.
|
||||
pub fn extract_price_history(
|
||||
|
|
@ -251,6 +255,91 @@ pub fn compute_feature_stats(
|
|||
(numeric_features, enum_features_out)
|
||||
}
|
||||
|
||||
/// Compute property-weighted per-year crime means across the selection.
|
||||
///
|
||||
/// Each matching property contributes its LSOA's per-year counts; this is the
|
||||
/// same property-weighted-LSOA-average shape used elsewhere in the right pane.
|
||||
/// LSOAs with no series for a given crime type contribute 0 for that type
|
||||
/// (matching how the existing `(avg/yr)` columns treat missing crime types).
|
||||
pub fn compute_crime_by_year(
|
||||
matching_rows: &[usize],
|
||||
data: &PropertyData,
|
||||
crime_by_year: &CrimeByYearData,
|
||||
fields_specified: bool,
|
||||
field_set: &HashSet<String>,
|
||||
) -> Vec<CrimeYearStats> {
|
||||
if crime_by_year.crime_types.is_empty() || matching_rows.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// For each crime type, accumulate per-year sums and the count of rows whose
|
||||
// LSOA exists in the crime side table.
|
||||
let num_types = crime_by_year.crime_types.len();
|
||||
let mut per_type_year_sums: Vec<FxHashMap<i32, f64>> =
|
||||
(0..num_types).map(|_| FxHashMap::default()).collect();
|
||||
let mut per_type_row_counts: Vec<u32> = vec![0; num_types];
|
||||
|
||||
for &row in matching_rows {
|
||||
let lsoa = data.lsoa(row);
|
||||
let Some(series_list) = crime_by_year.series_by_lsoa.get(lsoa) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
// For every type the LSOA reports, add its per-year counts.
|
||||
// For types it doesn't report, treat the row as contributing 0 — so we
|
||||
// bump the row count for *every* known type below.
|
||||
for series in series_list {
|
||||
let acc = &mut per_type_year_sums[series.type_idx as usize];
|
||||
for point in &series.points {
|
||||
*acc.entry(point.year).or_insert(0.0) += point.count as f64;
|
||||
}
|
||||
}
|
||||
for c in per_type_row_counts.iter_mut() {
|
||||
*c += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let mut out = Vec::new();
|
||||
for (type_idx, name) in crime_by_year.crime_types.iter().enumerate() {
|
||||
// Crime types in the by-year side table are bare (e.g. "Burglary"), while
|
||||
// the configured feature names carry an " (avg/yr)" suffix. Match either
|
||||
// form so callers can pass the feature names they already know.
|
||||
if fields_specified {
|
||||
let with_suffix = format!("{name} (avg/yr)");
|
||||
if !field_set.contains(name.as_str()) && !field_set.contains(with_suffix.as_str()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let row_count = per_type_row_counts[type_idx];
|
||||
if row_count == 0 {
|
||||
continue;
|
||||
}
|
||||
let years = crime_by_year
|
||||
.years_by_type
|
||||
.get(type_idx)
|
||||
.map(Vec::as_slice)
|
||||
.unwrap_or(&[]);
|
||||
if years.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let denom = row_count as f64;
|
||||
let sums = &per_type_year_sums[type_idx];
|
||||
let points: Vec<CrimeYearPoint> = years
|
||||
.iter()
|
||||
.map(|&year| CrimeYearPoint {
|
||||
year,
|
||||
count: (sums.get(&year).copied().unwrap_or(0.0) / denom) as f32,
|
||||
})
|
||||
.collect();
|
||||
out.push(CrimeYearStats {
|
||||
name: name.clone(),
|
||||
points,
|
||||
});
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
pub fn compute_poi_feature_stats(
|
||||
matching_rows: &[usize],
|
||||
poi_metrics: &PostcodePoiMetrics,
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ use rustc_hash::FxHashMap;
|
|||
use crate::auth::TokenCache;
|
||||
use crate::bugsink::FrontendConfig as BugsinkFrontendConfig;
|
||||
use crate::data::{
|
||||
ActualListingData, OutcodeData, POICategoryGroup, POIData, PlaceData, PostcodeData,
|
||||
PropertyData, TravelTimeStore,
|
||||
ActualListingData, CrimeByYearData, OutcodeData, POICategoryGroup, POIData, PlaceData,
|
||||
PostcodeData, PropertyData, TravelTimeStore,
|
||||
};
|
||||
use crate::licensing::ShareBoundsCache;
|
||||
use crate::pocketbase::SuperuserTokenCache;
|
||||
|
|
@ -46,6 +46,9 @@ pub struct AppState {
|
|||
pub travel_time_store: Arc<TravelTimeStore>,
|
||||
/// Optional real-world listings (e.g. Rightmove / Zoopla data) loaded from ACTUAL_LISTINGS_PATH.
|
||||
pub actual_listings: Option<Arc<ActualListingData>>,
|
||||
/// Per-LSOA per-year crime counts used by the right pane to plot trends.
|
||||
/// Empty when the side parquet was not supplied.
|
||||
pub crime_by_year: Arc<CrimeByYearData>,
|
||||
/// Token validation cache (60s TTL)
|
||||
pub token_cache: Arc<TokenCache>,
|
||||
/// Cached PocketBase superuser token (10min TTL) to avoid rate-limiting
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue