Fun changes
Some checks failed
CI / Python (lint + test) (push) Failing after 3m38s
CI / Rust (lint + test) (push) Failing after 3m32s
CI / Frontend (lint + typecheck) (push) Failing after 4m12s
Build and publish Docker image / build-and-push (push) Failing after 4m48s

This commit is contained in:
Andras Schmelczer 2026-04-04 22:59:44 +01:00
parent cd778dd088
commit 349a6c1d53
60 changed files with 1260 additions and 2600 deletions

View file

@ -166,6 +166,8 @@ pub struct PropertyData {
/// For enum features: maps feature index to list of possible string values.
/// Index in values list corresponds to the u16 value stored in feature_data.
pub enum_values: rustc_hash::FxHashMap<usize, Vec<String>>,
/// For enum features: maps feature index to per-value global counts (same order as enum_values).
pub enum_counts: rustc_hash::FxHashMap<usize, Vec<u64>>,
/// Per-row flag: true = construction date is approximate (from EPC band),
/// false = exact (from new-build transaction date).
/// Bit-packed: byte `row / 8`, bit `row % 8`. 8x smaller than Vec<bool>.
@ -173,12 +175,6 @@ pub struct PropertyData {
/// Per-row renovation events. Keyed by (permuted) row index.
/// Only rows with events are present in the map.
renovation_history: FxHashMap<u32, Vec<RenovationEvent>>,
/// Per-row listing features (key feature bullet points from online listings).
/// Only rows with features are present in the map.
listing_features: FxHashMap<u32, Vec<String>>,
/// Sparse per-row optional string columns from online listings.
/// Only rows with non-empty values are stored (saves ~1 GB vs Vec<Option<String>>).
listing_url: FxHashMap<u32, String>,
property_sub_type: FxHashMap<u32, String>,
price_qualifier: FxHashMap<u32, String>,
}
@ -215,19 +211,6 @@ impl PropertyData {
.unwrap_or(&[])
}
/// Returns the listing feature bullet points stored for `row`.
///
/// Rows without an entry in the sparse map yield an empty slice.
pub fn listing_features(&self, row: usize) -> &[String] {
    // The map is keyed by `u32`, so narrow the row index before the lookup.
    let key = row as u32;
    match self.listing_features.get(&key) {
        Some(items) => items.as_slice(),
        None => &[],
    }
}
/// Looks up the listing URL recorded for `row`; `None` when the row has no URL.
pub fn listing_url(&self, row: usize) -> Option<&str> {
    // The map is keyed by `u32`, so narrow the row index before the lookup.
    let key = row as u32;
    let url = self.listing_url.get(&key)?;
    Some(url.as_str())
}
/// Get property sub-type for a given row.
pub fn property_sub_type(&self, row: usize) -> Option<&str> {
self.property_sub_type
@ -534,8 +517,6 @@ impl PropertyData {
pub fn load(
properties_path: &Path,
postcode_features_path: &Path,
listings_buy_path: &Path,
listings_rent_path: &Path,
) -> anyhow::Result<Self> {
// Load postcode.parquet
tracing::info!(
@ -551,9 +532,8 @@ impl PropertyData {
// Load properties.parquet and join with postcode data for lat/lon + area features
tracing::info!("Loading properties from {:?}", properties_path);
let properties_lf = LazyFrame::scan_parquet(properties_path, Default::default())
.context("Failed to scan properties parquet")?
.with_columns([lit("Historical sale").alias("Listing status")]);
let properties_joined = properties_lf
.context("Failed to scan properties parquet")?;
let combined = properties_lf
.join(
postcode_df.clone().lazy(),
[col("Postcode")],
@ -562,77 +542,8 @@ impl PropertyData {
)
.collect()
.context("Failed to join properties with postcodes")?;
let prop_count = properties_joined.height();
tracing::info!(rows = prop_count, "Properties joined with postcodes");
// Load online listings (buy + rent) — these have their own lat/lon.
// Expects the new finder parquet format with human-readable column names.
// Helper closure: lazily scans the parquet file at `path`, left-joins it on
// "Postcode" against the captured `postcode_df`, collects the result and
// returns it. `label` is only used in log lines and error context messages.
let load_listings = |path: &Path, label: &str| -> anyhow::Result<DataFrame> {
tracing::info!("Loading {} listings from {:?}", label, path);
let lf = LazyFrame::scan_parquet(path, Default::default())
.with_context(|| format!("Failed to scan {label} listings parquet"))?;
// Join with postcodes for area features (listings have their own lat/lon)
// The postcode frame's "lat"/"lon" columns are dropped before the join.
let pc_no_coords = postcode_df.clone().lazy().drop(["lat", "lon"]);
let joined = lf
.join(
pc_no_coords,
[col("Postcode")],
[col("Postcode")],
JoinArgs::new(JoinType::Left),
)
.collect()
.with_context(|| format!("Failed to join {label} listings with postcodes"))?;
tracing::info!(rows = joined.height(), "{} listings joined", label);
Ok(joined)
};
let listings_buy = load_listings(listings_buy_path, "buy")?;
// Derive "Asking price per sqm" if not already present
let listings_buy = if listings_buy.schema().get("Asking price per sqm").is_none() {
listings_buy
.lazy()
.with_column(
(col("Asking price").cast(DataType::Float64) / col("Total floor area (sqm)"))
.round(0)
.alias("Asking price per sqm"),
)
.collect()
.context("Failed to derive Asking price per sqm")?
} else {
listings_buy
};
let listings_rent = load_listings(listings_rent_path, "rent")?;
// Concatenate all rows into a single DataFrame
tracing::info!("Concatenating all data sources");
let buy_count = listings_buy.height();
let rent_count = listings_rent.height();
let combined = concat(
[
properties_joined.lazy(),
listings_buy.lazy(),
listings_rent.lazy(),
],
UnionArgs {
parallel: false,
rechunk: true,
to_supertypes: true,
diagonal: true,
..Default::default()
},
)
.context("Failed to concat data sources")?
.collect()
.context("Failed to collect combined data")?;
let total_rows = combined.height();
tracing::info!(
properties = prop_count,
buy_listings = buy_count,
rent_listings = rent_count,
total = total_rows,
"All data sources combined"
);
tracing::info!(rows = total_rows, "Properties joined with postcodes");
// Get configured feature/enum names in config order
let numeric_names = features::all_numeric_feature_names();
@ -703,12 +614,11 @@ impl PropertyData {
}
}
// String columns for address/postcode and online listing metadata
// String columns for address/postcode and property metadata
for &string_col_name in &[
"Address per Property Register",
"Address per EPC",
"Postcode",
"Listing URL",
"Property sub-type",
"Price qualifier",
] {
@ -731,11 +641,6 @@ impl PropertyData {
if has_renovation_history {
select_exprs.push(col("renovation_history"));
}
let has_listing_features = schema.get("Listing features").is_some();
if has_listing_features {
select_exprs.push(col("Listing features"));
}
let df = combined
.lazy()
.select(select_exprs)
@ -827,7 +732,7 @@ impl PropertyData {
let address_raw = extract_string_col(&df, "Address per Property Register")?;
let postcode_raw = extract_string_col(&df, "Postcode")?;
// Extract optional string columns for online listing metadata
// Extract optional string columns
let extract_optional_string_col =
|df: &DataFrame, name: &str| -> anyhow::Result<Vec<Option<String>>> {
if let Ok(column) = df.column(name) {
@ -852,7 +757,6 @@ impl PropertyData {
}
};
let listing_url_raw = extract_optional_string_col(&df, "Listing URL")?;
let property_sub_type_raw = extract_optional_string_col(&df, "Property sub-type")?;
let price_qualifier_raw = extract_optional_string_col(&df, "Price qualifier")?;
@ -996,44 +900,6 @@ impl PropertyData {
FxHashMap::default()
};
// Extract listing features: List<String>
let mut listing_features_raw: FxHashMap<u32, Vec<String>> = if has_listing_features {
tracing::info!("Extracting listing features");
let feat_col = df
.column("Listing features")
.context("Missing Listing features column")?;
let list_ca = feat_col
.list()
.context("Listing features is not a list column")?;
let mut features_map: FxHashMap<u32, Vec<String>> = FxHashMap::default();
for old_row in 0..row_count {
if let Some(inner) = list_ca.get_as_series(old_row) {
if inner.is_empty() {
continue;
}
let str_ca = inner
.str()
.context("Listing features inner is not a string series")?;
let items: Vec<String> = str_ca
.into_iter()
.filter_map(|v| v.map(|s| s.to_string()))
.filter(|s| !s.is_empty())
.collect();
if !items.is_empty() {
features_map.insert(old_row as u32, items);
}
}
}
tracing::info!(
properties_with_features = features_map.len(),
"Listing features extracted"
);
features_map
} else {
FxHashMap::default()
};
// Sort all rows by spatial locality so that grid queries access
// contiguous memory (sequential reads instead of random DRAM accesses).
tracing::info!("Sorting rows by spatial locality");
@ -1103,28 +969,7 @@ impl PropertyData {
map
};
// Re-key listing_features by permuted row index
let listing_features: FxHashMap<u32, Vec<String>> = {
let mut map =
FxHashMap::with_capacity_and_hasher(listing_features_raw.len(), Default::default());
for (new_row, &old_row) in perm.iter().enumerate() {
if let Some(items) = listing_features_raw.remove(&old_row) {
map.insert(new_row as u32, items);
}
}
map
};
// Permute optional string columns into sparse HashMaps
let listing_url: FxHashMap<u32, String> = {
let mut map = FxHashMap::default();
for (new_row, &old_row) in perm.iter().enumerate() {
if let Some(ref s) = listing_url_raw[old_row as usize] {
map.insert(new_row as u32, s.clone());
}
}
map
};
let property_sub_type: FxHashMap<u32, String> = {
let mut map = FxHashMap::default();
for (new_row, &old_row) in perm.iter().enumerate() {
@ -1145,11 +990,24 @@ impl PropertyData {
};
// Build enum_values map: feature_index -> list of string values
// and enum_counts map: feature_index -> per-value global counts
let mut enum_values: rustc_hash::FxHashMap<usize, Vec<String>> =
rustc_hash::FxHashMap::default();
for (enum_idx, (values, _)) in enum_col_major.iter().enumerate() {
let mut enum_counts: rustc_hash::FxHashMap<usize, Vec<u64>> =
rustc_hash::FxHashMap::default();
for (enum_idx, (values, encoded)) in enum_col_major.iter().enumerate() {
let feature_idx = num_numeric + enum_idx;
enum_values.insert(feature_idx, values.clone());
let mut counts = vec![0u64; values.len()];
for &val in encoded {
if val.is_finite() {
let idx = val as usize;
if idx < counts.len() {
counts[idx] += 1;
}
}
}
enum_counts.insert(feature_idx, counts);
}
// Build feature_stats: numeric stats + placeholder stats for enums
@ -1232,10 +1090,9 @@ impl PropertyData {
postcode_interner,
postcode_keys,
enum_values,
enum_counts,
approx_build_date_bits,
renovation_history,
listing_features,
listing_url,
property_sub_type,
price_qualifier,
})

View file

@ -28,16 +28,12 @@ pub struct FeatureConfig {
pub raw: bool,
/// If true, the slider uses absolute min/max/step instead of percentile scaling
pub absolute: bool,
/// Listing modes this feature is available in (empty = all modes)
pub modes: &'static [&'static str],
/// Name of the linked feature that swaps when switching modes (empty = no link)
pub linked: &'static str,
}
/// Features whose histogram bins should be exactly 1 unit wide (one per integer).
/// p1/p99 are snapped to integer boundaries before binning.
pub const INTEGER_BIN_FEATURES: &[&str] =
&["Number of bedrooms & living rooms", "Bedrooms", "Bathrooms"];
&["Number of bedrooms & living rooms"];
pub struct EnumFeatureConfig {
pub name: &'static str,
@ -69,13 +65,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
FeatureGroup {
name: "Properties",
features: &[
Feature::Enum(EnumFeatureConfig {
name: "Listing status",
order: Some(&["Historical sale", "For sale", "For rent"]),
description: "Whether the property is from historical sales, currently for sale, or for rent",
detail: "Indicates the source of the property record: 'Historical sale' from HM Land Registry Price Paid data, 'For sale' from current online buy listings, or 'For rent' from current online rental listings.",
source: "online-listings",
}),
Feature::Enum(EnumFeatureConfig {
name: "Property type",
order: Some(&["Detached", "Semi-Detached", "Terraced", "Flats/Maisonettes", "Other"]),
@ -104,8 +93,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: true,
modes: &["historical"],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Estimated current price",
@ -121,25 +108,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: true,
modes: &["historical"],
linked: "Asking price",
}),
Feature::Numeric(FeatureConfig {
name: "Asking price",
bounds: Bounds::Fixed {
min: 0.0,
max: 2_500_000.0,
},
step: 10000.0,
description: "Asking price for properties currently listed for sale",
detail: "The advertised asking price from online property portals. Only available for 'For sale' listings.",
source: "online-listings",
prefix: "£",
suffix: "",
raw: false,
absolute: true,
modes: &["buy"],
linked: "Estimated current price",
}),
Feature::Numeric(FeatureConfig {
name: "Price per sqm",
@ -155,8 +123,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &["historical"],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Est. price per sqm",
@ -172,25 +138,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &["historical"],
linked: "Asking price per sqm",
}),
Feature::Numeric(FeatureConfig {
name: "Asking price per sqm",
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 100.0,
description: "Asking price divided by total floor area",
detail: "Calculated by dividing the listed asking price by the total floor area. Only available for properties currently listed for sale where floor area data exists.",
source: "online-listings",
prefix: "£",
suffix: "",
raw: false,
absolute: false,
modes: &["buy"],
linked: "Est. price per sqm",
}),
Feature::Numeric(FeatureConfig {
name: "Estimated monthly rent",
@ -203,25 +150,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/mo",
raw: false,
absolute: false,
modes: &["historical"],
linked: "Asking rent (monthly)",
}),
Feature::Numeric(FeatureConfig {
name: "Asking rent (monthly)",
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 50.0,
description: "Listed monthly rent for properties currently for rent",
detail: "The advertised rental price from online property portals, converted to a monthly figure where needed (e.g. weekly or yearly listings). Only available for 'For rent' listings.",
source: "online-listings",
prefix: "£",
suffix: "/mo",
raw: false,
absolute: false,
modes: &["rent"],
linked: "Estimated monthly rent",
}),
Feature::Numeric(FeatureConfig {
name: "Total floor area (sqm)",
@ -237,8 +165,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: " sqm",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Number of bedrooms & living rooms",
@ -254,42 +180,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: " rooms",
raw: false,
absolute: true,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Bedrooms",
bounds: Bounds::Fixed {
min: 0.0,
max: 10.0,
},
step: 1.0,
description: "Number of bedrooms from online listing",
detail: "Number of bedrooms as advertised in the online property listing. Only populated for online listings (for sale and for rent); null for historical sales.",
source: "online-listings",
prefix: "",
suffix: "",
raw: false,
absolute: true,
modes: &["buy", "rent"],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Bathrooms",
bounds: Bounds::Fixed {
min: 0.0,
max: 10.0,
},
step: 1.0,
description: "Number of bathrooms from online listing",
detail: "Number of bathrooms as advertised in the online property listing. Only populated for online listings (for sale and for rent); null for historical sales.",
source: "online-listings",
prefix: "",
suffix: "",
raw: false,
absolute: true,
modes: &["buy", "rent"],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Construction year",
@ -305,8 +195,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: true,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Date of last transaction",
@ -322,25 +210,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: true,
absolute: false,
modes: &["historical"],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Listing date",
bounds: Bounds::Fixed {
min: 2006.0,
max: 2026.0,
},
step: 1.0,
description: "Date the property was first listed online",
detail: "The date when the property listing first appeared on the online property portal. Stored as a datetime; converted to fractional year for filtering. Only populated for online listings.",
source: "online-listings",
prefix: "",
suffix: "",
raw: true,
absolute: false,
modes: &["buy", "rent"],
linked: "",
}),
Feature::Enum(EnumFeatureConfig {
name: "Former council house",
@ -377,8 +246,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: " m",
raw: false,
absolute: false,
modes: &["historical"],
linked: "",
}),
],
},
@ -399,8 +266,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: " km",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
],
},
@ -421,8 +286,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Good+ secondary schools within 2km",
@ -438,8 +301,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Good+ primary schools within 5km",
@ -455,8 +316,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Good+ secondary schools within 5km",
@ -472,8 +331,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Education, Skills and Training Score",
@ -489,8 +346,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
],
},
@ -508,8 +363,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Employment Score (rate)",
@ -522,8 +375,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Health Deprivation and Disability Score",
@ -539,8 +390,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Living Environment Score",
@ -556,8 +405,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Indoors Sub-domain Score",
@ -573,8 +420,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Outdoors Sub-domain Score",
@ -590,8 +435,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
],
},
@ -612,8 +455,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Minor crime per 1k residents (avg/yr)",
@ -629,8 +470,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Serious crime (avg/yr)",
@ -646,8 +485,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Minor crime (avg/yr)",
@ -663,8 +500,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Violence and sexual offences (avg/yr)",
@ -680,8 +515,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Burglary (avg/yr)",
@ -697,8 +530,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Robbery (avg/yr)",
@ -714,8 +545,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Vehicle crime (avg/yr)",
@ -731,8 +560,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Anti-social behaviour (avg/yr)",
@ -748,8 +575,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Criminal damage and arson (avg/yr)",
@ -765,8 +590,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Other theft (avg/yr)",
@ -782,8 +605,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Theft from the person (avg/yr)",
@ -799,8 +620,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Shoplifting (avg/yr)",
@ -816,8 +635,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Bicycle theft (avg/yr)",
@ -833,8 +650,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Drugs (avg/yr)",
@ -850,8 +665,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Possession of weapons (avg/yr)",
@ -867,8 +680,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Public order (avg/yr)",
@ -884,8 +695,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Other crime (avg/yr)",
@ -901,8 +710,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "/yr",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
],
},
@ -923,8 +730,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: " years",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "% White",
@ -940,8 +745,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "%",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "% South Asian",
@ -957,8 +760,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "%",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "% Black",
@ -974,8 +775,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "%",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "% East Asian",
@ -991,8 +790,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "%",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "% Mixed",
@ -1008,8 +805,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "%",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "% Other",
@ -1025,8 +820,148 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "%",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
],
},
FeatureGroup {
name: "Politics",
features: &[
Feature::Enum(EnumFeatureConfig {
name: "Winning party",
order: Some(&[
"Labour",
"Conservative",
"Liberal Democrat",
"Reform UK",
"Green",
"Other parties",
]),
description:
"Party that won the parliamentary constituency in the 2024 General Election",
detail: "The political party that won the most votes in the constituency covering this postcode, from the July 2024 UK General Election. Based on first-past-the-post results published by the UK Parliament. Constituencies were redrawn for 2024 using the Boundary Commission's 2023 review.",
source: "election-results",
}),
Feature::Numeric(FeatureConfig {
name: "% Labour",
bounds: Bounds::Fixed {
min: 0.0,
max: 100.0,
},
step: 1.0,
description: "Labour vote share in the 2024 General Election",
detail: "Percentage of valid votes cast for the Labour Party in the constituency covering this postcode, from the July 2024 UK General Election. Includes votes for all Labour candidates where multiple stood.",
source: "election-results",
prefix: "",
suffix: "%",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "% Conservative",
bounds: Bounds::Fixed {
min: 0.0,
max: 100.0,
},
step: 1.0,
description: "Conservative vote share in the 2024 General Election",
detail: "Percentage of valid votes cast for the Conservative Party in the constituency covering this postcode, from the July 2024 UK General Election.",
source: "election-results",
prefix: "",
suffix: "%",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "% Liberal Democrat",
bounds: Bounds::Fixed {
min: 0.0,
max: 100.0,
},
step: 1.0,
description: "Liberal Democrat vote share in the 2024 General Election",
detail: "Percentage of valid votes cast for the Liberal Democrats in the constituency covering this postcode, from the July 2024 UK General Election.",
source: "election-results",
prefix: "",
suffix: "%",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "% Reform UK",
bounds: Bounds::Fixed {
min: 0.0,
max: 100.0,
},
step: 1.0,
description: "Reform UK vote share in the 2024 General Election",
detail: "Percentage of valid votes cast for Reform UK in the constituency covering this postcode, from the July 2024 UK General Election.",
source: "election-results",
prefix: "",
suffix: "%",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "% Green",
bounds: Bounds::Fixed {
min: 0.0,
max: 100.0,
},
step: 1.0,
description: "Green Party vote share in the 2024 General Election",
detail: "Percentage of valid votes cast for the Green Party in the constituency covering this postcode, from the July 2024 UK General Election.",
source: "election-results",
prefix: "",
suffix: "%",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "% Other parties",
bounds: Bounds::Fixed {
min: 0.0,
max: 100.0,
},
step: 1.0,
description: "Combined vote share of all other parties and independents",
detail: "Percentage of valid votes cast for parties other than Labour, Conservative, Liberal Democrat, Reform UK, and Green in the constituency covering this postcode. Includes independents, the Speaker, and smaller parties.",
source: "election-results",
prefix: "",
suffix: "%",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Voter turnout (%)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.5,
description:
"Percentage of registered voters who voted in the 2024 General Election",
detail: "The proportion of the registered electorate who cast a valid vote in the July 2024 UK General Election. Calculated as valid votes divided by electorate size. Higher turnout generally correlates with more affluent areas and closer contests.",
source: "election-results",
prefix: "",
suffix: "%",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Majority (%)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.5,
description:
"Winning margin as a percentage of valid votes in the 2024 General Election",
detail: "The difference in votes between the winning candidate and the runner-up, expressed as a percentage of total valid votes cast. A small majority indicates a marginal seat (competitive); a large majority indicates a safe seat. From the July 2024 UK General Election results published by the UK Parliament.",
source: "election-results",
prefix: "",
suffix: "%",
raw: false,
absolute: false,
}),
],
},
@ -1047,25 +982,21 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: " km",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Number of parks within 2km",
name: "Number of parks within 1km",
bounds: Bounds::Percentile {
low: 5.0,
high: 95.0,
},
step: 1.0,
description: "Number of parks and green spaces within 2km",
detail: "Count of public parks, gardens, playing fields, and play spaces with at least one entrance within a 2km radius of the property's postcode centroid. Derived from the OS Open Greenspace dataset (Ordnance Survey), using park entrance locations for accurate proximity matching.",
description: "Number of parks and green spaces within 1km",
detail: "Count of public parks, gardens, playing fields, and play spaces with at least one entrance within a 1km radius of the property's postcode centroid. Derived from the OS Open Greenspace dataset (Ordnance Survey), using park entrance locations for accurate proximity matching.",
source: "os-open-greenspace",
prefix: "",
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Number of restaurants within 2km",
@ -1081,8 +1012,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Number of grocery shops and supermarkets within 2km",
@ -1098,8 +1027,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Noise (dB)",
@ -1115,8 +1042,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: " dB",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
Feature::Enum(EnumFeatureConfig {
name: "Max available download speed (Mbps)",

View file

@ -51,14 +51,6 @@ struct Cli {
#[arg(long)]
postcode_features: PathBuf,
/// Path to online_listings_buy.parquet
#[arg(long)]
listings_buy: PathBuf,
/// Path to online_listings_rent.parquet
#[arg(long)]
listings_rent: PathBuf,
/// Path to the POI parquet file
#[arg(long)]
pois: PathBuf,
@ -162,8 +154,6 @@ async fn main() -> anyhow::Result<()> {
for (label, path) in [
("Properties", &cli.properties),
("Postcode features", &cli.postcode_features),
("Listings buy", &cli.listings_buy),
("Listings rent", &cli.listings_rent),
] {
if !path.exists() {
bail!("{} parquet file not found: {}", label, path.display());
@ -171,17 +161,13 @@ async fn main() -> anyhow::Result<()> {
}
info!(
"Loading property data from {}, {}, {}, {}",
"Loading property data from {}, {}",
cli.properties.display(),
cli.postcode_features.display(),
cli.listings_buy.display(),
cli.listings_rent.display(),
);
let property_data = data::PropertyData::load(
&cli.properties,
&cli.postcode_features,
&cli.listings_buy,
&cli.listings_rent,
)?;
info!(
rows = property_data.lat.len(),
@ -404,13 +390,7 @@ async fn main() -> anyhow::Result<()> {
stripe_referral_coupon_id: cli.stripe_referral_coupon_id,
};
let shared = Arc::new(SharedState::new(
app_state,
cli.properties,
cli.postcode_features,
cli.listings_buy,
cli.listings_rent,
));
let shared = Arc::new(SharedState::new(app_state));
// Start background PocketBase metrics poller (users, saved searches/properties counts)
pocketbase::start_metrics_poller(shared.clone());
@ -428,8 +408,7 @@ async fn main() -> anyhow::Result<()> {
.allow_credentials(true);
// Handlers use Axum's State extractor to get Arc<SharedState>, then call
// load_state() to get the current Arc<AppState>. This enables hot-reload:
// the reload endpoint swaps in a new AppState, and subsequent requests pick it up.
// load_state() to get the current Arc<AppState>.
let s_crawler = shared.clone();
let reader_tile = tile_reader.clone();
@ -498,7 +477,6 @@ async fn main() -> anyhow::Result<()> {
.route("/api/redeem-invite", post(routes::post_redeem_invite))
.route("/s/{code}", get(routes::get_short_url))
.route("/api/telemetry", post(routes::post_telemetry))
.route("/api/reload", post(routes::post_reload))
.route(
"/pb/{*rest}",
any(routes::proxy_to_pocketbase).layer(ConcurrencyLimitLayer::new(10)),

View file

@ -731,6 +731,35 @@ pub async fn ensure_collections(
ensure_autodate_fields(client, base_url, &token, "short_urls").await?;
}
if !existing.iter().any(|n| n == "location_logs") {
let users_id = find_users_collection_id(client, base_url, &token).await?;
create_collection(
client,
base_url,
&token,
CreateCollection {
name: "location_logs".to_string(),
r#type: "base".to_string(),
fields: vec![
Field::relation("user", &users_id),
Field::number("latitude"),
Field::number("longitude"),
Field::text("postcode", true),
Field::autodate("created", true, false),
Field::autodate("updated", true, true),
],
list_rule: None,
view_rule: None,
create_rule: None,
update_rule: None,
delete_rule: None,
},
)
.await?;
} else {
ensure_autodate_fields(client, base_url, &token, "location_logs").await?;
}
if !existing.iter().any(|n| n == "ai_query_logs") {
let users_id = find_users_collection_id(client, base_url, &token).await?;
create_collection(
@ -743,7 +772,6 @@ pub async fn ensure_collections(
fields: vec![
Field::relation("user", &users_id),
Field::text("query", true),
Field::text("listing_type", false),
Field::text("response_filters", false),
Field::text("response_notes", false),
Field::number("tokens_used"),
@ -916,6 +944,48 @@ async fn poll_pocketbase_counts(state: &AppState) {
}
}
/// Record a user's latitude, longitude and postcode in the `location_logs` collection.
///
/// Best-effort: a failed superuser auth, a transport error, or a non-success
/// response is reported via `warn!` and then dropped; nothing is returned to
/// the caller.
pub async fn log_user_location(
    state: &AppState,
    user_id: &str,
    latitude: f64,
    longitude: f64,
    postcode: &str,
) {
    // Without a superuser token there is nothing to send, so bail out early.
    let auth = get_superuser_token(state).await;
    let token = match auth {
        Err(err) => {
            warn!("Failed to auth superuser for location log: {err}");
            return;
        }
        Ok(token) => token,
    };

    // Trailing slashes are trimmed so the joined URL has exactly one `/` before `api`.
    let pb_url = state.pocketbase_url.trim_end_matches('/');
    let url = format!("{pb_url}/api/collections/location_logs/records");
    let record = serde_json::json!({
        "user": user_id,
        "latitude": latitude,
        "longitude": longitude,
        "postcode": postcode,
    });

    let request = state
        .http_client
        .post(&url)
        .header("Authorization", format!("Bearer {token}"))
        .json(&record);

    match request.send().await {
        Err(err) => warn!("Failed to log user location: {err}"),
        Ok(resp) => {
            // Only non-success statuses are worth a warning; success is silent.
            let status = resp.status();
            if !status.is_success() {
                warn!("Failed to log user location ({status})");
            }
        }
    }
}
/// Insert a record into the `ai_query_logs` collection.
/// Best-effort — logs warnings on failure but does not propagate errors.
#[allow(clippy::too_many_arguments)]
@ -923,7 +993,6 @@ pub async fn log_ai_query(
state: &AppState,
user_id: &str,
query: &str,
listing_type: &str,
response_filters: &str,
response_notes: &str,
tokens_used: u64,
@ -946,7 +1015,6 @@ pub async fn log_ai_query(
.json(&serde_json::json!({
"user": user_id,
"query": query,
"listing_type": listing_type,
"response_filters": response_filters,
"response_notes": response_notes,
"tokens_used": tokens_used,

View file

@ -17,7 +17,6 @@ mod postcode_stats;
mod postcodes;
pub(crate) mod pricing;
pub(crate) mod properties;
mod reload;
mod screenshot;
mod shorten;
mod stats;
@ -48,7 +47,6 @@ pub use postcode_stats::get_postcode_stats;
pub use postcodes::{get_nearest_postcode, get_postcode_lookup, get_postcodes};
pub use pricing::get_pricing;
pub use properties::get_hexagon_properties;
pub use reload::post_reload;
pub use screenshot::{fetch_screenshot_bytes, get_screenshot};
pub use shorten::{get_short_url, post_shorten};
pub use streetview::get_streetview;

View file

@ -280,7 +280,7 @@ pub fn build_system_prompt(
- Use EXACT feature names from the list spelling, capitalisation, and punctuation must match.\n\
- \"cheap\" / \"affordable\" = lower price range. \"expensive\" = higher price range.\n\
- \"low crime\" / \"safe\" = low values on Serious crime and Minor crime summary features. \
\"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of parks within 2km.\n\
\"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of parks within 1km.\n\
- When the user says a number like \"under 400k\", interpret it as 400000.\n\
- When the user says \"3 bed\" or \"3 bedroom\", use Number of bedrooms & living rooms \
(note: this counts bedrooms + living rooms combined, so 3 bed ~ min 4).\n\
@ -341,7 +341,7 @@ pub fn build_system_prompt(
modes_list,
));
// Feature guidance — only historical features are available
// Feature guidance
parts.push(
"\n--- DATA SOURCE ---\n\
The data is historical property sales from the Land Registry.\n\
@ -349,11 +349,7 @@ pub fn build_system_prompt(
Use these features for price queries:\n\
- For purchase price: use \"Estimated current price\" or \"Last known price\"\n\
- For price per sqm: use \"Est. price per sqm\"\n\
- For rent: use \"Estimated monthly rent\"\n\
\n\
Features marked with [historical] below are available. \
Features marked with [buy] or [rent] are NOT available do not use them.\n\
ONLY use features marked [historical] or unmarked."
- For rent estimates: use \"Estimated monthly rent\""
.to_string(),
);
@ -374,17 +370,11 @@ pub fn build_system_prompt(
description,
prefix,
suffix,
modes,
..
} => {
let mode_str = if modes.is_empty() {
String::new()
} else {
format!(" [{}]", modes.join("/"))
};
parts.push(format!(
"- \"{}\"{} (numeric, {}{:.0}{} to {}{:.0}{}): {}",
name, mode_str, prefix, min, suffix, prefix, max, suffix, description
"- \"{}\" (numeric, {}{:.0}{} to {}{:.0}{}): {}",
name, prefix, min, suffix, prefix, max, suffix, description
));
}
FeatureInfo::Enum {
@ -393,10 +383,6 @@ pub fn build_system_prompt(
description,
..
} => {
// Skip Listing status — auto-injected as "Historical sale"
if name == "Listing status" {
continue;
}
parts.push(format!(
"- \"{}\" (enum, values: [{}]): {}",
name,
@ -433,7 +419,7 @@ pub fn build_system_prompt(
{\"name\": \"Noise (dB)\", \"bound\": \"max\", \"value\": 55}, \
{\"name\": \"Good+ primary schools within 2km\", \"bound\": \"min\", \"value\": 2}, \
{\"name\": \"Good+ secondary schools within 2km\", \"bound\": \"min\", \"value\": 1}, \
{\"name\": \"Number of parks within 2km\", \"bound\": \"min\", \"value\": 3}], \
{\"name\": \"Number of parks within 1km\", \"bound\": \"min\", \"value\": 3}], \
\"enum_filters\": [], \"travel_time_filters\": [], \"notes\": \"\"}"
.to_string(),
);
@ -935,8 +921,7 @@ pub async fn post_ai_filters(
}
};
// Only historical mode is supported — validate features accordingly
let mut filters = validate_and_convert(&raw, &state.features_response, "historical");
let filters = validate_and_convert(&raw, &state.features_response);
let travel_time_filters = validate_travel_time_filters(&raw, &state);
let notes = raw
.get("notes")
@ -944,11 +929,6 @@ pub async fn post_ai_filters(
.unwrap_or("")
.to_string();
// Auto-inject Listing status filter for historical mode
if let Value::Object(ref mut map) = filters {
map.insert("Listing status".to_string(), json!(["Historical sale"]));
}
// Count matching properties and refine if too restrictive
let match_count = count_matching_rows(&state, &filters, &travel_time_filters);
info!(
@ -1026,7 +1006,6 @@ pub async fn post_ai_filters(
let log_state = state.clone();
let log_user_id = user.id.clone();
let log_query = req.query.clone();
let log_listing_type = "historical".to_string();
let log_notes = notes.clone();
let log_rounds = (round + 1) as u64;
tokio::spawn(async move {
@ -1034,7 +1013,6 @@ pub async fn post_ai_filters(
&log_state,
&log_user_id,
&log_query,
&log_listing_type,
&filters_json,
&log_notes,
total_tokens_accumulated,
@ -1137,10 +1115,10 @@ fn validate_travel_time_filters(raw: &Value, state: &AppState) -> Vec<TravelTime
/// ```json
/// { "Last known price": [0, 300000], "Leasehold/Freehold": ["Freehold"] }
/// ```
fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type: &str) -> Value {
fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
let mut result = serde_json::Map::new();
// Build lookup maps from feature metadata, filtering by listing mode.
// Build lookup maps from feature metadata.
// Store both slider bounds (min/max from percentiles) and true data bounds
// (histogram.min/max) so one-sided AI filters use the full data range.
let mut numeric_features: rustc_hash::FxHashMap<&str, (f32, f32, f32, f32)> =
@ -1156,19 +1134,12 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type:
min,
max,
histogram,
modes,
..
} => {
// Only include features valid for the chosen listing mode
if modes.is_empty() || modes.contains(&listing_type) {
numeric_features.insert(name, (*min, *max, histogram.min, histogram.max));
}
numeric_features.insert(name, (*min, *max, histogram.min, histogram.max));
}
FeatureInfo::Enum { name, values, .. } => {
// Skip Listing status — handled via auto-injection
if name != "Listing status" {
enum_features.insert(name, values);
}
enum_features.insert(name, values);
}
}
}

View file

@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::sync::Arc;
use axum::extract::State;
@ -17,10 +18,6 @@ fn is_false(val: &bool) -> bool {
!val
}
/// Predicate for `#[serde(skip_serializing_if = "is_empty_slice")]`:
/// returns `true` when the referenced slice holds no elements.
fn is_empty_slice(val: &&[&str]) -> bool {
    match **val {
        [] => true,
        [..] => false,
    }
}
#[derive(Clone, Serialize)]
#[serde(tag = "type")]
pub enum FeatureInfo {
@ -42,15 +39,12 @@ pub enum FeatureInfo {
raw: bool,
#[serde(skip_serializing_if = "is_false")]
absolute: bool,
#[serde(skip_serializing_if = "is_empty_slice")]
modes: &'static [&'static str],
#[serde(skip_serializing_if = "is_empty")]
linked: &'static str,
},
#[serde(rename = "enum")]
Enum {
name: String,
values: Vec<String>,
counts: HashMap<String, u64>,
description: &'static str,
detail: &'static str,
source: &'static str,
@ -98,8 +92,6 @@ pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
suffix: config.suffix,
raw: config.raw,
absolute: config.absolute,
modes: config.modes,
linked: config.linked,
});
}
}
@ -110,9 +102,22 @@ pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
.position(|name| name == config.name)
{
if let Some(values) = data.enum_values.get(&feat_idx) {
let counts = data
.enum_counts
.get(&feat_idx)
.map(|c| {
values
.iter()
.zip(c.iter())
.filter(|(_, &count)| count > 0)
.map(|(v, &count)| (v.clone(), count))
.collect()
})
.unwrap_or_default();
features.push(FeatureInfo::Enum {
name: config.name.to_string(),
values: values.clone(),
counts,
description: config.description,
detail: config.detail,
source: config.source,

View file

@ -13,6 +13,7 @@ use tracing::info;
use crate::aggregation::{Aggregator, EnumDistConfig};
use crate::auth::OptionalUser;
use crate::consts::MAX_CELLS_PER_REQUEST;
use crate::pocketbase::log_user_location;
use crate::data::travel_time::TravelData;
use crate::licensing::check_license_bounds;
use crate::parsing::{
@ -339,8 +340,10 @@ pub async fn get_postcodes(
}
/// Find the nearest postcode to a given lat/lng coordinate.
/// If the user is authenticated, logs their location to PocketBase in the background.
pub async fn get_nearest_postcode(
State(shared): State<Arc<SharedState>>,
Extension(user): Extension<OptionalUser>,
Query(params): Query<NearestPostcodeParams>,
) -> Result<Json<Value>, StatusCode> {
let state = shared.load_state();
@ -368,6 +371,18 @@ pub async fn get_nearest_postcode(
let geometry = postcode_data.geometries[idx].clone();
let postcode = &postcode_data.postcodes[idx];
// Log location for authenticated users (best-effort, non-blocking)
if let Some(ref pb_user) = user.0 {
let state = state.clone();
let user_id = pb_user.id.clone();
let lat_f64 = params.lat;
let lng_f64 = params.lng;
let pc = postcode.clone();
tokio::spawn(async move {
log_user_location(&state, &user_id, lat_f64, lng_f64, &pc).await;
});
}
info!(postcode = %postcode, "GET /api/nearest-postcode");
Ok(Json(serde_json::json!({
"postcode": postcode,

View file

@ -38,8 +38,6 @@ pub struct Property {
pub duration: Option<String>,
pub current_energy_rating: Option<String>,
pub potential_energy_rating: Option<String>,
pub listing_status: Option<String>,
pub listing_url: Option<String>,
pub property_sub_type: Option<String>,
pub price_qualifier: Option<String>,
pub former_council_house: Option<String>,
@ -53,9 +51,6 @@ pub struct Property {
#[serde(skip_serializing_if = "Vec::is_empty")]
pub renovation_history: Vec<RenovationEvent>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub listing_features: Vec<String>,
#[serde(flatten)]
pub features: FxHashMap<String, f32>,
}
@ -158,15 +153,6 @@ pub fn build_property(
lat: state.data.lat[row],
lon: state.data.lon[row],
renovation_history: state.data.renovation_history(row).to_vec(),
listing_features: state.data.listing_features(row).to_vec(),
listing_status: lookup_enum_value(
feature_name_to_index,
&state.data,
enum_values,
row,
"Listing status",
),
listing_url: state.data.listing_url(row).map(String::from),
property_sub_type: state.data.property_sub_type(row).map(String::from),
price_qualifier: state.data.price_qualifier(row).map(String::from),
former_council_house: lookup_enum_value(

View file

@ -1,5 +1,3 @@
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use parking_lot::RwLock;
@ -14,7 +12,6 @@ use crate::routes::FeaturesResponse;
use crate::utils::GridIndex;
pub struct AppState {
// --- Rebuilt on reload ---
pub data: PropertyData,
pub grid: GridIndex,
/// h3_cells[row_idx] = precomputed H3 cell ID at max resolution (12).
@ -33,7 +30,6 @@ pub struct AppState {
/// Complete system prompt for AI filters (features + examples + instructions)
pub ai_filters_system_prompt: String,
// --- Shared across reloads (Arc for cheap cloning) ---
pub poi_data: Arc<POIData>,
pub poi_grid: Arc<GridIndex>,
pub place_data: Arc<PlaceData>,
@ -81,34 +77,16 @@ pub struct AppState {
pub stripe_referral_coupon_id: String,
}
/// Wraps AppState with atomic swap capability for hot-reloading.
/// Wraps AppState for shared access across route handlers.
/// Route handlers call `load_state()` to get the current snapshot.
/// The reload endpoint builds a new AppState and swaps it in atomically.
pub struct SharedState {
/// Current state snapshot. `load_state` clones this `Arc` under a read lock;
/// `swap_state` replaces it under a write lock.
current: RwLock<Arc<AppState>>,
/// Reload-in-progress flag: set by `try_start_reload`, cleared by `finish_reload`.
reloading: AtomicBool,
// --- Paths needed for data reload (supplied to `SharedState::new`) ---
/// Properties data file.
pub properties_path: PathBuf,
/// Postcode features data file.
pub postcode_features_path: PathBuf,
/// Online listings (buy) data file.
pub listings_buy_path: PathBuf,
/// Online listings (rent) data file.
pub listings_rent_path: PathBuf,
}
impl SharedState {
pub fn new(
state: AppState,
properties_path: PathBuf,
postcode_features_path: PathBuf,
listings_buy_path: PathBuf,
listings_rent_path: PathBuf,
) -> Self {
pub fn new(state: AppState) -> Self {
Self {
current: RwLock::new(Arc::new(state)),
reloading: AtomicBool::new(false),
properties_path,
postcode_features_path,
listings_buy_path,
listings_rent_path,
}
}
@ -116,21 +94,4 @@ impl SharedState {
pub fn load_state(&self) -> Arc<AppState> {
    // Take the read lock just long enough to bump the refcount; the guard is
    // released on return and the caller keeps its own `Arc`.
    let snapshot = self.current.read();
    Arc::clone(&*snapshot)
}
/// Replace the current `AppState` with `new_state`.
///
/// The new state is wrapped in an `Arc` before the write lock is taken. The
/// previous `Arc` is released here, so the old state is freed once every
/// outstanding clone of it has been dropped.
pub fn swap_state(&self, new_state: AppState) {
    let replacement = Arc::new(new_state);
    let mut slot = self.current.write();
    *slot = replacement;
}
/// Claim the reload slot for the caller.
///
/// Flips `reloading` from `false` to `true` with a single compare-and-exchange.
/// Returns `true` if this call made the flip, `false` if a reload was already
/// marked as in progress.
pub fn try_start_reload(&self) -> bool {
    let claim = self
        .reloading
        .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed);
    claim.is_ok()
}
/// Mark reload as complete.
///
/// Clears the `reloading` flag with `Release` ordering so that a later
/// `try_start_reload` call can claim it again.
pub fn finish_reload(&self) {
self.reloading.store(false, Ordering::Release);
}
}