Lots of improvements
Some checks failed
CI / Python (lint + test) (push) Failing after 1m39s
CI / Frontend (lint + typecheck) (push) Failing after 1m49s
CI / Rust (lint + test) (push) Failing after 1m50s
Build and publish Docker image / build-and-push (push) Failing after 3m9s

This commit is contained in:
Andras Schmelczer 2026-04-04 10:45:48 +01:00
parent 3853b5dce7
commit b94cf17d75
33 changed files with 2587 additions and 1866 deletions

View file

@ -25,6 +25,6 @@ pub const AI_FILTERS_WEEKLY_TOKEN_LIMIT: u64 = 10_000_000;
/// Timeout for outbound HTTP service calls (seconds).
pub const SERVICE_CALL_TIMEOUT: u64 = 120;
/// Inner London free zone bounds (south, west, north, east) — roughly zone 1.
/// Demo free zone bounds (south, west, north, east) — inner London, roughly zone 1.
/// Users without a license can only query data within these bounds.
pub const FREE_ZONE_BOUNDS: (f64, f64, f64, f64) = (51.44, -0.31, 51.59, 0.05);

View file

@ -402,23 +402,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
modes: &[],
linked: "",
}),
Feature::Numeric(FeatureConfig {
name: "Train or tube stations within 1km",
bounds: Bounds::Percentile {
low: 5.0,
high: 95.0,
},
step: 1.0,
description: "Number of train or tube stations within 1km",
detail: "Rail stations and Tube/metro/tram stops within 1km of the postcode. Does not include bus stops.",
source: "naptan",
prefix: "",
suffix: "",
raw: false,
absolute: false,
modes: &[],
linked: "",
}),
],
},
FeatureGroup {

View file

@ -28,7 +28,7 @@ pub fn check_license_bounds(
let body = json!({
"error": "license_required",
"message": "A license is required to view data outside inner London",
"message": "A license is required to view data outside the demo area",
"free_zone": {
"south": fz_south,
"west": fz_west,

View file

@ -5,5 +5,7 @@ mod h3;
pub use bounds::{bounds_intersect, h3_cell_bounds, parse_bounds, require_bounds};
pub use fields::{parse_field_indices, parse_field_set};
pub use filters::{parse_filters, row_passes_filters, ParsedEnumFilter, ParsedFilter};
pub use filters::{
count_filter_impacts, parse_filters, row_passes_filters, ParsedEnumFilter, ParsedFilter,
};
pub use h3::{cell_for_row, cell_for_row_cached, needs_parent, validate_h3_resolution};

View file

@ -121,6 +121,65 @@ pub fn row_passes_filters(
})
}
/// Computes, in one sweep over `rows`, how many rows satisfy every filter and
/// how many rows each individual filter is solely responsible for rejecting.
///
/// The result is `(total_passing, impacts)`. `impacts` has one slot per filter,
/// numeric filters first and enum filters after them; `impacts[i]` is the number
/// of additional rows that would pass if only filter `i` were dropped.
///
/// Per row, the rejecting filters are classified as follows:
/// - none → the row passes and is added to `total_passing`
/// - exactly one → the row is charged to that filter's slot in `impacts`
/// - two or more → dropping a single filter cannot recover the row, so it is skipped
///
/// A numeric filter rejects a value that equals the `NAN_U16` sentinel or lies
/// outside `min_u16..=max_u16`; an enum filter rejects the sentinel or any value
/// not contained in `allowed`.
pub fn count_filter_impacts(
    filters: &[ParsedFilter],
    enum_filters: &[ParsedEnumFilter],
    feature_data: &[u16],
    num_features: usize,
    rows: impl Iterator<Item = u32>,
) -> (u32, Vec<u32>) {
    let numeric_len = filters.len();
    let mut impacts = vec![0u32; numeric_len + enum_filters.len()];
    let mut total_passing: u32 = 0;

    for row_idx in rows {
        let offset = row_idx as usize * num_features;

        // Lazily yields the impact slot of each rejecting numeric filter.
        let numeric_rejections = filters
            .iter()
            .enumerate()
            .filter(|(_, f)| {
                let raw = feature_data[offset + f.feat_idx];
                raw == NAN_U16 || raw < f.min_u16 || raw > f.max_u16
            })
            .map(|(slot, _)| slot);

        // Enum filters occupy the slots after all numeric filters.
        let enum_rejections = enum_filters
            .iter()
            .enumerate()
            .filter(|(_, f)| {
                let raw = feature_data[offset + f.feat_idx];
                raw == NAN_U16 || !f.allowed.contains(&raw)
            })
            .map(|(slot, _)| numeric_len + slot);

        // Pulling at most two items stops scanning as soon as a second
        // rejection is found; tuple fields are evaluated left to right.
        let mut rejections = numeric_rejections.chain(enum_rejections);
        match (rejections.next(), rejections.next()) {
            (None, _) => total_passing += 1,
            (Some(slot), None) => impacts[slot] += 1,
            (Some(_), Some(_)) => {}
        }
    }

    (total_passing, impacts)
}
#[cfg(test)]
mod tests {
use super::*;
@ -536,4 +595,85 @@ mod tests {
assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1));
}
#[test]
fn filter_impacts_single_pass() {
    // Two numeric filters (price in [100, 500], area in [50, 200]) over 4 rows:
    // row 0: price=150, area=100 → passes both
    // row 1: price=600, area=100 → fails price only
    // row 2: price=150, area=300 → fails area only
    // row 3: price=600, area=300 → fails both
    let tq = test_quant(2, 2);
    let feature_data = vec![
        tq.encode(0, 150.0), tq.encode(1, 100.0), // row 0
        tq.encode(0, 600.0), tq.encode(1, 100.0), // row 1
        tq.encode(0, 150.0), tq.encode(1, 300.0), // row 2
        tq.encode(0, 600.0), tq.encode(1, 300.0), // row 3
    ];
    let filters = vec![
        ParsedFilter {
            feat_idx: 0,
            min_u16: tq.as_ref().encode_min(0, 100.0),
            max_u16: tq.as_ref().encode_max(0, 500.0),
        },
        ParsedFilter {
            feat_idx: 1,
            min_u16: tq.as_ref().encode_min(1, 50.0),
            max_u16: tq.as_ref().encode_max(1, 200.0),
        },
    ];
    // A `Range<u32>` is already an iterator, so it is passed directly.
    let (total, impacts) = count_filter_impacts(&filters, &[], &feature_data, 2, 0..4u32);
    assert_eq!(total, 1); // only row 0 passes
    assert_eq!(impacts[0], 1); // row 1 fails price only
    assert_eq!(impacts[1], 1); // row 2 fails area only
    // row 3 fails both → not counted
}
#[test]
fn filter_impacts_with_enum() {
    // One numeric filter (price in [100, 500]) and one enum filter
    // (type allowed in {0, 1}) over 3 rows:
    // row 0: price=150, type=0(A) → passes both
    // row 1: price=150, type=2(C) → fails enum only
    // row 2: price=600, type=0(A) → fails numeric only
    let tq = test_quant(2, 1);
    let feature_data = vec![
        tq.encode(0, 150.0), 0u16, // row 0
        tq.encode(0, 150.0), 2u16, // row 1
        tq.encode(0, 600.0), 0u16, // row 2
    ];
    let num_filters = vec![ParsedFilter {
        feat_idx: 0,
        min_u16: tq.as_ref().encode_min(0, 100.0),
        max_u16: tq.as_ref().encode_max(0, 500.0),
    }];
    let enum_filters = vec![ParsedEnumFilter {
        feat_idx: 1,
        allowed: [0u16, 1].into_iter().collect(),
    }];
    // A `Range<u32>` is already an iterator, so it is passed directly.
    let (total, impacts) =
        count_filter_impacts(&num_filters, &enum_filters, &feature_data, 2, 0..3u32);
    assert_eq!(total, 1); // row 0
    assert_eq!(impacts[0], 1); // row 2 fails numeric only → impacts[0]
    assert_eq!(impacts[1], 1); // row 1 fails enum only → impacts[1]
}
#[test]
fn filter_impacts_no_filters() {
    // With no filters nothing can reject a row: every row counts as passing
    // and there are no impact slots.
    let tq = test_quant(1, 1);
    let feature_data = vec![tq.encode(0, 100.0)];
    // A `Range<u32>` is already an iterator, so it is passed directly.
    let (total, impacts) = count_filter_impacts(&[], &[], &feature_data, 1, 0..1u32);
    assert_eq!(total, 1);
    assert!(impacts.is_empty());
}
}

View file

@ -1,6 +1,7 @@
mod ai_filters;
mod checkout;
mod export;
mod filter_counts;
mod features;
mod hexagon_stats;
pub(crate) mod hexagons;
@ -31,6 +32,7 @@ pub(crate) mod travel_time;
pub use ai_filters::{build_system_prompt, post_ai_filters};
pub use checkout::post_checkout;
pub use export::get_export;
pub use filter_counts::get_filter_counts;
pub use features::{build_features_response, get_features, FeatureInfo, FeaturesResponse};
pub use hexagon_stats::get_hexagon_stats;
pub use hexagons::get_hexagons;
@ -43,7 +45,7 @@ pub use places::get_places;
pub use pois::{get_poi_categories, get_pois};
pub use postcode_properties::get_postcode_properties;
pub use postcode_stats::get_postcode_stats;
pub use postcodes::{get_postcode_lookup, get_postcodes};
pub use postcodes::{get_nearest_postcode, get_postcode_lookup, get_postcodes};
pub use pricing::get_pricing;
pub use properties::get_hexagon_properties;
pub use reload::post_reload;

View file

@ -39,8 +39,6 @@ pub struct AiFiltersRequest {
query: String,
/// Current filters for conversational refinement (e.g. "make it cheaper")
context: Option<AiFiltersContext>,
/// Current listing mode (historical/buy/rent). Defaults to "historical".
listing_type: Option<String>,
}
#[derive(Serialize)]
@ -62,8 +60,6 @@ pub struct AiFiltersResponse {
/// What the LLM couldn't map to existing filters (empty if everything matched)
#[serde(skip_serializing_if = "String::is_empty")]
notes: String,
/// The listing mode used for this response (historical/buy/rent)
listing_type: String,
/// Number of properties matching the proposed filters (excludes travel time)
match_count: usize,
}
@ -345,34 +341,19 @@ pub fn build_system_prompt(
modes_list,
));
// Listing modes section
// Feature guidance — only historical features are available
parts.push(
"\n--- LISTING MODES ---\n\
There are three listing modes that control which property data is shown:\n\
- \"historical\": Historical sales from Land Registry (default). Uses features like \
\"Last known price\", \"Estimated current price\", \"Price per sqm\".\n\
- \"buy\": Properties currently listed for sale. Uses features like \"Asking price\", \
\"Asking price per sqm\".\n\
- \"rent\": Properties currently listed for rent. Uses features like \
\"Asking rent (monthly)\".\n\
"\n--- DATA SOURCE ---\n\
The data is historical property sales from the Land Registry.\n\
\n\
When the user mentions buying, purchasing, for-sale properties, or asking prices, \
set listing_type to \"buy\".\n\
When the user mentions renting, letting, rental properties, or monthly rent, \
set listing_type to \"rent\".\n\
When the user doesn't specify or mentions historical prices/past sales, \
omit listing_type to keep the current mode.\n\
Use these features for price queries:\n\
- For purchase price: use \"Estimated current price\" or \"Last known price\"\n\
- For price per sqm: use \"Est. price per sqm\"\n\
- For rent: use \"Estimated monthly rent\"\n\
\n\
Features marked with [mode] below are only available in that mode. \
Features without a mode annotation work in all modes. \
ONLY use features valid for the chosen listing_type.\n\
If the user mentions price and the mode is \"buy\", use \"Asking price\" (not \"Last known price\").\n\
If the user mentions rent/price and the mode is \"rent\", use \"Asking rent (monthly)\".\n\
\n\
Feature equivalences across modes:\n\
- \"Estimated current price\" (historical) ↔ \"Asking price\" (buy)\n\
- \"Est. price per sqm\" (historical) ↔ \"Asking price per sqm\" (buy)\n\
- \"Estimated monthly rent\" (historical) ↔ \"Asking rent (monthly)\" (rent)"
Features marked with [historical] below are available. \
Features marked with [buy] or [rent] are NOT available do not use them.\n\
ONLY use features marked [historical] or unmarked."
.to_string(),
);
@ -412,7 +393,7 @@ pub fn build_system_prompt(
description,
..
} => {
// Skip Listing status — handled via listing_type field
// Skip Listing status — auto-injected as "Historical sale"
if name == "Listing status" {
continue;
}
@ -499,11 +480,11 @@ pub fn build_system_prompt(
.to_string(),
);
// Examples showing listing mode switching
// Examples showing rent and price features
parts.push(
"\nUser: \"2 bed flat to rent under £1500/month\"\n\
Output: {\"listing_type\": \"rent\", \
\"numeric_filters\": [{\"name\": \"Asking rent (monthly)\", \"bound\": \"max\", \"value\": 1500}], \
"\nUser: \"2 bed flat with rent under £1500/month\"\n\
Output: {\
\"numeric_filters\": [{\"name\": \"Estimated monthly rent\", \"bound\": \"max\", \"value\": 1500}], \
\"enum_filters\": [{\"name\": \"Property type\", \"values\": [\"Flats/Maisonettes\"]}], \
\"travel_time_filters\": [], \
\"notes\": \"\"}"
@ -511,9 +492,9 @@ pub fn build_system_prompt(
);
parts.push(
"\nUser: \"3 bed house to buy under 500k with good schools\"\n\
Output: {\"listing_type\": \"buy\", \
\"numeric_filters\": [{\"name\": \"Asking price\", \"bound\": \"max\", \"value\": 500000}, \
"\nUser: \"3 bed house under 500k with good schools\"\n\
Output: {\
\"numeric_filters\": [{\"name\": \"Estimated current price\", \"bound\": \"max\", \"value\": 500000}, \
{\"name\": \"Good+ primary schools within 2km\", \"bound\": \"min\", \"value\": 2}], \
\"enum_filters\": [{\"name\": \"Property type\", \
\"values\": [\"Detached\", \"Semi-Detached\", \"Terraced\"]}], \
@ -525,11 +506,9 @@ pub fn build_system_prompt(
// Output format reminder
parts.push(
"\n--- OUTPUT FORMAT ---\n\
{\"listing_type\": \"buy\"|\"rent\" (OPTIONAL — only when switching mode), \
\"numeric_filters\": [...], \"enum_filters\": [...], \
{\"numeric_filters\": [...], \"enum_filters\": [...], \
\"travel_time_filters\": [{\"mode\": \"...\", \"slug\": \"...\", \"label\": \"...\", \
\"bound\": \"min\"|\"max\", \"value\": N}, ...], \"notes\": \"...\"}\n\
- listing_type: include only when the user explicitly wants to buy or rent. Omit to keep current mode.\n\
- travel_time_filters: use ONLY slugs returned by search_destinations. If a place isn't found, mention it in notes.\n\
Respond with ONLY the JSON object. No explanation."
.to_string(),
@ -779,17 +758,9 @@ pub async fn post_ai_filters(
let tools = build_tool_declarations(&state);
// Resolve current listing mode from request
let current_mode = req.listing_type.as_deref().unwrap_or("historical");
let current_mode = match current_mode {
"historical" | "buy" | "rent" => current_mode,
_ => "historical",
};
// Build user message with listing mode and optional context for conversational refinement
// Build user message with optional context for conversational refinement
let user_text = if let Some(ref ctx) = req.context {
let mut msg = String::new();
msg.push_str(&format!("Current listing mode: {}\n", current_mode));
msg.push_str("Currently active filters:\n");
msg.push_str(&serde_json::to_string(&ctx.filters).unwrap_or_default());
if !ctx.travel_time.is_empty() {
@ -807,10 +778,7 @@ pub async fn post_ai_filters(
msg.push_str(&format!("\nUser request: {}", req.query));
msg
} else {
format!(
"Current listing mode: {}\nUser request: {}",
current_mode, req.query
)
req.query.clone()
};
let mut contents = vec![json!({
@ -967,17 +935,8 @@ pub async fn post_ai_filters(
}
};
// Resolve listing_type: LLM output > request > "historical"
let listing_type = raw
.get("listing_type")
.and_then(|val| val.as_str())
.unwrap_or(current_mode);
let listing_type = match listing_type {
"historical" | "buy" | "rent" => listing_type,
_ => current_mode,
};
let mut filters = validate_and_convert(&raw, &state.features_response, listing_type);
// Only historical mode is supported — validate features accordingly
let mut filters = validate_and_convert(&raw, &state.features_response, "historical");
let travel_time_filters = validate_travel_time_filters(&raw, &state);
let notes = raw
.get("notes")
@ -985,14 +944,12 @@ pub async fn post_ai_filters(
.unwrap_or("")
.to_string();
// Auto-inject Listing status filter for the chosen mode
let listing_value = match listing_type {
"buy" => "For sale",
"rent" => "For rent",
_ => "Historical sale",
};
// Auto-inject Listing status filter for historical mode
if let Value::Object(ref mut map) = filters {
map.insert("Listing status".to_string(), json!([listing_value]));
map.insert(
"Listing status".to_string(),
json!(["Historical sale"]),
);
}
// Count matching properties and refine if too restrictive
@ -1031,7 +988,6 @@ pub async fn post_ai_filters(
filters,
travel_time_filters,
notes,
listing_type: listing_type.to_string(),
match_count: 0,
}));
}
@ -1073,7 +1029,7 @@ pub async fn post_ai_filters(
let log_state = state.clone();
let log_user_id = user.id.clone();
let log_query = req.query.clone();
let log_listing_type = listing_type.to_string();
let log_listing_type = "historical".to_string();
let log_notes = notes.clone();
let log_rounds = (round + 1) as u64;
tokio::spawn(async move {
@ -1094,7 +1050,6 @@ pub async fn post_ai_filters(
filters,
travel_time_filters,
notes,
listing_type: listing_type.to_string(),
match_count,
}));
}

View file

@ -0,0 +1,203 @@
use std::sync::Arc;
use axum::extract::{Query, State};
use axum::http::StatusCode;
use axum::response::{IntoResponse, Json};
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use tracing::info;
use crate::consts::NAN_U16;
use crate::data::travel_time::TravelData;
use crate::parsing::{parse_filters, require_bounds};
use crate::routes::travel_time::parse_optional_travel;
use crate::state::SharedState;
/// Query-string parameters accepted by `get_filter_counts`.
///
/// Every field is optional at the deserialization level; the handler decides
/// which ones are actually required.
#[derive(Deserialize)]
pub struct FilterCountsParams {
/// Bounding box of the area to scan. Handed to `require_bounds`, which yields
/// `(south, west, north, east)` or an error response.
bounds: Option<String>,
/// Raw filter specification, handed to `parse_filters` to obtain the numeric
/// and enum filters. Also echoed verbatim in the request log line.
filters: Option<String>,
/// Raw travel-time specification, handed to `parse_optional_travel`.
travel: Option<String>,
}
/// JSON body returned by `get_filter_counts`.
#[derive(Serialize)]
pub struct FilterCountsResponse {
/// Number of rows within the requested bounds that pass every active filter.
/// The handler reports 0 without scanning when no filter is active.
total: u32,
/// Marginal impact per filter: how many rows are rejected by that filter and
/// by no other. Keyed by feature name for numeric/enum filters and by
/// `tt_{mode}_{slug}` for travel-time filters; filters with a zero count are
/// omitted.
impacts: FxHashMap<String, u32>,
}
/// Handles `GET /api/filter-counts`.
///
/// Scans every row the grid returns for the requested bounds and reports:
/// - `total`: how many rows pass all active filters, and
/// - `impacts`: for each filter, how many rows are rejected by that filter alone
///   (i.e. how many more rows would pass if only that filter were removed).
///
/// Active filters are the parsed numeric filters, the parsed enum filters, and
/// those travel-time entries that carry a filter range (`filter_min` is set).
/// Impact slots are laid out in that order: numeric, enum, travel.
///
/// NOTE: when no filter is active the handler short-circuits and returns
/// `total: 0` with an empty `impacts` map, without scanning any rows.
///
/// # Errors
///
/// - the error response produced by `require_bounds` when `bounds` is rejected
/// - `400 Bad Request` when `filters` or `travel` cannot be parsed
/// - `500 Internal Server Error` when travel-time data fails to load or the
///   blocking task cannot be joined
pub async fn get_filter_counts(
    State(shared): State<Arc<SharedState>>,
    Query(params): Query<FilterCountsParams>,
) -> Result<Json<FilterCountsResponse>, axum::response::Response> {
    let state = shared.load_state();
    let (south, west, north, east) =
        require_bounds(params.bounds).map_err(IntoResponse::into_response)?;

    let quant = state.data.quant_ref();
    let (parsed_filters, parsed_enum_filters) = parse_filters(
        params.filters.as_deref(),
        &state.feature_name_to_index,
        &state.data.enum_values,
        &quant,
    )
    .map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;

    let travel_entries = parse_optional_travel(params.travel.as_deref())
        .map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;

    let num_regular = parsed_filters.len() + parsed_enum_filters.len();

    // Only travel entries with a filter range count as filters for impact tracking
    let travel_filter_indices: Vec<usize> = travel_entries
        .iter()
        .enumerate()
        .filter(|(_, e)| e.filter_min.is_some())
        .map(|(i, _)| i)
        .collect();
    let num_total_filters = num_regular + travel_filter_indices.len();

    // Nothing to measure: skip the row scan entirely.
    if num_total_filters == 0 {
        return Ok(Json(FilterCountsResponse {
            total: 0,
            impacts: FxHashMap::default(),
        }));
    }

    let filters_str = params.filters;

    // The row scan is CPU-bound, so it runs on the blocking thread pool.
    let response = tokio::task::spawn_blocking(move || -> Result<FilterCountsResponse, String> {
        let t0 = std::time::Instant::now();
        let num_features = state.data.num_features;
        let feature_data = &state.data.feature_data;

        // Load travel time data (indexed in parallel with `travel_entries`)
        let travel_data: Vec<TravelData> = travel_entries
            .iter()
            .map(|entry| {
                state
                    .travel_time_store
                    .get(&entry.mode, &entry.slug)
                    .map_err(|err| format!("Failed to load travel data: {}", err))
            })
            .collect::<Result<Vec<_>, _>>()?;

        // Gate the per-row postcode lookup on travel *filters*, not on travel
        // entries in general: entries without a filter range are never tested
        // below, so resolving the postcode for them would be wasted work.
        let has_travel_filters = !travel_filter_indices.is_empty();
        let (pc_interner, pc_keys) = state.data.postcode_parts();

        let rows = state.grid.query(south, west, north, east);
        let row_count = rows.len();

        let mut total_passing: u32 = 0;
        let mut impacts = vec![0u32; num_total_filters];

        for row_idx in rows {
            let row = row_idx as usize;
            let base = row * num_features;
            // `fail_index` is only meaningful when `fail_count` ends up at exactly 1.
            let mut fail_count: u32 = 0;
            let mut fail_index: usize = 0;

            // Test numeric filters
            for (i, f) in parsed_filters.iter().enumerate() {
                let raw = feature_data[base + f.feat_idx];
                if raw == NAN_U16 || raw < f.min_u16 || raw > f.max_u16 {
                    fail_count += 1;
                    fail_index = i;
                    // A second failure means no single filter is to blame.
                    if fail_count > 1 {
                        break;
                    }
                }
            }

            // Test enum filters
            if fail_count <= 1 {
                for (i, f) in parsed_enum_filters.iter().enumerate() {
                    let raw = feature_data[base + f.feat_idx];
                    if raw == NAN_U16 || !f.allowed.contains(&raw) {
                        fail_count += 1;
                        fail_index = parsed_filters.len() + i;
                        if fail_count > 1 {
                            break;
                        }
                    }
                }
            }

            // Test travel time filters
            if fail_count <= 1 && has_travel_filters {
                let postcode = pc_interner.resolve(&pc_keys[row]);
                for (slot, &ti) in travel_filter_indices.iter().enumerate() {
                    let entry = &travel_entries[ti];
                    // With `use_best`, prefer `best_minutes` and fall back to `minutes`.
                    let minutes = travel_data[ti].get(postcode).map(|r| {
                        if entry.use_best {
                            r.best_minutes.unwrap_or(r.minutes)
                        } else {
                            r.minutes
                        }
                    });
                    // A row with no travel time fails a fully-bounded filter;
                    // a filter lacking either bound never rejects a row.
                    let passes = match (minutes, entry.filter_min, entry.filter_max) {
                        (Some(mins), Some(fmin), Some(fmax)) => {
                            (mins as f32) >= fmin && (mins as f32) <= fmax
                        }
                        (None, Some(_), Some(_)) => false,
                        _ => true,
                    };
                    if !passes {
                        fail_count += 1;
                        fail_index = num_regular + slot;
                        if fail_count > 1 {
                            break;
                        }
                    }
                }
            }

            match fail_count {
                0 => total_passing += 1,
                1 => impacts[fail_index] += 1,
                _ => {}
            }
        }

        // Map filter indices back to feature/travel names (zero counts are omitted)
        let mut impact_map: FxHashMap<String, u32> = FxHashMap::default();
        for (i, &count) in impacts.iter().enumerate() {
            if count == 0 {
                continue;
            }
            let name = if i < parsed_filters.len() {
                state.data.feature_names[parsed_filters[i].feat_idx].clone()
            } else if i < num_regular {
                let ei = i - parsed_filters.len();
                state.data.feature_names[parsed_enum_filters[ei].feat_idx].clone()
            } else {
                let slot = i - num_regular;
                let ti = travel_filter_indices[slot];
                let e = &travel_entries[ti];
                format!("tt_{}_{}", e.mode, e.slug)
            };
            impact_map.insert(name, count);
        }

        let elapsed = t0.elapsed();
        info!(
            rows = row_count,
            filters = num_total_filters,
            travel = travel_filter_indices.len(),
            total = total_passing,
            filters_raw = filters_str.as_deref().unwrap_or("-"),
            ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
            "GET /api/filter-counts"
        );

        Ok(FilterCountsResponse {
            total: total_passing,
            impacts: impact_map,
        })
    })
    .await
    // Outer error: the blocking task panicked or was cancelled.
    .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response())?
    // Inner error: travel-time data could not be loaded.
    .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err).into_response())?;

    Ok(Json(response))
}

View file

@ -144,6 +144,7 @@ fn rebuild_data(shared: &SharedState, start: Instant) -> anyhow::Result<(usize,
poi_grid: Arc::clone(&old.poi_grid),
place_data: Arc::clone(&old.place_data),
postcode_data: Arc::clone(&old.postcode_data),
outcode_data: Arc::clone(&old.outcode_data),
poi_category_groups: Arc::clone(&old.poi_category_groups),
travel_time_store: Arc::clone(&old.travel_time_store),
token_cache: Arc::clone(&old.token_cache),