Fun changes
Some checks failed
CI / Python (lint + test) (push) Failing after 3m38s
CI / Rust (lint + test) (push) Failing after 3m32s
CI / Frontend (lint + typecheck) (push) Failing after 4m12s
Build and publish Docker image / build-and-push (push) Failing after 4m48s

This commit is contained in:
Andras Schmelczer 2026-04-04 22:59:44 +01:00
parent cd778dd088
commit 349a6c1d53
60 changed files with 1260 additions and 2600 deletions

View file

@ -280,7 +280,7 @@ pub fn build_system_prompt(
- Use EXACT feature names from the list spelling, capitalisation, and punctuation must match.\n\
- \"cheap\" / \"affordable\" = lower price range. \"expensive\" = higher price range.\n\
- \"low crime\" / \"safe\" = low values on Serious crime and Minor crime summary features. \
\"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of parks within 2km.\n\
\"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of parks within 1km.\n\
- When the user says a number like \"under 400k\", interpret it as 400000.\n\
- When the user says \"3 bed\" or \"3 bedroom\", use Number of bedrooms & living rooms \
(note: this counts bedrooms + living rooms combined, so 3 bed ~ min 4).\n\
@ -341,7 +341,7 @@ pub fn build_system_prompt(
modes_list,
));
// Feature guidance — only historical features are available
// Feature guidance
parts.push(
"\n--- DATA SOURCE ---\n\
The data is historical property sales from the Land Registry.\n\
@ -349,11 +349,7 @@ pub fn build_system_prompt(
Use these features for price queries:\n\
- For purchase price: use \"Estimated current price\" or \"Last known price\"\n\
- For price per sqm: use \"Est. price per sqm\"\n\
- For rent: use \"Estimated monthly rent\"\n\
\n\
Features marked with [historical] below are available. \
Features marked with [buy] or [rent] are NOT available do not use them.\n\
ONLY use features marked [historical] or unmarked."
- For rent estimates: use \"Estimated monthly rent\""
.to_string(),
);
@ -374,17 +370,11 @@ pub fn build_system_prompt(
description,
prefix,
suffix,
modes,
..
} => {
let mode_str = if modes.is_empty() {
String::new()
} else {
format!(" [{}]", modes.join("/"))
};
parts.push(format!(
"- \"{}\"{} (numeric, {}{:.0}{} to {}{:.0}{}): {}",
name, mode_str, prefix, min, suffix, prefix, max, suffix, description
"- \"{}\" (numeric, {}{:.0}{} to {}{:.0}{}): {}",
name, prefix, min, suffix, prefix, max, suffix, description
));
}
FeatureInfo::Enum {
@ -393,10 +383,6 @@ pub fn build_system_prompt(
description,
..
} => {
// Skip Listing status — auto-injected as "Historical sale"
if name == "Listing status" {
continue;
}
parts.push(format!(
"- \"{}\" (enum, values: [{}]): {}",
name,
@ -433,7 +419,7 @@ pub fn build_system_prompt(
{\"name\": \"Noise (dB)\", \"bound\": \"max\", \"value\": 55}, \
{\"name\": \"Good+ primary schools within 2km\", \"bound\": \"min\", \"value\": 2}, \
{\"name\": \"Good+ secondary schools within 2km\", \"bound\": \"min\", \"value\": 1}, \
{\"name\": \"Number of parks within 2km\", \"bound\": \"min\", \"value\": 3}], \
{\"name\": \"Number of parks within 1km\", \"bound\": \"min\", \"value\": 3}], \
\"enum_filters\": [], \"travel_time_filters\": [], \"notes\": \"\"}"
.to_string(),
);
@ -935,8 +921,7 @@ pub async fn post_ai_filters(
}
};
// Only historical mode is supported — validate features accordingly
let mut filters = validate_and_convert(&raw, &state.features_response, "historical");
let filters = validate_and_convert(&raw, &state.features_response);
let travel_time_filters = validate_travel_time_filters(&raw, &state);
let notes = raw
.get("notes")
@ -944,11 +929,6 @@ pub async fn post_ai_filters(
.unwrap_or("")
.to_string();
// Auto-inject Listing status filter for historical mode
if let Value::Object(ref mut map) = filters {
map.insert("Listing status".to_string(), json!(["Historical sale"]));
}
// Count matching properties and refine if too restrictive
let match_count = count_matching_rows(&state, &filters, &travel_time_filters);
info!(
@ -1026,7 +1006,6 @@ pub async fn post_ai_filters(
let log_state = state.clone();
let log_user_id = user.id.clone();
let log_query = req.query.clone();
let log_listing_type = "historical".to_string();
let log_notes = notes.clone();
let log_rounds = (round + 1) as u64;
tokio::spawn(async move {
@ -1034,7 +1013,6 @@ pub async fn post_ai_filters(
&log_state,
&log_user_id,
&log_query,
&log_listing_type,
&filters_json,
&log_notes,
total_tokens_accumulated,
@ -1137,10 +1115,10 @@ fn validate_travel_time_filters(raw: &Value, state: &AppState) -> Vec<TravelTime
/// ```json
/// { "Last known price": [0, 300000], "Leasehold/Freehold": ["Freehold"] }
/// ```
fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type: &str) -> Value {
fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
let mut result = serde_json::Map::new();
// Build lookup maps from feature metadata, filtering by listing mode.
// Build lookup maps from feature metadata.
// Store both slider bounds (min/max from percentiles) and true data bounds
// (histogram.min/max) so one-sided AI filters use the full data range.
let mut numeric_features: rustc_hash::FxHashMap<&str, (f32, f32, f32, f32)> =
@ -1156,19 +1134,12 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type:
min,
max,
histogram,
modes,
..
} => {
// Only include features valid for the chosen listing mode
if modes.is_empty() || modes.contains(&listing_type) {
numeric_features.insert(name, (*min, *max, histogram.min, histogram.max));
}
numeric_features.insert(name, (*min, *max, histogram.min, histogram.max));
}
FeatureInfo::Enum { name, values, .. } => {
// Skip Listing status — handled via auto-injection
if name != "Listing status" {
enum_features.insert(name, values);
}
enum_features.insert(name, values);
}
}
}

View file

@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::sync::Arc;
use axum::extract::State;
@ -17,10 +18,6 @@ fn is_false(val: &bool) -> bool {
!val
}
/// Returns `true` when the referenced string slice contains no elements.
///
/// Referenced by name from a `skip_serializing_if` attribute on a
/// `&'static [&'static str]` field, which is why the argument is a
/// reference to a slice reference rather than a plain slice.
fn is_empty_slice(val: &&[&str]) -> bool {
    // Peel off the outer reference first so the emptiness check is made
    // directly on the slice.
    let entries: &[&str] = val;
    entries.is_empty()
}
#[derive(Clone, Serialize)]
#[serde(tag = "type")]
pub enum FeatureInfo {
@ -42,15 +39,12 @@ pub enum FeatureInfo {
raw: bool,
#[serde(skip_serializing_if = "is_false")]
absolute: bool,
#[serde(skip_serializing_if = "is_empty_slice")]
modes: &'static [&'static str],
#[serde(skip_serializing_if = "is_empty")]
linked: &'static str,
},
#[serde(rename = "enum")]
Enum {
name: String,
values: Vec<String>,
counts: HashMap<String, u64>,
description: &'static str,
detail: &'static str,
source: &'static str,
@ -98,8 +92,6 @@ pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
suffix: config.suffix,
raw: config.raw,
absolute: config.absolute,
modes: config.modes,
linked: config.linked,
});
}
}
@ -110,9 +102,22 @@ pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
.position(|name| name == config.name)
{
if let Some(values) = data.enum_values.get(&feat_idx) {
let counts = data
.enum_counts
.get(&feat_idx)
.map(|c| {
values
.iter()
.zip(c.iter())
.filter(|(_, &count)| count > 0)
.map(|(v, &count)| (v.clone(), count))
.collect()
})
.unwrap_or_default();
features.push(FeatureInfo::Enum {
name: config.name.to_string(),
values: values.clone(),
counts,
description: config.description,
detail: config.detail,
source: config.source,

View file

@ -13,6 +13,7 @@ use tracing::info;
use crate::aggregation::{Aggregator, EnumDistConfig};
use crate::auth::OptionalUser;
use crate::consts::MAX_CELLS_PER_REQUEST;
use crate::pocketbase::log_user_location;
use crate::data::travel_time::TravelData;
use crate::licensing::check_license_bounds;
use crate::parsing::{
@ -339,8 +340,10 @@ pub async fn get_postcodes(
}
/// Find the nearest postcode to a given lat/lng coordinate.
/// If the user is authenticated, logs their location to PocketBase in the background.
pub async fn get_nearest_postcode(
State(shared): State<Arc<SharedState>>,
Extension(user): Extension<OptionalUser>,
Query(params): Query<NearestPostcodeParams>,
) -> Result<Json<Value>, StatusCode> {
let state = shared.load_state();
@ -368,6 +371,18 @@ pub async fn get_nearest_postcode(
let geometry = postcode_data.geometries[idx].clone();
let postcode = &postcode_data.postcodes[idx];
// Log location for authenticated users (best-effort, non-blocking)
if let Some(ref pb_user) = user.0 {
let state = state.clone();
let user_id = pb_user.id.clone();
let lat_f64 = params.lat;
let lng_f64 = params.lng;
let pc = postcode.clone();
tokio::spawn(async move {
log_user_location(&state, &user_id, lat_f64, lng_f64, &pc).await;
});
}
info!(postcode = %postcode, "GET /api/nearest-postcode");
Ok(Json(serde_json::json!({
"postcode": postcode,

View file

@ -38,8 +38,6 @@ pub struct Property {
pub duration: Option<String>,
pub current_energy_rating: Option<String>,
pub potential_energy_rating: Option<String>,
pub listing_status: Option<String>,
pub listing_url: Option<String>,
pub property_sub_type: Option<String>,
pub price_qualifier: Option<String>,
pub former_council_house: Option<String>,
@ -53,9 +51,6 @@ pub struct Property {
#[serde(skip_serializing_if = "Vec::is_empty")]
pub renovation_history: Vec<RenovationEvent>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub listing_features: Vec<String>,
#[serde(flatten)]
pub features: FxHashMap<String, f32>,
}
@ -158,15 +153,6 @@ pub fn build_property(
lat: state.data.lat[row],
lon: state.data.lon[row],
renovation_history: state.data.renovation_history(row).to_vec(),
listing_features: state.data.listing_features(row).to_vec(),
listing_status: lookup_enum_value(
feature_name_to_index,
&state.data,
enum_values,
row,
"Listing status",
),
listing_url: state.data.listing_url(row).map(String::from),
property_sub_type: state.data.property_sub_type(row).map(String::from),
price_qualifier: state.data.price_qualifier(row).map(String::from),
former_council_house: lookup_enum_value(