good changes
This commit is contained in:
parent
160283f1a1
commit
c997ea46a5
26 changed files with 991 additions and 288 deletions
|
|
@ -68,9 +68,9 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
features: &[
|
||||
FeatureConfig {
|
||||
name: "Last known price",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 0.0,
|
||||
high: 98.0,
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 2_500_000.0,
|
||||
},
|
||||
step: 10000.0,
|
||||
description: "Most recent sale price from the Land Registry",
|
||||
|
|
@ -79,15 +79,15 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
prefix: "£",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
absolute: true,
|
||||
modes: &["historical"],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Estimated current price",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 0.0,
|
||||
high: 98.0,
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 2_500_000.0,
|
||||
},
|
||||
step: 10000.0,
|
||||
description: "Inflation-adjusted estimate of the current property value",
|
||||
|
|
@ -96,7 +96,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
prefix: "£",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
absolute: true,
|
||||
modes: &["historical"],
|
||||
linked: "Asking price",
|
||||
},
|
||||
|
|
@ -252,9 +252,9 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
FeatureConfig {
|
||||
name: "Asking price",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 0.0,
|
||||
high: 98.0,
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 2_500_000.0,
|
||||
},
|
||||
step: 10000.0,
|
||||
description: "Listed asking price for properties currently for sale",
|
||||
|
|
@ -263,7 +263,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
prefix: "£",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
absolute: true,
|
||||
modes: &["buy"],
|
||||
linked: "Estimated current price",
|
||||
},
|
||||
|
|
@ -949,6 +949,23 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Median age",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 2.0,
|
||||
high: 98.0,
|
||||
},
|
||||
step: 0.5,
|
||||
description: "Median age of the local population",
|
||||
detail: "From the 2021 Census (TS007A). Median age of usual residents in the LSOA, computed by linear interpolation from five-year age band counts. Areas with younger populations tend to be urban, university towns, or have more families; older medians are typical in rural and coastal areas.",
|
||||
source: "census-2021",
|
||||
prefix: "",
|
||||
suffix: " years",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
],
|
||||
},
|
||||
FeatureGroup {
|
||||
|
|
@ -996,8 +1013,8 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Number of parks and green spaces within 2km",
|
||||
detail: "Count of parks, gardens, nature reserves, and other green spaces within a 2km radius of the property's postcode centroid. Derived from OpenStreetMap POI data.",
|
||||
source: "osm-pois",
|
||||
detail: "Count of public parks, gardens, playing fields, and play spaces within a 2km radius of the property's postcode centroid. Derived from the OS Open Greenspace dataset (Ordnance Survey), which provides authoritative polygon boundaries for green spaces across Great Britain.",
|
||||
source: "os-open-greenspace",
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
|
|
@ -1005,6 +1022,23 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Distance to nearest park (km)",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 2.0,
|
||||
high: 98.0,
|
||||
},
|
||||
step: 0.1,
|
||||
description: "Distance to the closest park or green space",
|
||||
detail: "Straight-line distance in kilometres from the property's postcode centroid to the nearest public park, garden, playing field, or play space. Derived from the OS Open Greenspace dataset (Ordnance Survey), which provides authoritative polygon boundaries for green spaces across Great Britain.",
|
||||
source: "os-open-greenspace",
|
||||
prefix: "",
|
||||
suffix: " km",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
],
|
||||
},
|
||||
FeatureGroup {
|
||||
|
|
@ -1056,6 +1090,13 @@ pub static ENUM_FEATURE_GROUPS: &[EnumFeatureGroup] = &[
|
|||
detail: "From HM Land Registry Price Paid data and EPC certificates. Detached, Semi-Detached, Terraced (includes all terrace sub-types), Flats/Maisonettes, or Other (bungalows, park homes, etc.).",
|
||||
source: "price-paid",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Former council house",
|
||||
order: Some(&["Yes", "No"]),
|
||||
description: "Whether the property was ever recorded as social housing",
|
||||
detail: "Derived from the TENURE field in Energy Performance Certificate data. If any EPC certificate for this property recorded the tenure as social rental, it indicates the property was council or housing-association stock at the time of that inspection. Properties that were later sold (e.g. via Right to Buy) retain this flag.",
|
||||
source: "epc",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Current energy rating",
|
||||
order: Some(&["A", "B", "C", "D", "E", "F", "G"]),
|
||||
|
|
@ -1082,55 +1123,6 @@ pub static ENUM_FEATURE_GROUPS: &[EnumFeatureGroup] = &[
|
|||
detail: "Maximum available fixed broadband download speed in Megabits per second, from Ofcom's Connected Nations 2025 report. Measured at Output Area level and represents the maximum speed available from any provider, not actual achieved speeds. Tiers: 10 = basic, 30 = superfast (SFBB), 100 = ultrafast 100Mbit, 300 = ultrafast (UFBB), 1000 = gigabit.",
|
||||
source: "broadband",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Environmental risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Highest ground stability risk across all six hazard types",
|
||||
detail: "Overall ground stability risk for the area, taken as the maximum across all six GeoSure hazard categories (collapsible deposits, compressible ground, landslides, running sand, shrink-swell, and soluble rocks). From Ordnance Survey GeoSure data on a 5km hex grid.",
|
||||
source: "geosure",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Collapsible deposits risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Risk of ground collapse from natural underground cavities",
|
||||
detail: "From OS GeoSure. Indicates the likelihood of ground collapse due to natural cavities formed by dissolution of soluble rocks or the collapse of old mines and natural pipes. Rated on a 5km hex grid across Great Britain.",
|
||||
source: "geosure",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Compressible ground risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Risk of ground compression causing subsidence",
|
||||
detail: "From OS GeoSure. Indicates the potential for ground to compress under loading, which can cause gradual settlement or subsidence of buildings and infrastructure. Typically associated with soft clay, silt, or peat deposits.",
|
||||
source: "geosure",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Landslide risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Risk of landslide or slope instability",
|
||||
detail: "From OS GeoSure. Indicates the susceptibility of the ground to landslides and slope instability. Based on slope angle, geology, and historical landslide records.",
|
||||
source: "geosure",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Running sand risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Risk of sand becoming fluid when saturated",
|
||||
detail: "From OS GeoSure. Indicates the potential for fine-grained sand to behave like a fluid when saturated with water, which can affect excavations and foundations.",
|
||||
source: "geosure",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Shrink-swell risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Risk of clay shrinking and swelling with moisture changes",
|
||||
detail: "From OS GeoSure. Indicates the potential for clay-rich soils to shrink when dry and swell when wet, causing ground movement that can damage buildings and infrastructure. One of the most common causes of subsidence in the UK.",
|
||||
source: "geosure",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Soluble rocks risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Risk of sinkholes from dissolution of soluble rocks",
|
||||
detail: "From OS GeoSure. Indicates the potential for soluble rocks (limestone, chalk, gypsum) to dissolve, creating underground voids that can lead to sinkholes and ground subsidence.",
|
||||
source: "geosure",
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ use tracing::{info, warn};
|
|||
use crate::auth::OptionalUser;
|
||||
use crate::consts::{AI_FILTERS_MAX_TOKENS, AI_FILTERS_TEMPERATURE, AI_FILTERS_WEEKLY_TOKEN_LIMIT};
|
||||
use crate::data::slugify;
|
||||
use crate::data::travel_time::TravelData;
|
||||
use crate::parsing::{parse_filters, row_passes_filters};
|
||||
use crate::pocketbase::{get_superuser_token, log_ai_query};
|
||||
use crate::routes::{FeatureInfo, FeaturesResponse};
|
||||
use crate::state::{AppState, SharedState};
|
||||
|
|
@ -62,6 +64,8 @@ pub struct AiFiltersResponse {
|
|||
notes: String,
|
||||
/// The listing mode used for this response (historical/buy/rent)
|
||||
listing_type: String,
|
||||
/// Number of properties matching the proposed filters, including any travel time filters
|
||||
match_count: usize,
|
||||
}
|
||||
|
||||
/// Strip markdown code fences (```json ... ``` or ``` ... ```) from LLM output.
|
||||
|
|
@ -556,6 +560,117 @@ async fn update_ai_usage(state: &AppState, user_id: &str, tokens_used: u64, week
|
|||
}
|
||||
}
|
||||
|
||||
/// Convert validated filter JSON back to the `;;`-separated filter string format
|
||||
/// that `parse_filters` expects.
|
||||
///
|
||||
/// Numeric: `{"name": [min, max]}` → `name:min:max`
|
||||
/// Enum: `{"name": ["val1", "val2"]}` → `name:val1|val2`
|
||||
fn filters_to_filter_string(filters: &Value) -> String {
|
||||
let obj = match filters.as_object() {
|
||||
Some(obj) => obj,
|
||||
None => return String::new(),
|
||||
};
|
||||
|
||||
let mut parts = Vec::new();
|
||||
for (name, value) in obj {
|
||||
if let Some(arr) = value.as_array() {
|
||||
if arr.len() == 2 && arr[0].is_number() && arr[1].is_number() {
|
||||
let min = arr[0].as_f64().unwrap_or(0.0);
|
||||
let max = arr[1].as_f64().unwrap_or(0.0);
|
||||
parts.push(format!("{name}:{min}:{max}"));
|
||||
} else if !arr.is_empty() && arr[0].is_string() {
|
||||
let values: Vec<&str> = arr.iter().filter_map(|v| v.as_str()).collect();
|
||||
if !values.is_empty() {
|
||||
parts.push(format!("{name}:{}", values.join("|")));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parts.join(";;")
|
||||
}
|
||||
|
||||
/// Count how many rows in the property dataset pass the given property filters
|
||||
/// AND travel time filters. Travel time data is loaded from the TravelTimeStore
|
||||
/// and checked per-postcode (same logic as hexagons.rs).
|
||||
fn count_matching_rows(
|
||||
state: &AppState,
|
||||
filters: &Value,
|
||||
travel_time_filters: &[TravelTimeFilter],
|
||||
) -> usize {
|
||||
let filter_str = filters_to_filter_string(filters);
|
||||
|
||||
let quant = state.data.quant_ref();
|
||||
let (parsed_filters, parsed_enum_filters) = if filter_str.is_empty() {
|
||||
(Vec::new(), Vec::new())
|
||||
} else {
|
||||
match parse_filters(
|
||||
Some(&filter_str),
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
&quant,
|
||||
) {
|
||||
Ok(f) => f,
|
||||
Err(err) => {
|
||||
warn!("Failed to parse filters for match count: {err}");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Load travel time data for each filter entry
|
||||
let travel_data: Vec<(TravelData, Option<f32>, Option<f32>)> = travel_time_filters
|
||||
.iter()
|
||||
.filter_map(|ttf| {
|
||||
let data = state.travel_time_store.get(&ttf.mode, &ttf.slug).ok()?;
|
||||
Some((data, ttf.min, ttf.max))
|
||||
})
|
||||
.collect();
|
||||
let has_travel = !travel_data.is_empty();
|
||||
|
||||
let feature_data = &state.data.feature_data;
|
||||
let num_features = state.data.num_features;
|
||||
let num_rows = state.data.lat.len();
|
||||
let (pc_interner, pc_keys) = state.data.postcode_parts();
|
||||
|
||||
let mut count = 0usize;
|
||||
for row in 0..num_rows {
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if has_travel {
|
||||
let postcode = pc_interner.resolve(&pc_keys[row]);
|
||||
let mut passes_travel = true;
|
||||
for (data, fmin, fmax) in &travel_data {
|
||||
let pass = if let Some(mins) = data.get(postcode).map(|r| r.minutes as f32) {
|
||||
fmin.map_or(true, |min| mins >= min)
|
||||
&& fmax.map_or(true, |max| mins <= max)
|
||||
} else {
|
||||
false // no travel data → postcode not reachable
|
||||
};
|
||||
if !pass {
|
||||
passes_travel = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !passes_travel {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
count
|
||||
}
|
||||
|
||||
/// Maximum number of round trips (function calls + retries) before giving up.
|
||||
const MAX_TOOL_ROUNDS: usize = 5;
|
||||
|
||||
|
|
@ -631,6 +746,7 @@ pub async fn post_ai_filters(
|
|||
})];
|
||||
|
||||
let mut total_tokens_accumulated: u64 = 0;
|
||||
let mut refinement_attempts = 0u32;
|
||||
|
||||
// Function calling loop: model may call search_destinations, we execute and feed back
|
||||
for round in 0..MAX_TOOL_ROUNDS {
|
||||
|
|
@ -776,6 +892,42 @@ pub async fn post_ai_filters(
|
|||
map.insert("Listing status".to_string(), json!([listing_value]));
|
||||
}
|
||||
|
||||
// Count matching properties and refine if too restrictive
|
||||
let match_count = count_matching_rows(&state, &filters, &travel_time_filters);
|
||||
info!(match_count = match_count, round = round, "AI filter match count");
|
||||
|
||||
if match_count == 0 {
|
||||
refinement_attempts += 1;
|
||||
let total_rows = state.data.lat.len();
|
||||
info!(
|
||||
attempt = refinement_attempts,
|
||||
"0 matches out of {total_rows} — asking AI to relax filters"
|
||||
);
|
||||
let feedback = match refinement_attempts {
|
||||
1 => format!(
|
||||
"Your proposed filters matched 0 properties out of {total_rows} total. \
|
||||
The combination is too restrictive. Please widen some numeric ranges \
|
||||
or add more enum values while keeping the user's intent. \
|
||||
Output the adjusted JSON."
|
||||
),
|
||||
2 => format!(
|
||||
"Still 0 matches out of {total_rows}. Please widen ranges further. \
|
||||
Output the adjusted JSON."
|
||||
),
|
||||
_ => format!(
|
||||
"Still 0 matches out of {total_rows}. Please remove additional filters \
|
||||
until some properties match, keeping the user's core priority. \
|
||||
Output the adjusted JSON."
|
||||
),
|
||||
};
|
||||
contents.push(candidate.clone());
|
||||
contents.push(json!({
|
||||
"role": "user",
|
||||
"parts": [{ "text": feedback }]
|
||||
}));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Update usage with total accumulated tokens
|
||||
let new_total = tokens_used + total_tokens_accumulated;
|
||||
update_ai_usage(&state, &user.id, new_total, current_week).await;
|
||||
|
|
@ -810,6 +962,7 @@ pub async fn post_ai_filters(
|
|||
travel_time_filters,
|
||||
notes,
|
||||
listing_type: listing_type.to_string(),
|
||||
match_count,
|
||||
}));
|
||||
}
|
||||
|
||||
|
|
@ -902,8 +1055,10 @@ fn validate_travel_time_filters(raw: &Value, state: &AppState) -> Vec<TravelTime
|
|||
fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type: &str) -> Value {
|
||||
let mut result = serde_json::Map::new();
|
||||
|
||||
// Build lookup maps from feature metadata, filtering by listing mode
|
||||
let mut numeric_features: rustc_hash::FxHashMap<&str, (f32, f32)> =
|
||||
// Build lookup maps from feature metadata, filtering by listing mode.
|
||||
// Store both slider bounds (min/max from percentiles) and true data bounds
|
||||
// (histogram.min/max) so one-sided AI filters use the full data range.
|
||||
let mut numeric_features: rustc_hash::FxHashMap<&str, (f32, f32, f32, f32)> =
|
||||
rustc_hash::FxHashMap::default();
|
||||
let mut enum_features: rustc_hash::FxHashMap<&str, &[String]> =
|
||||
rustc_hash::FxHashMap::default();
|
||||
|
|
@ -915,12 +1070,14 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type:
|
|||
name,
|
||||
min,
|
||||
max,
|
||||
histogram,
|
||||
modes,
|
||||
..
|
||||
} => {
|
||||
// Only include features valid for the chosen listing mode
|
||||
if modes.is_empty() || modes.contains(&listing_type) {
|
||||
numeric_features.insert(name, (*min, *max));
|
||||
numeric_features
|
||||
.insert(name, (*min, *max, histogram.min, histogram.max));
|
||||
}
|
||||
}
|
||||
FeatureInfo::Enum { name, values, .. } => {
|
||||
|
|
@ -933,32 +1090,37 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type:
|
|||
}
|
||||
}
|
||||
|
||||
// Process numeric filters — each sets one bound (min or max)
|
||||
// Process numeric filters — each sets one bound (min or max).
|
||||
// The unset side uses the true data min/max (from histogram), not
|
||||
// the slider bounds (percentile-based), so a "max" filter for crime
|
||||
// produces [0, value] rather than [2nd-percentile, value].
|
||||
if let Some(arr) = raw.get("numeric_filters").and_then(|val| val.as_array()) {
|
||||
for item in arr {
|
||||
let name = match item.get("name").and_then(|val| val.as_str()) {
|
||||
Some(name) => name,
|
||||
None => continue,
|
||||
};
|
||||
let (feat_min, feat_max) = match numeric_features.get(name) {
|
||||
Some(range) => *range,
|
||||
None => continue,
|
||||
};
|
||||
let (slider_min, slider_max, data_min, data_max) =
|
||||
match numeric_features.get(name) {
|
||||
Some(range) => *range,
|
||||
None => continue,
|
||||
};
|
||||
let bound = match item.get("bound").and_then(|val| val.as_str()) {
|
||||
Some(b) => b,
|
||||
None => continue,
|
||||
};
|
||||
// Clamp value to true data range (not slider range)
|
||||
let value = match item.get("value").and_then(|val| val.as_f64()) {
|
||||
Some(v) => v.max(feat_min as f64).min(feat_max as f64) as f32,
|
||||
Some(v) => v.max(data_min as f64).min(data_max as f64) as f32,
|
||||
None => continue,
|
||||
};
|
||||
let (filter_min, filter_max) = match bound {
|
||||
"min" => (value, feat_max),
|
||||
"max" => (feat_min, value),
|
||||
"min" => (value, data_max),
|
||||
"max" => (data_min, value),
|
||||
_ => continue,
|
||||
};
|
||||
// Only include if range is narrower than full range
|
||||
if filter_min > feat_min || filter_max < feat_max {
|
||||
// Only include if range is narrower than full slider range
|
||||
if filter_min > slider_min || filter_max < slider_max {
|
||||
result.insert(name.to_string(), json!([filter_min, filter_max]));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue