This commit is contained in:
Andras Schmelczer 2026-05-13 08:00:12 +01:00
parent 63713c3a2b
commit bd6b511f16
17 changed files with 544 additions and 377 deletions

View file

@ -983,81 +983,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
FeatureGroup {
name: "Amenities",
features: &[
Feature::Numeric(FeatureConfig {
name: "Distance to nearest park (km)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.1,
description: "Distance to the closest park or green space",
detail: "Straight-line distance in kilometres from the postcode to the nearest park entrance. Covers public parks, gardens, playing fields, and play spaces. Uses access point locations from the OS Open Greenspace dataset, so properties bordering a large park correctly show a short distance.",
source: "os-open-greenspace",
prefix: "",
suffix: " km",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Distance to nearest grocery store (km)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.1,
description: "Distance to the closest grocery shop or supermarket",
detail: "Straight-line distance in kilometres from the postcode to the nearest grocery shop, supermarket, or convenience store. Uses OpenStreetMap POIs, with Waitrose and Tesco coverage from GEOLYTIX retail points.",
source: "osm-pois",
prefix: "",
suffix: " km",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Distance to nearest cafe (km)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.1,
description: "Distance to the closest cafe",
detail: "Straight-line distance in kilometres from the postcode to the nearest cafe, ice-cream shop, or internet cafe mapped in OpenStreetMap.",
source: "osm-pois",
prefix: "",
suffix: " km",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Distance to nearest pub (km)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.1,
description: "Distance to the closest pub",
detail: "Straight-line distance in kilometres from the postcode to the nearest pub, social club, brewery, distillery, or winery mapped in OpenStreetMap.",
source: "osm-pois",
prefix: "",
suffix: " km",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Distance to nearest restaurant (km)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.1,
description: "Distance to the closest restaurant",
detail: "Straight-line distance in kilometres from the postcode to the nearest restaurant or food court mapped in OpenStreetMap.",
source: "osm-pois",
prefix: "",
suffix: " km",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Noise (dB)",
bounds: Bounds::Fixed {

View file

@ -49,8 +49,8 @@ fn seo_page_for_path(path: &str) -> Option<SeoPage> {
match path {
"/" => Some(SeoPage {
canonical_path: "/",
title: "Find the best postcodes and areas to live in England | Perfect Postcode",
description: "Discover where to live by comparing England postcodes by budget, commute, schools, crime, noise, broadband, property prices and local amenities before viewing homes.",
title: "Stop searching the wrong places | Perfect Postcode",
description: "Filter every postcode in England by budget, commute, schools, crime, noise, broadband, property prices and amenities before you start chasing viewings.",
indexable: true,
}),
"/learn" | "/support" => Some(SeoPage {

View file

@ -359,9 +359,18 @@ pub fn build_system_prompt(
or \"max\" (at most this value). Never set two filters on the same feature.\n\
- Use EXACT feature names from the list spelling, capitalisation, and punctuation must match.\n\
- \"cheap\" / \"affordable\" = lower price range. \"expensive\" = higher price range.\n\
- \"low crime\" / \"safe\" = low values on Serious crime and Minor crime summary features. \
\"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of amenities (Park) within 2km.\n\
- \"low crime\" / \"safe\" = low values on the Serious crime and Minor crime features. \
Prefer the per-1k resident crime features for broad area safety; use specific crime \
features only when the user names a crime type.\n\
- \"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of amenities (Park) within 2km \
or low Distance to nearest park (km), depending on wording.\n\
- \"good schools\" = Good+ school features. \"outstanding schools\" = Outstanding school features.\n\
- Amenities and transport stops are normal filters in the feature catalogue. \
For \"near a bus stop\", \"near a station\", \"near shops\", etc., use the exact \
Distance to nearest amenity (...) or Number of amenities (...) feature when available.\n\
- Politics/elections are normal filters in the Neighbours group. Use exact vote share \
features such as % Labour, % Conservative, % Liberal Democrat, % Reform UK, % Green, \
% Other parties, or Voter turnout (%) when the user asks for political character.\n\
- When the user says a number like \"under 400k\", interpret it as 400000.\n\
- When the user says \"3 bed\" or \"3 bedroom\", use Number of bedrooms & living rooms \
(note: this counts bedrooms + living rooms combined, so 3 bed ~ min 4).\n\
@ -393,12 +402,14 @@ pub fn build_system_prompt(
You can add travel time filters when the user mentions commute times, \
proximity to places, or wanting to be near/within X minutes of somewhere.\n\
\n\
Available transport modes (only use modes that have destinations):\n\
Available travel-time modes (only use modes that have destinations):\n\
{}\n\
- \"car\" / \"drive\" / \"driving\" = car mode\n\
- \"cycle\" / \"bike\" / \"cycling\" = bicycle mode\n\
- \"walk\" / \"walking\" / \"on foot\" = walking mode\n\
- \"train\" / \"tube\" / \"bus\" / \"public transport\" / \"commute\" = transit mode\n\
- If a mode appears in the available mode list but is not named above, you may still \
use the exact mode string from the list.\n\
\n\
When the user mentions a specific place, you MUST call the search_destinations \
tool to find the exact slug. Use the name and slug from the search results.\n\
@ -407,10 +418,10 @@ pub fn build_system_prompt(
include a travel_time_filter for it.\n\
\n\
Travel time values are in MINUTES (0-120 range).\n\
- \"within 30 minutes\" = max 30\n\
- \"at least 10 minutes\" = min 10\n\
- \"30-45 minute commute\" = min 30, max 45\n\
- If only a max is given, omit min (and vice versa).\n\
- \"within 30 minutes\" = set \"max\": 30\n\
- \"at least 10 minutes\" = set \"min\": 10\n\
- \"30-45 minute commute\" = set \"min\": 30 and \"max\": 45 on the same travel_time_filter\n\
- If only a max is given, omit min (and vice versa). Do not use bound/value for travel time.\n\
\n\
INFERRING TRANSPORT MODE (when the user does not specify one explicitly):\n\
- \"commute\" to a major city centre or station = transit\n\
@ -437,10 +448,6 @@ pub fn build_system_prompt(
// Feature catalogue
parts.push("\n--- AVAILABLE FEATURES ---\n".to_string());
for group in &features.groups {
// Skip individual crime features — only expose "Crime summary" aggregates
if group.name == "Crime" {
continue;
}
parts.push(format!("## {}", group.name));
for feature in &group.features {
match feature {
@ -495,8 +502,8 @@ pub fn build_system_prompt(
parts.push(
"\nUser: \"safe quiet area with good schools and parks\"\n\
Output: {\"numeric_filters\": [\
{\"name\": \"Serious crime (avg/yr)\", \"bound\": \"max\", \"value\": 20}, \
{\"name\": \"Minor crime (avg/yr)\", \"bound\": \"max\", \"value\": 50}, \
{\"name\": \"Serious crime per 1k residents (avg/yr)\", \"bound\": \"max\", \"value\": 20}, \
{\"name\": \"Minor crime per 1k residents (avg/yr)\", \"bound\": \"max\", \"value\": 50}, \
{\"name\": \"Noise (dB)\", \"bound\": \"max\", \"value\": 55}, \
{\"name\": \"Good+ primary schools within 2km\", \"bound\": \"min\", \"value\": 2}, \
{\"name\": \"Good+ secondary schools within 2km\", \"bound\": \"min\", \"value\": 1}, \
@ -535,7 +542,7 @@ pub fn build_system_prompt(
{\"name\": \"Last known price\", \"bound\": \"max\", \"value\": 500000}], \
\"enum_filters\": [], \
\"travel_time_filters\": [{\"mode\": \"transit\", \"slug\": \"kings-cross\", \
\"label\": \"Kings Cross\", \"bound\": \"max\", \"value\": 30}], \
\"label\": \"Kings Cross\", \"max\": 30}], \
\"notes\": \"\"}"
.to_string(),
);
@ -552,11 +559,21 @@ pub fn build_system_prompt(
\"enum_filters\": [{\"name\": \"Property type\", \
\"values\": [\"Detached\", \"Semi-Detached\"]}], \
\"travel_time_filters\": [{\"mode\": \"car\", \"slug\": \"manchester\", \
\"label\": \"Manchester\", \"bound\": \"max\", \"value\": 45}], \
\"label\": \"Manchester\", \"max\": 45}], \
\"notes\": \"No filter for: garden\"}"
.to_string(),
);
parts.push(
"\nUser: \"Labour-voting area with low burglary and a station nearby\"\n\
Output: {\"numeric_filters\": [\
{\"name\": \"% Labour\", \"bound\": \"min\", \"value\": 40}, \
{\"name\": \"Burglary (avg/yr)\", \"bound\": \"max\", \"value\": 10}, \
{\"name\": \"Distance to nearest amenity (Rail station) (km)\", \"bound\": \"max\", \"value\": 1}], \
\"enum_filters\": [], \"travel_time_filters\": [], \"notes\": \"\"}"
.to_string(),
);
// Examples showing rent and price features
parts.push(
"\nUser: \"2 bed flat with rent under £1500/month\"\n\
@ -585,8 +602,9 @@ pub fn build_system_prompt(
"\n--- OUTPUT FORMAT ---\n\
{\"numeric_filters\": [...], \"enum_filters\": [...], \
\"travel_time_filters\": [{\"mode\": \"...\", \"slug\": \"...\", \"label\": \"...\", \
\"bound\": \"min\"|\"max\", \"value\": N}, ...], \"notes\": \"...\"}\n\
- travel_time_filters: use ONLY slugs returned by search_destinations. If a place isn't found, mention it in notes.\n\
\"min\": N, \"max\": N}, ...], \"notes\": \"...\"}\n\
- travel_time_filters: min and max are both optional, but include at least one. \
Use ONLY slugs returned by search_destinations. If a place isn't found, mention it in notes.\n\
Respond with ONLY the JSON object. No explanation."
.to_string(),
);
@ -685,6 +703,22 @@ async fn update_ai_usage(state: &AppState, user_id: &str, tokens_used: u64, week
}
}
/// Records the outcome of one AI filter request.
///
/// When the request consumed any tokens, the new running total
/// (`existing_tokens_used + request_tokens_used`, saturating at `u64::MAX`)
/// is passed to `update_ai_usage` for `week`, and the `ai_tokens_total`
/// counter is advanced by the tokens this request used.
///
/// The `ai_requests_total` counter is always incremented once, labelled with
/// `status`, regardless of whether any tokens were spent.
async fn record_ai_request_usage(
    state: &AppState,
    user_id: &str,
    existing_tokens_used: u64,
    week: u64,
    request_tokens_used: u64,
    status: &'static str,
) {
    let spent_tokens = request_tokens_used > 0;
    if spent_tokens {
        // Saturate rather than wrap so an extreme total can never overflow.
        update_ai_usage(
            state,
            user_id,
            existing_tokens_used.saturating_add(request_tokens_used),
            week,
        )
        .await;
        counter!("ai_tokens_total").increment(request_tokens_used);
    }

    counter!("ai_requests_total", "status" => status).increment(1);
}
/// Convert validated filter JSON back to the `;;`-separated filter string format
/// that `parse_filters` expects.
///
@ -848,7 +882,8 @@ pub async fn post_ai_filters(
let user_text = if let Some(ref ctx) = req.context {
let mut msg = String::new();
msg.push_str("Currently active filters:\n");
msg.push_str(&serde_json::to_string(&ctx.filters).unwrap_or_default());
let normalized_filters = normalize_context_filters(&ctx.filters);
msg.push_str(&serde_json::to_string(&normalized_filters).unwrap_or_default());
if !ctx.travel_time.is_empty() {
msg.push_str("\nCurrently active travel time filters:\n");
for tt in &ctx.travel_time {
@ -892,13 +927,28 @@ pub async fn post_ai_filters(
}
});
let json_resp = gemini_chat(
let json_resp = match gemini_chat(
&state.http_client,
&state.gemini_api_key,
&state.gemini_model,
&body,
)
.await?;
.await
{
Ok(resp) => resp,
Err(err) => {
record_ai_request_usage(
&state,
&user.id,
tokens_used,
current_week,
total_tokens_accumulated,
"llm_error",
)
.await;
return Err(err);
}
};
// Accumulate token usage
total_tokens_accumulated += json_resp
@ -907,22 +957,43 @@ pub async fn post_ai_filters(
.and_then(|tc| tc.as_u64())
.unwrap_or(0);
let candidate = json_resp
let candidate = match json_resp
.get("candidates")
.and_then(|cs| cs.get(0))
.and_then(|c| c.get("content"))
.ok_or_else(|| {
{
Some(candidate) => candidate,
None => {
warn!("Malformed Gemini response: missing candidates[0].content");
(StatusCode::BAD_GATEWAY, "Malformed Gemini response".into())
})?;
record_ai_request_usage(
&state,
&user.id,
tokens_used,
current_week,
total_tokens_accumulated,
"malformed_response",
)
.await;
return Err((StatusCode::BAD_GATEWAY, "Malformed Gemini response".into()));
}
};
let parts = candidate
.get("parts")
.and_then(|p| p.as_array())
.ok_or_else(|| {
let parts = match candidate.get("parts").and_then(|p| p.as_array()) {
Some(parts) => parts,
None => {
warn!("Malformed Gemini response: missing parts array");
(StatusCode::BAD_GATEWAY, "Malformed Gemini response".into())
})?;
record_ai_request_usage(
&state,
&user.id,
tokens_used,
current_week,
total_tokens_accumulated,
"malformed_response",
)
.await;
return Err((StatusCode::BAD_GATEWAY, "Malformed Gemini response".into()));
}
};
// Check if the model made a function call.
// Find the full part (includes thoughtSignature required by Gemini 3 models).
@ -967,7 +1038,7 @@ pub async fn post_ai_filters(
"parts": [{
"functionResponse": {
"name": fn_name,
"response": { "results": fn_result }
"response": fn_result
}
}]
}));
@ -991,6 +1062,15 @@ pub async fn post_ai_filters(
round, retry_count
);
if retry_count > MAX_RETRIES {
record_ai_request_usage(
&state,
&user.id,
tokens_used,
current_week,
total_tokens_accumulated,
"empty_response",
)
.await;
return Err((
StatusCode::BAD_GATEWAY,
"AI returned empty responses".into(),
@ -1010,6 +1090,15 @@ pub async fn post_ai_filters(
retry_count += 1;
warn!(error = %err, round = round, retry = retry_count, "Failed to parse Gemini JSON output");
if retry_count > MAX_RETRIES {
record_ai_request_usage(
&state,
&user.id,
tokens_used,
current_week,
total_tokens_accumulated,
"invalid_json",
)
.await;
return Err((StatusCode::BAD_GATEWAY, "AI returned invalid JSON".into()));
}
contents.push(candidate.clone());
@ -1047,10 +1136,15 @@ pub async fn post_ai_filters(
if refinement_attempts > MAX_REFINEMENTS {
warn!("Refinement budget exhausted, returning filters with 0 matches");
let new_total = tokens_used + total_tokens_accumulated;
update_ai_usage(&state, &user.id, new_total, current_week).await;
counter!("ai_tokens_total").increment(total_tokens_accumulated);
counter!("ai_requests_total", "status" => "zero_matches").increment(1);
record_ai_request_usage(
&state,
&user.id,
tokens_used,
current_week,
total_tokens_accumulated,
"zero_matches",
)
.await;
let notes = if notes.is_empty() {
"No properties match these filters. Try relaxing some constraints.".to_string()
@ -1094,12 +1188,15 @@ pub async fn post_ai_filters(
continue;
}
// Update usage with total accumulated tokens
let new_total = tokens_used + total_tokens_accumulated;
update_ai_usage(&state, &user.id, new_total, current_week).await;
counter!("ai_tokens_total").increment(total_tokens_accumulated);
counter!("ai_requests_total", "status" => "success").increment(1);
record_ai_request_usage(
&state,
&user.id,
tokens_used,
current_week,
total_tokens_accumulated,
"success",
)
.await;
// Log the query to PocketBase (fire-and-forget)
let filters_json = serde_json::to_string(&filters).unwrap_or_default();
@ -1134,12 +1231,53 @@ pub async fn post_ai_filters(
"AI exhausted {} total rounds without final response (tools={}, retries={}, refinements={})",
MAX_TOTAL_ROUNDS, tool_call_count, retry_count, refinement_attempts
);
record_ai_request_usage(
&state,
&user.id,
tokens_used,
current_week,
total_tokens_accumulated,
"incomplete",
)
.await;
Err((
StatusCode::BAD_GATEWAY,
"AI could not complete the request".into(),
))
}
/// Reads `key` from `item` as a travel time in minutes.
///
/// Returns `None` when the key is missing, when its value cannot be read as
/// an `f64`, or when that value is not finite. Otherwise the value is clamped
/// to the 0–120 range and narrowed to `f32`.
fn travel_time_minute_field(item: &Value, key: &str) -> Option<f32> {
    let minutes = item.get(key)?.as_f64()?;
    if !minutes.is_finite() {
        return None;
    }
    Some(minutes.clamp(0.0, 120.0) as f32)
}
/// Extracts the `(min, max)` travel time bounds, in minutes, from one filter item.
///
/// Two shapes are understood:
/// - explicit `"min"` / `"max"` fields, which take precedence when either is usable;
/// - the legacy `"bound": "min" | "max"` plus `"value"` pair, consulted only
///   when neither explicit field yields a value.
///
/// Each field is read through `travel_time_minute_field`. If both bounds are
/// present but inverted, they are swapped so the returned pair is ordered.
fn parse_travel_time_bounds(item: &Value) -> (Option<f32>, Option<f32>) {
    let lower = travel_time_minute_field(item, "min");
    let upper = travel_time_minute_field(item, "max");

    let bounds = if lower.is_none() && upper.is_none() {
        // Fall back to the legacy single-bound schema.
        let legacy_bound = item.get("bound").and_then(|field| field.as_str());
        let legacy_value = travel_time_minute_field(item, "value");
        match (legacy_bound, legacy_value) {
            (Some("min"), Some(minutes)) => (Some(minutes), None),
            (Some("max"), Some(minutes)) => (None, Some(minutes)),
            _ => (None, None),
        }
    } else {
        (lower, upper)
    };

    // Normalise an inverted range instead of rejecting it.
    match bounds {
        (Some(lo), Some(hi)) if lo > hi => (Some(hi), Some(lo)),
        ordered => ordered,
    }
}
/// Validate travel time filters from LLM output against available destinations.
fn validate_travel_time_filters(raw: &Value, state: &AppState) -> Vec<TravelTimeFilter> {
let arr = match raw
@ -1177,14 +1315,7 @@ fn validate_travel_time_filters(raw: &Value, state: &AppState) -> Vec<TravelTime
continue;
}
let bound = item.get("bound").and_then(|val| val.as_str());
let value = item.get("value").and_then(|val| val.as_f64());
let (min, max) = match (bound, value) {
(Some("min"), Some(val)) => (Some(val.clamp(0.0, 120.0) as f32), None),
(Some("max"), Some(val)) => (None, Some(val.clamp(0.0, 120.0) as f32)),
_ => (None, None),
};
let (min, max) = parse_travel_time_bounds(item);
// Only include if at least one bound is set
if min.is_some() || max.is_some() {
@ -1251,14 +1382,16 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
// produces [0, value] rather than [2nd-percentile, value].
if let Some(arr) = raw.get("numeric_filters").and_then(|val| val.as_array()) {
for item in arr {
let name = match item.get("name").and_then(|val| val.as_str()) {
let raw_name = match item.get("name").and_then(|val| val.as_str()) {
Some(name) => name,
None => continue,
};
let (slider_min, slider_max, data_min, data_max) = match numeric_features.get(name) {
Some(range) => *range,
None => continue,
};
let name = canonical_filter_name(raw_name);
let (slider_min, slider_max, data_min, data_max) =
match numeric_features.get(name.as_str()) {
Some(range) => *range,
None => continue,
};
let bound = match item.get("bound").and_then(|val| val.as_str()) {
Some(b) => b,
None => continue,
@ -1275,7 +1408,7 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
};
// Only include if range is narrower than full slider range
if filter_min > slider_min || filter_max < slider_max {
result.insert(name.to_string(), json!([filter_min, filter_max]));
result.insert(name, json!([filter_min, filter_max]));
}
}
}
@ -1283,11 +1416,12 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
// Process enum filters
if let Some(arr) = raw.get("enum_filters").and_then(|val| val.as_array()) {
for item in arr {
let name = match item.get("name").and_then(|val| val.as_str()) {
let raw_name = match item.get("name").and_then(|val| val.as_str()) {
Some(name) => name,
None => continue,
};
let valid_values = match enum_features.get(name) {
let name = canonical_filter_name(raw_name);
let valid_values = match enum_features.get(name.as_str()) {
Some(values) => *values,
None => continue,
};
@ -1298,7 +1432,7 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
.filter(|str_val| valid_values.iter().any(|known| known == str_val))
.collect();
if !valid.is_empty() && valid.len() < valid_values.len() {
result.insert(name.to_string(), json!(valid));
result.insert(name, json!(valid));
}
}
}
@ -1334,4 +1468,56 @@ mod tests {
let input = " ```json\n {\"a\": 1} \n``` ";
assert_eq!(strip_markdown_fences(input), "{\"a\": 1}");
}
#[test]
fn synthetic_filter_keys_are_normalized_to_backend_names() {
    // Each pair is (synthetic filter key, expected backend feature name).
    let cases = [
        ("Schools:primary:good:2:0", "Good+ primary schools within 2km"),
        (
            "Specific crimes:Burglary%20%28avg%2Fyr%29:1",
            "Burglary (avg/yr)",
        ),
        ("Political vote share:%25%20Labour:0", "% Labour"),
        (
            "Transport distance:Distance%20to%20nearest%20amenity%20%28Bus%20stop%29%20%28km%29:0",
            "Distance to nearest amenity (Bus stop) (km)",
        ),
    ];

    for (synthetic_key, backend_name) in cases {
        assert_eq!(canonical_filter_name(synthetic_key), backend_name);
    }
}
#[test]
fn context_filters_are_normalized_before_prompting() {
    // One synthetic key that must be rewritten, one plain key that must pass through.
    let context = json!({
        "Political vote share:%25%20Green:0": [40, 100],
        "Estimated current price": [0, 500000],
    });

    let rewritten = normalize_context_filters(&context);

    assert_eq!(rewritten["% Green"], json!([40, 100]));
    assert_eq!(rewritten["Estimated current price"], json!([0, 500000]));
}
#[test]
fn travel_time_bounds_accept_min_max_schema() {
    // Explicit min/max fields are returned as-is when already in range and ordered.
    let (lower, upper) = parse_travel_time_bounds(&json!({ "min": 30, "max": 45 }));
    assert_eq!(lower, Some(30.0));
    assert_eq!(upper, Some(45.0));
}
#[test]
fn travel_time_bounds_accept_legacy_bound_value_schema() {
    // The older bound/value pair must still be understood when min/max are absent.
    let (lower, upper) = parse_travel_time_bounds(&json!({ "bound": "max", "value": 30 }));
    assert_eq!(lower, None);
    assert_eq!(upper, Some(30.0));
}
#[test]
fn travel_time_bounds_clamp_and_order_range() {
    // 150 clamps to 120 and -10 clamps to 0; the inverted pair is then swapped.
    let (lower, upper) = parse_travel_time_bounds(&json!({ "min": 150, "max": -10 }));
    assert_eq!(lower, Some(0.0));
    assert_eq!(upper, Some(120.0));
}
}

View file

@ -160,17 +160,30 @@ pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
for (feat_idx, name) in data.poi_metrics.feature_names.iter().enumerate() {
if let Some(category) = features::dynamic_poi_distance_category(name) {
let stats = &data.poi_metrics.feature_stats[feat_idx];
let is_park = category.eq_ignore_ascii_case("park");
dynamic_poi_features.push(FeatureInfo::Numeric {
name: name.clone(),
min: stats.slider_min,
max: stats.slider_max,
step: 0.1,
histogram: stats.histogram.clone(),
description: format!("Distance to the closest {category} amenity"),
detail: format!(
"Straight-line distance in kilometres from the postcode to the nearest {category} amenity in the amenities dataset."
),
source: "osm-pois".to_string(),
description: if is_park {
"Distance to the closest park or green space".to_string()
} else {
format!("Distance to the closest {category} amenity")
},
detail: if is_park {
"Straight-line distance in kilometres from the postcode to the nearest park entrance. Covers public parks, gardens, playing fields, and play spaces. Uses access point locations from the OS Open Greenspace dataset, so properties bordering a large park correctly show a short distance.".to_string()
} else {
format!(
"Straight-line distance in kilometres from the postcode to the nearest {category} amenity in the amenities dataset."
)
},
source: if is_park {
"os-open-greenspace".to_string()
} else {
"osm-pois".to_string()
},
prefix: "",
suffix: " km",
raw: false,