Fix data pipelines once and for all
This commit is contained in:
parent
08560476c5
commit
4012e4e047
46 changed files with 4508 additions and 855 deletions
|
|
@ -55,6 +55,7 @@ const DASHBOARD_POI_GROUPS: &[(&str, &[&str])] = &[
|
|||
&[
|
||||
"Rail station",
|
||||
"Tube station",
|
||||
"Tram & Metro stop",
|
||||
"Bus station",
|
||||
"Bus stop",
|
||||
"Airport",
|
||||
|
|
@ -79,7 +80,7 @@ const DASHBOARD_POI_GROUPS: &[(&str, &[&str])] = &[
|
|||
),
|
||||
(
|
||||
"Health",
|
||||
&["GP Surgery", "Pharmacy", "Dentist", "Hospital & Clinic"],
|
||||
&["GP Surgery", "Pharmacy", "Dentist", "Hospital", "Clinic"],
|
||||
),
|
||||
(
|
||||
"Leisure",
|
||||
|
|
|
|||
|
|
@ -180,20 +180,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
raw: false,
|
||||
absolute: true,
|
||||
}),
|
||||
Feature::Enum(EnumFeatureConfig {
|
||||
name: "Within conservation area",
|
||||
order: Some(&["Yes", "No"]),
|
||||
description: "Whether the postcode point falls inside a designated conservation area",
|
||||
detail: "Planning Data conservation area boundaries, matched to the postcode representative point. The national dataset is a work in progress and may include duplicates or incomplete local coverage, so boundary-sensitive decisions should be checked with the local planning authority.",
|
||||
source: "conservation-areas",
|
||||
}),
|
||||
Feature::Enum(EnumFeatureConfig {
|
||||
name: "Listed building",
|
||||
order: Some(&["Yes", "No"]),
|
||||
description: "Whether this property appears to match a Historic England listed building entry",
|
||||
detail: "Historic England National Heritage List for England listed-building points, matched conservatively to property addresses using the listed-entry name and nearby postcode candidates. Treat this as a screening signal, not a legal determination: verify any specific property on the NHLE and with the local planning authority.",
|
||||
source: "listed-buildings",
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Noise (dB)",
|
||||
bounds: Bounds::Fixed {
|
||||
|
|
@ -209,6 +195,20 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
raw: false,
|
||||
absolute: false,
|
||||
}),
|
||||
Feature::Enum(EnumFeatureConfig {
|
||||
name: "Within conservation area",
|
||||
order: Some(&["Yes", "No"]),
|
||||
description: "Whether the postcode point falls inside a designated conservation area",
|
||||
detail: "Planning Data conservation area boundaries, matched to the postcode representative point. The national dataset is a work in progress and may include duplicates or incomplete local coverage, so boundary-sensitive decisions should be checked with the local planning authority.",
|
||||
source: "conservation-areas",
|
||||
}),
|
||||
Feature::Enum(EnumFeatureConfig {
|
||||
name: "Listed building",
|
||||
order: Some(&["Yes", "No"]),
|
||||
description: "Whether this property appears to match a Historic England listed building entry",
|
||||
detail: "Historic England National Heritage List for England listed-building points, matched conservatively to property addresses using the listed-entry name and nearby postcode candidates. Treat this as a screening signal, not a legal determination: verify any specific property on the NHLE and with the local planning authority.",
|
||||
source: "listed-buildings",
|
||||
}),
|
||||
],
|
||||
},
|
||||
FeatureGroup {
|
||||
|
|
@ -307,89 +307,14 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
name: "Schools",
|
||||
features: &[
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Good+ primary schools within 2km",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 10.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Primary schools rated Good or Outstanding by Ofsted within 2km",
|
||||
detail: "State-funded primary schools within 2km with a current Ofsted rating of Good or Outstanding. Schools not yet inspected are excluded.",
|
||||
source: "ofsted",
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Good+ secondary schools within 2km",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 5.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Secondary schools rated Good or Outstanding by Ofsted within 2km",
|
||||
detail: "State-funded secondary schools within 2km with a current Ofsted rating of Good or Outstanding. Schools not yet inspected are excluded.",
|
||||
source: "ofsted",
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Outstanding primary schools within 2km",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 10.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Primary schools rated Outstanding by Ofsted within 2km",
|
||||
detail: "State-funded primary schools within 2km with a current Ofsted rating of Outstanding. Schools not yet inspected are excluded.",
|
||||
source: "ofsted",
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Outstanding secondary schools within 2km",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 5.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Secondary schools rated Outstanding by Ofsted within 2km",
|
||||
detail: "State-funded secondary schools within 2km with a current Ofsted rating of Outstanding. Schools not yet inspected are excluded.",
|
||||
source: "ofsted",
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Good+ primary schools within 5km",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 30.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Primary schools rated Good or Outstanding by Ofsted within 5km",
|
||||
detail: "State-funded primary schools within 5km with a current Ofsted rating of Good or Outstanding. Schools not yet inspected are excluded.",
|
||||
source: "ofsted",
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Good+ secondary schools within 5km",
|
||||
name: "Good+ primary school catchments",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 15.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Secondary schools rated Good or Outstanding by Ofsted within 5km",
|
||||
detail: "State-funded secondary schools within 5km with a current Ofsted rating of Good or Outstanding. Schools not yet inspected are excluded.",
|
||||
description: "Primary schools rated Good or Outstanding whose modelled catchment area covers this postcode",
|
||||
detail: "How many state-funded primary schools with a current Ofsted rating of Good or Outstanding draw their pupils from an area covering this postcode. Catchment radii are modelled by simulating England's distance-based admissions (each school's places against the local child population, Census 2021) and calibrated against published 'last distance offered' figures; they are estimates, not official admission areas. Schools not yet inspected are excluded.",
|
||||
source: "ofsted",
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
|
|
@ -397,14 +322,14 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
absolute: false,
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Outstanding primary schools within 5km",
|
||||
name: "Good+ secondary school catchments",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 30.0,
|
||||
max: 11.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Primary schools rated Outstanding by Ofsted within 5km",
|
||||
detail: "State-funded primary schools within 5km with a current Ofsted rating of Outstanding. Schools not yet inspected are excluded.",
|
||||
description: "Secondary schools rated Good or Outstanding whose modelled catchment area covers this postcode",
|
||||
detail: "How many state-funded secondary schools with a current Ofsted rating of Good or Outstanding draw their pupils from an area covering this postcode. Catchment radii are modelled by simulating England's distance-based admissions (each school's places against the local child population, Census 2021) and calibrated against published 'last distance offered' figures; they are estimates, not official admission areas. Schools not yet inspected are excluded.",
|
||||
source: "ofsted",
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
|
|
@ -412,14 +337,29 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
absolute: false,
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Outstanding secondary schools within 5km",
|
||||
name: "Outstanding primary school catchments",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 15.0,
|
||||
max: 8.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Secondary schools rated Outstanding by Ofsted within 5km",
|
||||
detail: "State-funded secondary schools within 5km with a current Ofsted rating of Outstanding. Schools not yet inspected are excluded.",
|
||||
description: "Primary schools rated Outstanding whose modelled catchment area covers this postcode",
|
||||
detail: "How many state-funded primary schools with a current Ofsted rating of Outstanding draw their pupils from an area covering this postcode. Catchment radii are modelled by simulating England's distance-based admissions (each school's places against the local child population, Census 2021) and calibrated against published 'last distance offered' figures; they are estimates, not official admission areas. Schools not yet inspected are excluded.",
|
||||
source: "ofsted",
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Outstanding secondary school catchments",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 4.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Secondary schools rated Outstanding whose modelled catchment area covers this postcode",
|
||||
detail: "How many state-funded secondary schools with a current Ofsted rating of Outstanding draw their pupils from an area covering this postcode. Catchment radii are modelled by simulating England's distance-based admissions (each school's places against the local child population, Census 2021) and calibrated against published 'last distance offered' figures; they are estimates, not official admission areas. Schools not yet inspected are excluded.",
|
||||
source: "ofsted",
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
|
|
|
|||
|
|
@ -62,6 +62,42 @@ pub struct AiFiltersResponse {
|
|||
notes: String,
|
||||
/// Number of properties matching the proposed property and travel time filters.
|
||||
match_count: usize,
|
||||
/// Bounding box of the matching properties so the client can move the
|
||||
/// camera to where matches actually are. Absent when nothing matches.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
match_bounds: Option<MatchBounds>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct MatchBounds {
|
||||
south: f32,
|
||||
west: f32,
|
||||
north: f32,
|
||||
east: f32,
|
||||
}
|
||||
|
||||
/// Bounding box over matched coordinates, trimmed to the 5th–95th percentile
|
||||
/// per axis (when there are enough points) so a handful of remote outliers
|
||||
/// doesn't zoom the camera out to all of England.
|
||||
fn percentile_trimmed_bounds(mut lats: Vec<f32>, mut lons: Vec<f32>) -> Option<MatchBounds> {
|
||||
if lats.is_empty() || lats.len() != lons.len() {
|
||||
return None;
|
||||
}
|
||||
lats.sort_unstable_by(f32::total_cmp);
|
||||
lons.sort_unstable_by(f32::total_cmp);
|
||||
let last = lats.len() - 1;
|
||||
let (lo, hi) = if lats.len() >= 20 {
|
||||
let trim = lats.len() / 20;
|
||||
(trim, last - trim)
|
||||
} else {
|
||||
(0, last)
|
||||
};
|
||||
Some(MatchBounds {
|
||||
south: lats[lo],
|
||||
north: lats[hi],
|
||||
west: lons[lo],
|
||||
east: lons[hi],
|
||||
})
|
||||
}
|
||||
|
||||
/// Strip markdown code fences (```json ... ``` or ``` ... ```) from LLM output.
|
||||
|
|
@ -90,17 +126,12 @@ fn school_feature_name_from_key(name: &str) -> Option<&'static str> {
|
|||
let mut parts = rest.split(':');
|
||||
let phase = parts.next()?;
|
||||
let rating = parts.next()?;
|
||||
let distance = parts.next()?;
|
||||
|
||||
match (phase, rating, distance) {
|
||||
("primary", "good", "2") => Some("Good+ primary schools within 2km"),
|
||||
("secondary", "good", "2") => Some("Good+ secondary schools within 2km"),
|
||||
("primary", "outstanding", "2") => Some("Outstanding primary schools within 2km"),
|
||||
("secondary", "outstanding", "2") => Some("Outstanding secondary schools within 2km"),
|
||||
("primary", "good", "5") => Some("Good+ primary schools within 5km"),
|
||||
("secondary", "good", "5") => Some("Good+ secondary schools within 5km"),
|
||||
("primary", "outstanding", "5") => Some("Outstanding primary schools within 5km"),
|
||||
("secondary", "outstanding", "5") => Some("Outstanding secondary schools within 5km"),
|
||||
match (phase, rating) {
|
||||
("primary", "good") => Some("Good+ primary school catchments"),
|
||||
("secondary", "good") => Some("Good+ secondary school catchments"),
|
||||
("primary", "outstanding") => Some("Outstanding primary school catchments"),
|
||||
("secondary", "outstanding") => Some("Outstanding secondary school catchments"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -508,8 +539,8 @@ pub fn build_system_prompt(
|
|||
{\"name\": \"Serious crime (avg/yr)\", \"bound\": \"max\", \"value\": 5}, \
|
||||
{\"name\": \"Minor crime (avg/yr)\", \"bound\": \"max\", \"value\": 20}, \
|
||||
{\"name\": \"Noise (dB)\", \"bound\": \"max\", \"value\": 55}, \
|
||||
{\"name\": \"Good+ primary schools within 2km\", \"bound\": \"min\", \"value\": 2}, \
|
||||
{\"name\": \"Good+ secondary schools within 2km\", \"bound\": \"min\", \"value\": 1}, \
|
||||
{\"name\": \"Good+ primary school catchments\", \"bound\": \"min\", \"value\": 2}, \
|
||||
{\"name\": \"Good+ secondary school catchments\", \"bound\": \"min\", \"value\": 1}, \
|
||||
{\"name\": \"Number of amenities (Park) within 2km\", \"bound\": \"min\", \"value\": 3}], \
|
||||
\"enum_filters\": [], \"travel_time_filters\": [], \"notes\": \"\"}"
|
||||
.to_string(),
|
||||
|
|
@ -519,8 +550,8 @@ pub fn build_system_prompt(
|
|||
"\nUser: \"quiet area with outstanding schools\"\n\
|
||||
Output: {\"numeric_filters\": [\
|
||||
{\"name\": \"Noise (dB)\", \"bound\": \"max\", \"value\": 55}, \
|
||||
{\"name\": \"Outstanding primary schools within 2km\", \"bound\": \"min\", \"value\": 1}, \
|
||||
{\"name\": \"Outstanding secondary schools within 2km\", \"bound\": \"min\", \"value\": 1}], \
|
||||
{\"name\": \"Outstanding primary school catchments\", \"bound\": \"min\", \"value\": 1}, \
|
||||
{\"name\": \"Outstanding secondary school catchments\", \"bound\": \"min\", \"value\": 1}], \
|
||||
\"enum_filters\": [], \"travel_time_filters\": [], \"notes\": \"\"}"
|
||||
.to_string(),
|
||||
);
|
||||
|
|
@ -557,8 +588,8 @@ pub fn build_system_prompt(
|
|||
Output: {\"numeric_filters\": [\
|
||||
{\"name\": \"Total floor area (sqm)\", \"bound\": \"min\", \"value\": 100}, \
|
||||
{\"name\": \"Number of bedrooms & living rooms\", \"bound\": \"min\", \"value\": 5}, \
|
||||
{\"name\": \"Good+ primary schools within 2km\", \"bound\": \"min\", \"value\": 2}, \
|
||||
{\"name\": \"Good+ secondary schools within 2km\", \"bound\": \"min\", \"value\": 1}], \
|
||||
{\"name\": \"Good+ primary school catchments\", \"bound\": \"min\", \"value\": 2}, \
|
||||
{\"name\": \"Good+ secondary school catchments\", \"bound\": \"min\", \"value\": 1}], \
|
||||
\"enum_filters\": [{\"name\": \"Property type\", \
|
||||
\"values\": [\"Detached\", \"Semi-Detached\"]}], \
|
||||
\"travel_time_filters\": [{\"mode\": \"car\", \"slug\": \"manchester\", \
|
||||
|
|
@ -592,7 +623,7 @@ pub fn build_system_prompt(
|
|||
"\nUser: \"3 bed house under 500k with good schools\"\n\
|
||||
Output: {\
|
||||
\"numeric_filters\": [{\"name\": \"Estimated current price\", \"bound\": \"max\", \"value\": 500000}, \
|
||||
{\"name\": \"Good+ primary schools within 2km\", \"bound\": \"min\", \"value\": 2}], \
|
||||
{\"name\": \"Good+ primary school catchments\", \"bound\": \"min\", \"value\": 2}], \
|
||||
\"enum_filters\": [{\"name\": \"Property type\", \
|
||||
\"values\": [\"Detached\", \"Semi-Detached\", \"Terraced\"]}], \
|
||||
\"travel_time_filters\": [], \
|
||||
|
|
@ -759,7 +790,7 @@ fn count_matching_rows(
|
|||
state: &AppState,
|
||||
filters: &Value,
|
||||
travel_time_filters: &[TravelTimeFilter],
|
||||
) -> usize {
|
||||
) -> (usize, Option<MatchBounds>) {
|
||||
let filter_str = filters_to_filter_string(filters);
|
||||
|
||||
let quant = state.data.quant_ref();
|
||||
|
|
@ -778,7 +809,7 @@ fn count_matching_rows(
|
|||
Ok(f) => f,
|
||||
Err(err) => {
|
||||
warn!("Failed to parse filters for match count: {err}");
|
||||
return 0;
|
||||
return (0, None);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
@ -800,6 +831,8 @@ fn count_matching_rows(
|
|||
let has_poi_filters = !parsed_poi_filters.is_empty();
|
||||
|
||||
let mut count = 0usize;
|
||||
let mut matched_lats: Vec<f32> = Vec::new();
|
||||
let mut matched_lons: Vec<f32> = Vec::new();
|
||||
for (row, pc_key) in pc_keys.iter().enumerate().take(num_rows) {
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
|
|
@ -836,9 +869,11 @@ fn count_matching_rows(
|
|||
}
|
||||
|
||||
count += 1;
|
||||
matched_lats.push(state.data.lat[row]);
|
||||
matched_lons.push(state.data.lon[row]);
|
||||
}
|
||||
|
||||
count
|
||||
(count, percentile_trimmed_bounds(matched_lats, matched_lons))
|
||||
}
|
||||
|
||||
/// Budget limits for the Gemini conversation loop. Separate counters prevent
|
||||
|
|
@ -1132,7 +1167,7 @@ pub async fn post_ai_filters(
|
|||
.to_string();
|
||||
|
||||
// Count matching properties and refine if too restrictive
|
||||
let match_count = count_matching_rows(&state, &filters, &travel_time_filters);
|
||||
let (match_count, match_bounds) = count_matching_rows(&state, &filters, &travel_time_filters);
|
||||
info!(
|
||||
match_count = match_count,
|
||||
round = round,
|
||||
|
|
@ -1173,6 +1208,7 @@ pub async fn post_ai_filters(
|
|||
travel_time_filters,
|
||||
notes,
|
||||
match_count: 0,
|
||||
match_bounds: None,
|
||||
}));
|
||||
}
|
||||
|
||||
|
|
@ -1236,6 +1272,7 @@ pub async fn post_ai_filters(
|
|||
travel_time_filters,
|
||||
notes,
|
||||
match_count,
|
||||
match_bounds,
|
||||
}));
|
||||
}
|
||||
|
||||
|
|
@ -1488,9 +1525,14 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn synthetic_filter_keys_are_normalized_to_backend_names() {
|
||||
assert_eq!(
|
||||
canonical_filter_name("Schools:primary:good:0"),
|
||||
"Good+ primary school catchments"
|
||||
);
|
||||
// Legacy keys still carry a distance segment; it is ignored.
|
||||
assert_eq!(
|
||||
canonical_filter_name("Schools:primary:good:2:0"),
|
||||
"Good+ primary schools within 2km"
|
||||
"Good+ primary school catchments"
|
||||
);
|
||||
assert_eq!(
|
||||
canonical_filter_name("Specific crimes:Burglary%20%28avg%2Fyr%29:1"),
|
||||
|
|
|
|||
|
|
@ -68,8 +68,11 @@ pub async fn get_filter_counts(
|
|||
let num_total_filters = num_regular + travel_filter_indices.len();
|
||||
|
||||
if num_total_filters == 0 {
|
||||
// With no active filters the total is simply every property in bounds.
|
||||
// count_in_bounds is O(grid cells), far cheaper than walking every row.
|
||||
let total = state.grid.count_in_bounds(south, west, north, east) as u32;
|
||||
return Ok(Json(FilterCountsResponse {
|
||||
total: 0,
|
||||
total,
|
||||
impacts: FxHashMap::default(),
|
||||
}));
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue