lgtm
This commit is contained in:
parent
11711c57e6
commit
81a16f543c
21 changed files with 29072 additions and 1913 deletions
|
|
@ -6,7 +6,7 @@ use axum::response::Json;
|
|||
use axum::Extension;
|
||||
use metrics::counter;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
use serde_json::{json, Map, Value};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::auth::OptionalUser;
|
||||
|
|
@ -60,7 +60,7 @@ pub struct AiFiltersResponse {
|
|||
/// What the LLM couldn't map to existing filters (empty if everything matched)
|
||||
#[serde(skip_serializing_if = "String::is_empty")]
|
||||
notes: String,
|
||||
/// Number of properties matching the proposed filters (excludes travel time)
|
||||
/// Number of properties matching the proposed property and travel time filters.
|
||||
match_count: usize,
|
||||
}
|
||||
|
||||
|
|
@ -85,6 +85,77 @@ fn strip_markdown_fences(text: &str) -> &str {
|
|||
trimmed
|
||||
}
|
||||
|
||||
/// Maps a synthetic `Schools:<phase>:<rating>:<distance>` key to the matching
/// school feature label.
///
/// Only the first three `:`-separated segments after the `Schools:` prefix are
/// inspected; any further segments are ignored. Returns `None` when the prefix
/// is missing, fewer than three segments follow it, or the combination is not
/// one of the known labels.
fn school_feature_name_from_key(name: &str) -> Option<&'static str> {
    // (phase, rating, distance) -> feature label.
    const LABELS: [((&str, &str, &str), &str); 8] = [
        (("primary", "good", "2"), "Good+ primary schools within 2km"),
        (("secondary", "good", "2"), "Good+ secondary schools within 2km"),
        (("primary", "outstanding", "2"), "Outstanding primary schools within 2km"),
        (("secondary", "outstanding", "2"), "Outstanding secondary schools within 2km"),
        (("primary", "good", "5"), "Good+ primary schools within 5km"),
        (("secondary", "good", "5"), "Good+ secondary schools within 5km"),
        (("primary", "outstanding", "5"), "Outstanding primary schools within 5km"),
        (("secondary", "outstanding", "5"), "Outstanding secondary schools within 5km"),
    ];

    let mut segments = name.strip_prefix("Schools:")?.split(':');
    let key = (segments.next()?, segments.next()?, segments.next()?);

    LABELS
        .iter()
        .find(|(candidate, _)| *candidate == key)
        .map(|(_, label)| *label)
}
|
||||
|
||||
fn decode_synthetic_feature_key(name: &str, prefix: &str) -> Option<String> {
|
||||
let rest = name.strip_prefix(prefix)?;
|
||||
let (encoded, _id) = rest.rsplit_once(':')?;
|
||||
urlencoding::decode(encoded)
|
||||
.ok()
|
||||
.map(|decoded| decoded.into_owned())
|
||||
}
|
||||
|
||||
/// Convert frontend synthetic filter keys back to backend feature names.
|
||||
///
|
||||
/// The React filter UI stores configurable cards under keys such as
|
||||
/// `Political vote share:%25%20Labour:0`. The LLM and backend validators need
|
||||
/// the real feature name (`% Labour`) instead.
|
||||
fn backend_filter_name(name: &str) -> Option<String> {
|
||||
if let Some(feature_name) = school_feature_name_from_key(name) {
|
||||
return Some(feature_name.to_string());
|
||||
}
|
||||
|
||||
for prefix in [
|
||||
"Specific crimes:",
|
||||
"Political vote share:",
|
||||
"Ethnicities:",
|
||||
"Amenity distance:",
|
||||
"Transport distance:",
|
||||
"Amenities within 2km:",
|
||||
"Amenities within 5km:",
|
||||
] {
|
||||
if let Some(feature_name) = decode_synthetic_feature_key(name, prefix) {
|
||||
return Some(feature_name);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn canonical_filter_name(name: &str) -> String {
|
||||
backend_filter_name(name).unwrap_or_else(|| name.to_string())
|
||||
}
|
||||
|
||||
fn normalize_context_filters(filters: &Value) -> Value {
|
||||
let Some(obj) = filters.as_object() else {
|
||||
return filters.clone();
|
||||
};
|
||||
|
||||
let mut normalized = Map::with_capacity(obj.len());
|
||||
for (name, value) in obj {
|
||||
normalized.insert(canonical_filter_name(name), value.clone());
|
||||
}
|
||||
Value::Object(normalized)
|
||||
}
|
||||
|
||||
/// Build the Gemini tool declaration for destination search.
|
||||
fn build_tool_declarations(state: &AppState) -> Value {
|
||||
let modes: Vec<&str> = state
|
||||
|
|
@ -289,7 +360,7 @@ pub fn build_system_prompt(
|
|||
- Use EXACT feature names from the list — spelling, capitalisation, and punctuation must match.\n\
|
||||
- \"cheap\" / \"affordable\" = lower price range. \"expensive\" = higher price range.\n\
|
||||
- \"low crime\" / \"safe\" = low values on Serious crime and Minor crime summary features. \
|
||||
\"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of parks within 1km.\n\
|
||||
\"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of amenities (Park) within 2km.\n\
|
||||
- \"good schools\" = Good+ school features. \"outstanding schools\" = Outstanding school features.\n\
|
||||
- When the user says a number like \"under 400k\", interpret it as 400000.\n\
|
||||
- When the user says \"3 bed\" or \"3 bedroom\", use Number of bedrooms & living rooms \
|
||||
|
|
@ -429,7 +500,7 @@ pub fn build_system_prompt(
|
|||
{\"name\": \"Noise (dB)\", \"bound\": \"max\", \"value\": 55}, \
|
||||
{\"name\": \"Good+ primary schools within 2km\", \"bound\": \"min\", \"value\": 2}, \
|
||||
{\"name\": \"Good+ secondary schools within 2km\", \"bound\": \"min\", \"value\": 1}, \
|
||||
{\"name\": \"Number of parks within 1km\", \"bound\": \"min\", \"value\": 3}], \
|
||||
{\"name\": \"Number of amenities (Park) within 2km\", \"bound\": \"min\", \"value\": 3}], \
|
||||
\"enum_filters\": [], \"travel_time_filters\": [], \"notes\": \"\"}"
|
||||
.to_string(),
|
||||
);
|
||||
|
|
|
|||
|
|
@ -10,6 +10,9 @@ use crate::data::{Histogram, PropertyData};
|
|||
use crate::features::{self, Feature, FEATURE_GROUPS};
|
||||
use crate::state::SharedState;
|
||||
|
||||
const FILTER_GROUP_ORDER: &[&str] = &["Transport", "Property prices", "Properties", "Amenities"];
|
||||
const LAST_FILTER_GROUPS: &[&str] = &["Area development"];
|
||||
|
||||
/// Reports whether `val` is the empty string.
fn is_empty(val: &str) -> bool {
    str::is_empty(val)
}
|
||||
|
|
@ -62,6 +65,23 @@ pub struct FeaturesResponse {
|
|||
pub groups: Vec<FeatureGroupResponse>,
|
||||
}
|
||||
|
||||
fn filter_group_rank(name: &str) -> usize {
|
||||
if let Some(index) = FILTER_GROUP_ORDER
|
||||
.iter()
|
||||
.position(|group_name| *group_name == name)
|
||||
{
|
||||
return index;
|
||||
}
|
||||
if LAST_FILTER_GROUPS.contains(&name) {
|
||||
return usize::MAX;
|
||||
}
|
||||
FILTER_GROUP_ORDER.len()
|
||||
}
|
||||
|
||||
fn order_filter_groups(groups: &mut [FeatureGroupResponse]) {
|
||||
groups.sort_by_key(|group| filter_group_rank(&group.name));
|
||||
}
|
||||
|
||||
/// Build the features response at startup. Called once and cached in AppState.
|
||||
/// Feature order in each group follows the array order in FEATURE_GROUPS.
|
||||
pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
|
||||
|
|
@ -146,9 +166,9 @@ pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
|
|||
max: stats.slider_max,
|
||||
step: 0.1,
|
||||
histogram: stats.histogram.clone(),
|
||||
description: format!("Distance to the closest {category} POI"),
|
||||
description: format!("Distance to the closest {category} amenity"),
|
||||
detail: format!(
|
||||
"Straight-line distance in kilometres from the postcode to the nearest {category} point of interest in the POI dataset."
|
||||
"Straight-line distance in kilometres from the postcode to the nearest {category} amenity in the amenities dataset."
|
||||
),
|
||||
source: "osm-pois".to_string(),
|
||||
prefix: "",
|
||||
|
|
@ -159,17 +179,32 @@ pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
|
|||
} else if let Some(category) = features::dynamic_poi_count_category(name) {
|
||||
let stats = &data.poi_metrics.feature_stats[feat_idx];
|
||||
let radius = features::dynamic_poi_count_radius(name).unwrap_or(0);
|
||||
let is_park = category.eq_ignore_ascii_case("park");
|
||||
dynamic_poi_features.push(FeatureInfo::Numeric {
|
||||
name: name.clone(),
|
||||
min: stats.slider_min,
|
||||
max: stats.slider_max,
|
||||
step: 1.0,
|
||||
histogram: stats.histogram.clone(),
|
||||
description: format!("Number of {category} POIs within {radius}km"),
|
||||
detail: format!(
|
||||
"Count of {category} points of interest within a {radius}km radius of the property's postcode centroid."
|
||||
),
|
||||
source: "osm-pois".to_string(),
|
||||
description: if is_park {
|
||||
format!("Number of parks and green spaces within {radius}km")
|
||||
} else {
|
||||
format!("Number of {category} amenities within {radius}km")
|
||||
},
|
||||
detail: if is_park {
|
||||
format!(
|
||||
"Count of public parks, gardens, playing fields, and play spaces with at least one entrance within a {radius}km radius of the property's postcode centroid. Derived from the OS Open Greenspace dataset."
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"Count of {category} amenities within a {radius}km radius of the property's postcode centroid."
|
||||
)
|
||||
},
|
||||
source: if is_park {
|
||||
"os-open-greenspace".to_string()
|
||||
} else {
|
||||
"osm-pois".to_string()
|
||||
},
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
|
|
@ -182,12 +217,18 @@ pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
|
|||
FeatureInfo::Numeric { name, .. } => features::dynamic_poi_feature_sort_key(name),
|
||||
FeatureInfo::Enum { name, .. } => features::dynamic_poi_feature_sort_key(name),
|
||||
});
|
||||
groups.push(FeatureGroupResponse {
|
||||
name: "Nearby POIs".to_string(),
|
||||
features: dynamic_poi_features,
|
||||
});
|
||||
if let Some(amenities_group) = groups.iter_mut().find(|group| group.name == "Amenities") {
|
||||
amenities_group.features.extend(dynamic_poi_features);
|
||||
} else {
|
||||
groups.push(FeatureGroupResponse {
|
||||
name: "Amenities".to_string(),
|
||||
features: dynamic_poi_features,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
order_filter_groups(&mut groups);
|
||||
|
||||
FeaturesResponse { groups }
|
||||
}
|
||||
|
||||
|
|
@ -196,3 +237,46 @@ pub async fn get_features(State(shared): State<Arc<SharedState>>) -> Json<Featur
|
|||
info!("GET /api/features");
|
||||
Json(state.features_response.clone())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a feature group that carries a name and no features.
    fn group(name: &str) -> FeatureGroupResponse {
        FeatureGroupResponse {
            features: Vec::new(),
            name: name.to_string(),
        }
    }

    #[test]
    fn orders_filter_groups_for_backend_response() {
        let unordered = [
            "Properties",
            "Education",
            "Area development",
            "Property prices",
            "Crime",
            "Neighbours",
            "Amenities",
            "Transport",
        ];
        let mut groups: Vec<FeatureGroupResponse> =
            unordered.iter().map(|name| group(name)).collect();

        order_filter_groups(&mut groups);

        // Explicitly ordered groups first, unranked groups in their original
        // relative order, and "Area development" last.
        let expected = [
            "Transport",
            "Property prices",
            "Properties",
            "Amenities",
            "Education",
            "Crime",
            "Neighbours",
            "Area development",
        ];
        let actual: Vec<&str> = groups.iter().map(|entry| entry.name.as_str()).collect();
        assert_eq!(actual, expected);
    }
}
|
||||
|
|
|
|||
|
|
@ -68,7 +68,6 @@ pub async fn get_filter_counts(
|
|||
}
|
||||
|
||||
let filters_str = params.filters;
|
||||
let has_poi_filters = !parsed_poi_filters.is_empty();
|
||||
|
||||
let response = tokio::task::spawn_blocking(move || -> Result<FilterCountsResponse, String> {
|
||||
let t0 = std::time::Instant::now();
|
||||
|
|
@ -99,54 +98,40 @@ pub async fn get_filter_counts(
|
|||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
let base = row * num_features;
|
||||
let mut fail_count: u32 = 0;
|
||||
let mut fail_index: usize = 0;
|
||||
let mut passes_all = true;
|
||||
|
||||
// Test numeric filters
|
||||
for (i, f) in parsed_filters.iter().enumerate() {
|
||||
let raw = feature_data[base + f.feat_idx];
|
||||
if raw == NAN_U16 || raw < f.min_u16 || raw > f.max_u16 {
|
||||
fail_count += 1;
|
||||
fail_index = i;
|
||||
if fail_count > 1 {
|
||||
break;
|
||||
}
|
||||
impacts[i] += 1;
|
||||
passes_all = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Test enum filters
|
||||
if fail_count <= 1 {
|
||||
for (i, f) in parsed_enum_filters.iter().enumerate() {
|
||||
let raw = feature_data[base + f.feat_idx];
|
||||
if raw == NAN_U16 || !f.allowed.contains(&raw) {
|
||||
fail_count += 1;
|
||||
fail_index = parsed_filters.len() + i;
|
||||
if fail_count > 1 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (i, f) in parsed_enum_filters.iter().enumerate() {
|
||||
let raw = feature_data[base + f.feat_idx];
|
||||
if raw == NAN_U16 || !f.allowed.contains(&raw) {
|
||||
impacts[parsed_filters.len() + i] += 1;
|
||||
passes_all = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Test POI filters
|
||||
for (i, f) in parsed_poi_filters.iter().enumerate() {
|
||||
let raw = state
|
||||
.data
|
||||
.poi_metrics
|
||||
.raw_for_property_row(row, f.metric_idx);
|
||||
if raw == NAN_U16 || raw < f.min_u16 || raw > f.max_u16 {
|
||||
impacts[parsed_filters.len() + parsed_enum_filters.len() + i] += 1;
|
||||
passes_all = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Test travel time filters
|
||||
if fail_count <= 1 && has_poi_filters {
|
||||
for (i, f) in parsed_poi_filters.iter().enumerate() {
|
||||
let raw = state
|
||||
.data
|
||||
.poi_metrics
|
||||
.raw_for_property_row(row, f.metric_idx);
|
||||
if raw == NAN_U16 || raw < f.min_u16 || raw > f.max_u16 {
|
||||
fail_count += 1;
|
||||
fail_index = parsed_filters.len() + parsed_enum_filters.len() + i;
|
||||
if fail_count > 1 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test travel time filters
|
||||
if fail_count <= 1 && has_travel {
|
||||
if has_travel {
|
||||
let postcode = pc_interner.resolve(&pc_keys[row]);
|
||||
for (slot, &ti) in travel_filter_indices.iter().enumerate() {
|
||||
let entry = &travel_entries[ti];
|
||||
|
|
@ -165,19 +150,14 @@ pub async fn get_filter_counts(
|
|||
_ => true,
|
||||
};
|
||||
if !passes {
|
||||
fail_count += 1;
|
||||
fail_index = num_regular + slot;
|
||||
if fail_count > 1 {
|
||||
break;
|
||||
}
|
||||
impacts[num_regular + slot] += 1;
|
||||
passes_all = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match fail_count {
|
||||
0 => total_passing += 1,
|
||||
1 => impacts[fail_index] += 1,
|
||||
_ => {}
|
||||
if passes_all {
|
||||
total_passing += 1;
|
||||
}
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ use axum::response::{IntoResponse, Json};
|
|||
use axum::Extension;
|
||||
use metrics::histogram;
|
||||
use rayon::prelude::*;
|
||||
use rustc_hash::FxHashMap;
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Map, Value};
|
||||
use tracing::info;
|
||||
|
|
@ -32,6 +32,7 @@ type ChunkResult = (
|
|||
FxHashMap<u64, Aggregator>,
|
||||
FxHashMap<u64, PoiAggregator>,
|
||||
Vec<FxHashMap<u64, TravelTimeAgg>>,
|
||||
FxHashSet<u64>,
|
||||
);
|
||||
|
||||
/// Maximum center-to-vertex distance in degrees per H3 resolution.
|
||||
|
|
@ -82,6 +83,7 @@ pub struct HexagonParams {
|
|||
fn build_feature_maps(
|
||||
groups: &FxHashMap<u64, Aggregator>,
|
||||
poi_groups: &FxHashMap<u64, PoiAggregator>,
|
||||
selectable_cells: &FxHashSet<u64>,
|
||||
min_keys: &[String],
|
||||
max_keys: &[String],
|
||||
avg_keys: &[String],
|
||||
|
|
@ -214,6 +216,36 @@ fn build_feature_maps(
|
|||
features.push(map);
|
||||
}
|
||||
|
||||
for &cell_id in selectable_cells {
|
||||
if groups.contains_key(&cell_id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Some(cell) = h3o::CellIndex::try_from(cell_id).ok() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let center: h3o::LatLng = cell.into();
|
||||
let lat = center.lat();
|
||||
let lng = center.lng();
|
||||
|
||||
if lat < bound_south || lat > bound_north || lng < bound_west || lng > bound_east {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut map = Map::new();
|
||||
map.insert("h3".into(), Value::String(cell.to_string()));
|
||||
map.insert("count".into(), Value::from(0));
|
||||
if let (Some(lat_num), Some(lon_num)) = (
|
||||
serde_json::Number::from_f64(lat),
|
||||
serde_json::Number::from_f64(lng),
|
||||
) {
|
||||
map.insert("lat".into(), Value::Number(lat_num));
|
||||
map.insert("lon".into(), Value::Number(lon_num));
|
||||
}
|
||||
features.push(map);
|
||||
}
|
||||
|
||||
features
|
||||
}
|
||||
|
||||
|
|
@ -313,6 +345,7 @@ pub async fn get_hexagons(
|
|||
|
||||
let mut groups: FxHashMap<u64, Aggregator> = FxHashMap::default();
|
||||
let mut poi_groups: FxHashMap<u64, PoiAggregator> = FxHashMap::default();
|
||||
let mut selectable_cells: FxHashSet<u64> = FxHashSet::default();
|
||||
let mut travel_aggs: Vec<FxHashMap<u64, TravelTimeAgg>> = (0..travel_entries.len())
|
||||
.map(|_| FxHashMap::default())
|
||||
.collect();
|
||||
|
|
@ -338,12 +371,22 @@ pub async fn get_hexagons(
|
|||
..travel_entries.len())
|
||||
.map(|_| FxHashMap::default())
|
||||
.collect();
|
||||
let mut local_selectable_cells: FxHashSet<u64> = FxHashSet::default();
|
||||
let mut h3_cache: FxHashMap<u64, u64> = FxHashMap::default();
|
||||
let mut travel_minutes: Vec<Option<i16>> =
|
||||
Vec::with_capacity(travel_entries.len());
|
||||
|
||||
'row: for &row_idx in chunk {
|
||||
let row = row_idx as usize;
|
||||
let cell_id = cell_for_row_cached(
|
||||
row,
|
||||
precomputed,
|
||||
h3_res,
|
||||
need_parent,
|
||||
&mut h3_cache,
|
||||
);
|
||||
local_selectable_cells.insert(cell_id);
|
||||
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
|
|
@ -384,14 +427,6 @@ pub async fn get_hexagons(
|
|||
}
|
||||
}
|
||||
|
||||
let cell_id = cell_for_row_cached(
|
||||
row,
|
||||
precomputed,
|
||||
h3_res,
|
||||
need_parent,
|
||||
&mut h3_cache,
|
||||
);
|
||||
|
||||
let agg = local_groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| Aggregator::new(num_features, enum_dist_config));
|
||||
|
|
@ -424,12 +459,19 @@ pub async fn get_hexagons(
|
|||
}
|
||||
}
|
||||
|
||||
(local_groups, local_poi_groups, local_travel_aggs)
|
||||
(
|
||||
local_groups,
|
||||
local_poi_groups,
|
||||
local_travel_aggs,
|
||||
local_selectable_cells,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Merge thread-local results into the main accumulators
|
||||
for (local_groups, local_poi_groups, local_travel) in thread_results {
|
||||
for (local_groups, local_poi_groups, local_travel, local_selectable_cells) in
|
||||
thread_results
|
||||
{
|
||||
for (cell_id, local_agg) in local_groups {
|
||||
groups
|
||||
.entry(cell_id)
|
||||
|
|
@ -450,6 +492,7 @@ pub async fn get_hexagons(
|
|||
.merge(&local_tt);
|
||||
}
|
||||
}
|
||||
selectable_cells.extend(local_selectable_cells);
|
||||
}
|
||||
} else {
|
||||
// Sequential: use for_each_in_bounds to avoid Vec<u32> allocation
|
||||
|
|
@ -460,6 +503,9 @@ pub async fn get_hexagons(
|
|||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
let cell_id =
|
||||
cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache);
|
||||
selectable_cells.insert(cell_id);
|
||||
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
|
|
@ -499,9 +545,6 @@ pub async fn get_hexagons(
|
|||
}
|
||||
}
|
||||
|
||||
let cell_id =
|
||||
cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache);
|
||||
|
||||
let aggregation = groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| Aggregator::new(num_features, enum_dist_config));
|
||||
|
|
@ -540,6 +583,7 @@ pub async fn get_hexagons(
|
|||
let mut features = build_feature_maps(
|
||||
&groups,
|
||||
&poi_groups,
|
||||
&selectable_cells,
|
||||
min_keys,
|
||||
max_keys,
|
||||
avg_keys,
|
||||
|
|
@ -564,6 +608,7 @@ pub async fn get_hexagons(
|
|||
resolution,
|
||||
rows = row_count,
|
||||
parallel,
|
||||
selectable_cells = selectable_cells.len(),
|
||||
cells_before_filter = groups.len(),
|
||||
cells_after_filter = features.len(),
|
||||
truncated,
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use axum::http::StatusCode;
|
|||
use axum::response::{IntoResponse, Json};
|
||||
use axum::Extension;
|
||||
use metrics::histogram;
|
||||
use rustc_hash::FxHashMap;
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Map, Value};
|
||||
use tracing::info;
|
||||
|
|
@ -142,11 +142,18 @@ pub async fn get_postcodes(
|
|||
// Single-pass: aggregate directly into postcode_aggs while iterating properties in bounds
|
||||
let mut postcode_aggs: FxHashMap<usize, Aggregator> = FxHashMap::default();
|
||||
let mut poi_aggs: FxHashMap<usize, PoiAggregator> = FxHashMap::default();
|
||||
let mut selectable_postcodes: FxHashSet<usize> = FxHashSet::default();
|
||||
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
let postcode = state.data.postcode(row);
|
||||
let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) else {
|
||||
return;
|
||||
};
|
||||
selectable_postcodes.insert(pc_idx);
|
||||
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
|
|
@ -161,22 +168,19 @@ pub async fn get_postcodes(
|
|||
return;
|
||||
}
|
||||
|
||||
let postcode = state.data.postcode(row);
|
||||
if let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) {
|
||||
let agg = postcode_aggs
|
||||
let agg = postcode_aggs
|
||||
.entry(pc_idx)
|
||||
.or_insert_with(|| Aggregator::new(num_features, enum_dist_config));
|
||||
if has_selective {
|
||||
agg.add_row_selective(feature_data, row, num_features, sel_indices, &quant);
|
||||
} else {
|
||||
agg.add_row(feature_data, row, num_features, &quant);
|
||||
}
|
||||
if has_poi_fields {
|
||||
poi_aggs
|
||||
.entry(pc_idx)
|
||||
.or_insert_with(|| Aggregator::new(num_features, enum_dist_config));
|
||||
if has_selective {
|
||||
agg.add_row_selective(feature_data, row, num_features, sel_indices, &quant);
|
||||
} else {
|
||||
agg.add_row(feature_data, row, num_features, &quant);
|
||||
}
|
||||
if has_poi_fields {
|
||||
poi_aggs
|
||||
.entry(pc_idx)
|
||||
.or_insert_with(|| PoiAggregator::new(poi_num_features))
|
||||
.add_row_selective(poi_metrics, row, poi_field_indices);
|
||||
}
|
||||
.or_insert_with(|| PoiAggregator::new(poi_num_features))
|
||||
.add_row_selective(poi_metrics, row, poi_field_indices);
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -229,8 +233,10 @@ pub async fn get_postcodes(
|
|||
let t_agg = t0.elapsed();
|
||||
|
||||
// Build response, filtering postcodes to only those whose polygon intersects query bounds
|
||||
let mut features = Vec::with_capacity(postcode_aggs.len());
|
||||
let postcodes_before_filter = postcode_aggs.len();
|
||||
let mut features = Vec::with_capacity(selectable_postcodes.len());
|
||||
let postcodes_before_filter = selectable_postcodes.len();
|
||||
let matching_postcodes = postcode_aggs.len();
|
||||
let mut included_postcodes: FxHashSet<usize> = FxHashSet::default();
|
||||
let mut filtered_out = 0usize;
|
||||
|
||||
for (pc_idx, aggregation) in postcode_aggs {
|
||||
|
|
@ -255,7 +261,7 @@ pub async fn get_postcodes(
|
|||
continue;
|
||||
}
|
||||
|
||||
let geometry = postcode_data.geometries[pc_idx].clone();
|
||||
let geometry = postcode_data.geometry_geojson(pc_idx);
|
||||
|
||||
// Build properties
|
||||
let centroid = postcode_data.centroids[pc_idx];
|
||||
|
|
@ -347,18 +353,71 @@ pub async fn get_postcodes(
|
|||
feature.insert("properties".into(), Value::Object(props));
|
||||
|
||||
features.push(feature);
|
||||
included_postcodes.insert(pc_idx);
|
||||
|
||||
if features.len() >= MAX_CELLS_PER_REQUEST {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if features.len() < MAX_CELLS_PER_REQUEST {
|
||||
for pc_idx in selectable_postcodes {
|
||||
if included_postcodes.contains(&pc_idx) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let (pc_south, pc_west, pc_north, pc_east) = postcode_data.aabbs[pc_idx];
|
||||
|
||||
if !bounds_intersect(
|
||||
pc_south as f64,
|
||||
pc_west as f64,
|
||||
pc_north as f64,
|
||||
pc_east as f64,
|
||||
south,
|
||||
west,
|
||||
north,
|
||||
east,
|
||||
) {
|
||||
filtered_out += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let geometry = postcode_data.geometry_geojson(pc_idx);
|
||||
let centroid = postcode_data.centroids[pc_idx];
|
||||
let mut props = Map::new();
|
||||
props.insert(
|
||||
"postcode".into(),
|
||||
Value::String(postcode_data.postcodes[pc_idx].clone()),
|
||||
);
|
||||
props.insert("count".into(), Value::from(0));
|
||||
props.insert(
|
||||
"centroid".into(),
|
||||
Value::Array(vec![
|
||||
Value::from(centroid.1 as f64),
|
||||
Value::from(centroid.0 as f64),
|
||||
]),
|
||||
);
|
||||
|
||||
let mut feature = Map::new();
|
||||
feature.insert("type".into(), Value::String("Feature".into()));
|
||||
feature.insert("geometry".into(), geometry);
|
||||
feature.insert("properties".into(), Value::Object(props));
|
||||
|
||||
features.push(feature);
|
||||
|
||||
if features.len() >= MAX_CELLS_PER_REQUEST {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
histogram!("postcodes_response_count").record(features.len() as f64);
|
||||
|
||||
let truncated = features.len() >= MAX_CELLS_PER_REQUEST;
|
||||
let t_total = t0.elapsed();
|
||||
info!(
|
||||
postcodes_before_filter,
|
||||
matching_postcodes,
|
||||
postcodes_after_filter = features.len(),
|
||||
filtered_out,
|
||||
truncated,
|
||||
|
|
@ -418,7 +477,7 @@ pub async fn get_nearest_postcode(
|
|||
|
||||
let idx = best_idx.ok_or(StatusCode::NOT_FOUND)?;
|
||||
let (lat, lon) = postcode_data.centroids[idx];
|
||||
let geometry = postcode_data.geometries[idx].clone();
|
||||
let geometry = postcode_data.geometry_geojson(idx);
|
||||
let postcode = &postcode_data.postcodes[idx];
|
||||
|
||||
// Log location for authenticated users (best-effort, non-blocking)
|
||||
|
|
@ -454,7 +513,7 @@ pub async fn get_postcode_lookup(
|
|||
|
||||
if let Some(&idx) = postcode_data.postcode_to_idx.get(&normalized) {
|
||||
let (lat, lon) = postcode_data.centroids[idx];
|
||||
let geometry = postcode_data.geometries[idx].clone();
|
||||
let geometry = postcode_data.geometry_geojson(idx);
|
||||
|
||||
info!(postcode = %normalized, "GET /api/postcode/{postcode}");
|
||||
Ok(Json(serde_json::json!({
|
||||
|
|
|
|||
237
server-rs/src/routes/rightmove.rs
Normal file
237
server-rs/src/routes/rightmove.rs
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::{Query, State};
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::Redirect;
|
||||
use reqwest::Url;
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::state::SharedState;
|
||||
use crate::utils::normalize_postcode;
|
||||
|
||||
const RIGHTMOVE_TYPEAHEAD_URL: &str = "https://los.rightmove.co.uk/typeahead";
|
||||
const RIGHTMOVE_HOST: &str = "www.rightmove.co.uk";
|
||||
const RIGHTMOVE_FIND_PATH: &str = "/property-for-sale/find.html";
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct RightmoveRedirectParams {
|
||||
postcode: String,
|
||||
target: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct RightmoveTypeaheadResponse {
|
||||
#[serde(default)]
|
||||
matches: Vec<RightmoveTypeaheadMatch>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct RightmoveTypeaheadMatch {
|
||||
id: Value,
|
||||
#[serde(rename = "type")]
|
||||
match_type: String,
|
||||
#[serde(default, rename = "displayName")]
|
||||
display_name: String,
|
||||
}
|
||||
|
||||
pub async fn get_rightmove_redirect(
|
||||
State(shared): State<Arc<SharedState>>,
|
||||
Query(params): Query<RightmoveRedirectParams>,
|
||||
) -> Result<Redirect, (StatusCode, String)> {
|
||||
if !looks_like_full_uk_postcode(¶ms.postcode) {
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
"'postcode' must be a full UK postcode".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let postcode = normalize_postcode(¶ms.postcode);
|
||||
let mut target = parse_rightmove_target(¶ms.target)?;
|
||||
let state = shared.load_state();
|
||||
|
||||
match fetch_exact_postcode_location_identifier(&state.http_client, &postcode).await {
|
||||
Some(location_identifier) => {
|
||||
apply_exact_postcode_location(&mut target, &postcode, &location_identifier);
|
||||
}
|
||||
None => warn!(
|
||||
postcode,
|
||||
"Could not resolve exact Rightmove postcode location"
|
||||
),
|
||||
}
|
||||
|
||||
Ok(Redirect::temporary(target.as_str()))
|
||||
}
|
||||
|
||||
async fn fetch_exact_postcode_location_identifier(
|
||||
client: &reqwest::Client,
|
||||
postcode: &str,
|
||||
) -> Option<String> {
|
||||
let url = format!(
|
||||
"{}?query={}&limit=5",
|
||||
RIGHTMOVE_TYPEAHEAD_URL,
|
||||
urlencoding::encode(postcode)
|
||||
);
|
||||
|
||||
let response = client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|err| warn!(postcode, "Rightmove typeahead request failed: {err}"))
|
||||
.ok()?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
warn!(
|
||||
postcode,
|
||||
status = %response.status(),
|
||||
"Rightmove typeahead returned an error"
|
||||
);
|
||||
return None;
|
||||
}
|
||||
|
||||
let typeahead: RightmoveTypeaheadResponse = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(|err| {
|
||||
warn!(
|
||||
postcode,
|
||||
"Failed to parse Rightmove typeahead response: {err}"
|
||||
)
|
||||
})
|
||||
.ok()?;
|
||||
|
||||
typeahead.matches.iter().find_map(|item| {
|
||||
if !item.match_type.eq_ignore_ascii_case("POSTCODE") {
|
||||
return None;
|
||||
}
|
||||
if compact_postcode(&item.display_name) != compact_postcode(postcode) {
|
||||
return None;
|
||||
}
|
||||
rightmove_id_to_string(&item.id).map(|id| format!("POSTCODE^{id}"))
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_rightmove_target(target: &str) -> Result<Url, (StatusCode, String)> {
|
||||
let url = Url::parse(target).map_err(|_| {
|
||||
(
|
||||
StatusCode::BAD_REQUEST,
|
||||
"'target' must be a valid Rightmove URL".to_string(),
|
||||
)
|
||||
})?;
|
||||
|
||||
if url.scheme() != "https"
|
||||
|| url.host_str() != Some(RIGHTMOVE_HOST)
|
||||
|| url.path() != RIGHTMOVE_FIND_PATH
|
||||
{
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
"'target' must be a Rightmove property search URL".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
fn apply_exact_postcode_location(url: &mut Url, postcode: &str, location_identifier: &str) {
|
||||
let mut pairs: Vec<(String, String)> = url
|
||||
.query_pairs()
|
||||
.filter(|(key, _)| {
|
||||
key != "searchLocation"
|
||||
&& key != "useLocationIdentifier"
|
||||
&& key != "locationIdentifier"
|
||||
&& key != "radius"
|
||||
})
|
||||
.map(|(key, value)| (key.into_owned(), value.into_owned()))
|
||||
.collect();
|
||||
|
||||
pairs.push(("searchLocation".to_string(), postcode.to_string()));
|
||||
pairs.push(("useLocationIdentifier".to_string(), "true".to_string()));
|
||||
pairs.push((
|
||||
"locationIdentifier".to_string(),
|
||||
location_identifier.to_string(),
|
||||
));
|
||||
pairs.push(("radius".to_string(), "0.0".to_string()));
|
||||
|
||||
let mut query = url.query_pairs_mut();
|
||||
query.clear();
|
||||
for (key, value) in pairs {
|
||||
query.append_pair(&key, &value);
|
||||
}
|
||||
}
|
||||
|
||||
fn rightmove_id_to_string(value: &Value) -> Option<String> {
|
||||
match value {
|
||||
Value::String(id) if !id.trim().is_empty() => Some(id.clone()),
|
||||
Value::Number(id) => Some(id.to_string()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `postcode` with all whitespace removed and ASCII letters upper-cased.
///
/// Non-ASCII characters are kept unchanged.
fn compact_postcode(postcode: &str) -> String {
    let mut compact = String::new();
    for ch in postcode.chars() {
        if !ch.is_whitespace() {
            compact.push(ch.to_ascii_uppercase());
        }
    }
    compact
}
|
||||
|
||||
fn looks_like_full_uk_postcode(postcode: &str) -> bool {
|
||||
let compact = compact_postcode(postcode);
|
||||
let bytes = compact.as_bytes();
|
||||
if !(5..=7).contains(&bytes.len()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let outward_len = bytes.len() - 3;
|
||||
bytes[0].is_ascii_alphabetic()
|
||||
&& bytes[..outward_len]
|
||||
.iter()
|
||||
.all(|byte| byte.is_ascii_alphanumeric())
|
||||
&& bytes[outward_len].is_ascii_digit()
|
||||
&& bytes[outward_len + 1].is_ascii_alphabetic()
|
||||
&& bytes[outward_len + 2].is_ascii_alphabetic()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn rewrites_rightmove_url_to_exact_postcode_location() {
        let original = "https://www.rightmove.co.uk/property-for-sale/find.html?searchLocation=SW1A+1AA&useLocationIdentifier=true&locationIdentifier=OUTCODE%5E2506&radius=0.25&minPrice=100000";
        let mut url = Url::parse(original).unwrap();

        apply_exact_postcode_location(&mut url, "SW1A 1AA", "POSTCODE^837246");

        let pairs: std::collections::HashMap<String, String> =
            url.query_pairs().into_owned().collect();
        // The location parameters are replaced; unrelated ones survive.
        let expected = [
            ("searchLocation", "SW1A 1AA"),
            ("useLocationIdentifier", "true"),
            ("locationIdentifier", "POSTCODE^837246"),
            ("radius", "0.0"),
            ("minPrice", "100000"),
        ];
        for (key, value) in expected.iter() {
            assert_eq!(pairs.get(*key).map(String::as_str), Some(*value));
        }
    }

    #[test]
    fn rejects_non_rightmove_redirect_targets() {
        // Wrong host, wrong scheme, wrong path — in that order.
        let rejected = [
            "https://example.com/property-for-sale/find.html",
            "http://www.rightmove.co.uk/property-for-sale/find.html",
            "https://www.rightmove.co.uk/property-to-rent/find.html",
        ];
        for target in rejected.iter() {
            assert!(parse_rightmove_target(target).is_err());
        }
    }

    #[test]
    fn validates_full_postcode_shape() {
        for accepted in ["SW1A 1AA", "e16an"].iter() {
            assert!(looks_like_full_uk_postcode(accepted));
        }
        for rejected in ["SW1A", "not a postcode"].iter() {
            assert!(!looks_like_full_uk_postcode(rejected));
        }
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue