This commit is contained in:
Andras Schmelczer 2026-05-12 22:13:07 +01:00
parent 11711c57e6
commit 81a16f543c
21 changed files with 29072 additions and 1913 deletions

View file

@ -6,7 +6,7 @@ use axum::response::Json;
use axum::Extension;
use metrics::counter;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use serde_json::{json, Map, Value};
use tracing::{info, warn};
use crate::auth::OptionalUser;
@ -60,7 +60,7 @@ pub struct AiFiltersResponse {
/// What the LLM couldn't map to existing filters (empty if everything matched)
#[serde(skip_serializing_if = "String::is_empty")]
notes: String,
/// Number of properties matching the proposed filters (excludes travel time)
/// Number of properties matching the proposed property and travel time filters.
match_count: usize,
}
@ -85,6 +85,77 @@ fn strip_markdown_fences(text: &str) -> &str {
trimmed
}
/// Translate a frontend school filter key into its backend feature name.
///
/// Keys have the shape `Schools:<phase>:<rating>:<distance>`; any further
/// `:`-separated segments after the distance are ignored. Returns `None` when
/// the `Schools:` prefix is missing, fewer than three segments follow it, or
/// the combination is not one of the eight known school features.
fn school_feature_name_from_key(name: &str) -> Option<&'static str> {
    let mut segments = name.strip_prefix("Schools:")?.split(':');
    let phase = segments.next()?;
    let rating = segments.next()?;
    let distance = segments.next()?;

    let feature = match (distance, rating, phase) {
        ("2", "good", "primary") => "Good+ primary schools within 2km",
        ("2", "good", "secondary") => "Good+ secondary schools within 2km",
        ("2", "outstanding", "primary") => "Outstanding primary schools within 2km",
        ("2", "outstanding", "secondary") => "Outstanding secondary schools within 2km",
        ("5", "good", "primary") => "Good+ primary schools within 5km",
        ("5", "good", "secondary") => "Good+ secondary schools within 5km",
        ("5", "outstanding", "primary") => "Outstanding primary schools within 5km",
        ("5", "outstanding", "secondary") => "Outstanding secondary schools within 5km",
        _ => return None,
    };
    Some(feature)
}
/// Extract and percent-decode the feature name embedded in a synthetic key.
///
/// After `prefix` is removed, the remainder is split at its last `:`. The part
/// before that colon is percent-decoded and returned; the part after it is
/// discarded. Returns `None` when `name` does not start with `prefix`, when
/// the remainder contains no `:`, or when decoding reports an error.
fn decode_synthetic_feature_key(name: &str, prefix: &str) -> Option<String> {
    let remainder = name.strip_prefix(prefix)?;
    let (encoded, _suffix) = remainder.rsplit_once(':')?;
    match urlencoding::decode(encoded) {
        Ok(decoded) => Some(decoded.into_owned()),
        Err(_) => None,
    }
}
/// Map a frontend synthetic filter key back to the backend feature name.
///
/// The React filter UI stores configurable cards under keys such as
/// `Political vote share:%25%20Labour:0`, whereas the LLM and the backend
/// validators need the real feature name (`% Labour`).
///
/// School keys are resolved first through their fixed lookup table; otherwise
/// each known card prefix is tried in turn and the first one that decodes
/// wins. Returns `None` when `name` is not a recognised synthetic key.
fn backend_filter_name(name: &str) -> Option<String> {
    const ENCODED_KEY_PREFIXES: [&str; 7] = [
        "Specific crimes:",
        "Political vote share:",
        "Ethnicities:",
        "Amenity distance:",
        "Transport distance:",
        "Amenities within 2km:",
        "Amenities within 5km:",
    ];

    if let Some(school_feature) = school_feature_name_from_key(name) {
        return Some(String::from(school_feature));
    }

    ENCODED_KEY_PREFIXES
        .iter()
        .find_map(|prefix| decode_synthetic_feature_key(name, prefix))
}
/// Resolve `name` to its backend feature name, returning `name` unchanged
/// when it is not a recognised synthetic key.
fn canonical_filter_name(name: &str) -> String {
    match backend_filter_name(name) {
        Some(feature_name) => feature_name,
        None => name.to_string(),
    }
}
fn normalize_context_filters(filters: &Value) -> Value {
let Some(obj) = filters.as_object() else {
return filters.clone();
};
let mut normalized = Map::with_capacity(obj.len());
for (name, value) in obj {
normalized.insert(canonical_filter_name(name), value.clone());
}
Value::Object(normalized)
}
/// Build the Gemini tool declaration for destination search.
fn build_tool_declarations(state: &AppState) -> Value {
let modes: Vec<&str> = state
@ -289,7 +360,7 @@ pub fn build_system_prompt(
- Use EXACT feature names from the list spelling, capitalisation, and punctuation must match.\n\
- \"cheap\" / \"affordable\" = lower price range. \"expensive\" = higher price range.\n\
- \"low crime\" / \"safe\" = low values on Serious crime and Minor crime summary features. \
\"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of parks within 1km.\n\
\"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of amenities (Park) within 2km.\n\
- \"good schools\" = Good+ school features. \"outstanding schools\" = Outstanding school features.\n\
- When the user says a number like \"under 400k\", interpret it as 400000.\n\
- When the user says \"3 bed\" or \"3 bedroom\", use Number of bedrooms & living rooms \
@ -429,7 +500,7 @@ pub fn build_system_prompt(
{\"name\": \"Noise (dB)\", \"bound\": \"max\", \"value\": 55}, \
{\"name\": \"Good+ primary schools within 2km\", \"bound\": \"min\", \"value\": 2}, \
{\"name\": \"Good+ secondary schools within 2km\", \"bound\": \"min\", \"value\": 1}, \
{\"name\": \"Number of parks within 1km\", \"bound\": \"min\", \"value\": 3}], \
{\"name\": \"Number of amenities (Park) within 2km\", \"bound\": \"min\", \"value\": 3}], \
\"enum_filters\": [], \"travel_time_filters\": [], \"notes\": \"\"}"
.to_string(),
);

View file

@ -10,6 +10,9 @@ use crate::data::{Histogram, PropertyData};
use crate::features::{self, Feature, FEATURE_GROUPS};
use crate::state::SharedState;
/// Group names that are ranked first when ordering filter groups, in exactly this order.
const FILTER_GROUP_ORDER: &[&str] = &["Transport", "Property prices", "Properties", "Amenities"];
/// Group names that are ranked after every other group when ordering filter groups.
const LAST_FILTER_GROUPS: &[&str] = &["Area development"];
/// Returns `true` when `val` is the empty string.
// NOTE(review): presumably used as a serde `skip_serializing_if` predicate; the call site is not visible here — confirm.
fn is_empty(val: &str) -> bool {
val.is_empty()
}
@ -62,6 +65,23 @@ pub struct FeaturesResponse {
pub groups: Vec<FeatureGroupResponse>,
}
/// Compute the sort rank of a filter group from its name.
///
/// Names listed in `FILTER_GROUP_ORDER` rank by their position in that list.
/// Names listed in `LAST_FILTER_GROUPS` rank as `usize::MAX` so they sort to
/// the end. Every other name shares the rank `FILTER_GROUP_ORDER.len()`,
/// placing it between the two.
fn filter_group_rank(name: &str) -> usize {
    let pinned_position = FILTER_GROUP_ORDER
        .iter()
        .position(|&candidate| candidate == name);

    match pinned_position {
        Some(rank) => rank,
        None if LAST_FILTER_GROUPS.iter().any(|&last| last == name) => usize::MAX,
        None => FILTER_GROUP_ORDER.len(),
    }
}
/// Sort `groups` in place by the rank of each group's name.
///
/// The sort is stable, so groups that share a rank keep their relative order.
fn order_filter_groups(groups: &mut [FeatureGroupResponse]) {
    groups.sort_by(|left, right| {
        filter_group_rank(&left.name).cmp(&filter_group_rank(&right.name))
    });
}
/// Build the features response at startup. Called once and cached in AppState.
/// Feature order in each group follows the array order in FEATURE_GROUPS.
pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
@ -146,9 +166,9 @@ pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
max: stats.slider_max,
step: 0.1,
histogram: stats.histogram.clone(),
description: format!("Distance to the closest {category} POI"),
description: format!("Distance to the closest {category} amenity"),
detail: format!(
"Straight-line distance in kilometres from the postcode to the nearest {category} point of interest in the POI dataset."
"Straight-line distance in kilometres from the postcode to the nearest {category} amenity in the amenities dataset."
),
source: "osm-pois".to_string(),
prefix: "",
@ -159,17 +179,32 @@ pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
} else if let Some(category) = features::dynamic_poi_count_category(name) {
let stats = &data.poi_metrics.feature_stats[feat_idx];
let radius = features::dynamic_poi_count_radius(name).unwrap_or(0);
let is_park = category.eq_ignore_ascii_case("park");
dynamic_poi_features.push(FeatureInfo::Numeric {
name: name.clone(),
min: stats.slider_min,
max: stats.slider_max,
step: 1.0,
histogram: stats.histogram.clone(),
description: format!("Number of {category} POIs within {radius}km"),
detail: format!(
"Count of {category} points of interest within a {radius}km radius of the property's postcode centroid."
),
source: "osm-pois".to_string(),
description: if is_park {
format!("Number of parks and green spaces within {radius}km")
} else {
format!("Number of {category} amenities within {radius}km")
},
detail: if is_park {
format!(
"Count of public parks, gardens, playing fields, and play spaces with at least one entrance within a {radius}km radius of the property's postcode centroid. Derived from the OS Open Greenspace dataset."
)
} else {
format!(
"Count of {category} amenities within a {radius}km radius of the property's postcode centroid."
)
},
source: if is_park {
"os-open-greenspace".to_string()
} else {
"osm-pois".to_string()
},
prefix: "",
suffix: "",
raw: false,
@ -182,12 +217,18 @@ pub fn build_features_response(data: &PropertyData) -> FeaturesResponse {
FeatureInfo::Numeric { name, .. } => features::dynamic_poi_feature_sort_key(name),
FeatureInfo::Enum { name, .. } => features::dynamic_poi_feature_sort_key(name),
});
groups.push(FeatureGroupResponse {
name: "Nearby POIs".to_string(),
features: dynamic_poi_features,
});
if let Some(amenities_group) = groups.iter_mut().find(|group| group.name == "Amenities") {
amenities_group.features.extend(dynamic_poi_features);
} else {
groups.push(FeatureGroupResponse {
name: "Amenities".to_string(),
features: dynamic_poi_features,
});
}
}
order_filter_groups(&mut groups);
FeaturesResponse { groups }
}
@ -196,3 +237,46 @@ pub async fn get_features(State(shared): State<Arc<SharedState>>) -> Json<Featur
info!("GET /api/features");
Json(state.features_response.clone())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a feature group that has the given name and no features.
    fn group(name: &str) -> FeatureGroupResponse {
        FeatureGroupResponse {
            features: Vec::new(),
            name: name.to_string(),
        }
    }

    /// Pinned groups come first in their configured order, unlisted groups
    /// keep their incoming order, and "Area development" is pushed to the end.
    #[test]
    fn orders_filter_groups_for_backend_response() {
        let incoming = [
            "Properties",
            "Education",
            "Area development",
            "Property prices",
            "Crime",
            "Neighbours",
            "Amenities",
            "Transport",
        ];
        let mut groups: Vec<FeatureGroupResponse> =
            incoming.iter().map(|name| group(name)).collect();

        order_filter_groups(&mut groups);

        let names: Vec<&str> = groups.iter().map(|entry| entry.name.as_str()).collect();
        let expected = vec![
            "Transport",
            "Property prices",
            "Properties",
            "Amenities",
            "Education",
            "Crime",
            "Neighbours",
            "Area development",
        ];
        assert_eq!(names, expected);
    }
}

View file

@ -68,7 +68,6 @@ pub async fn get_filter_counts(
}
let filters_str = params.filters;
let has_poi_filters = !parsed_poi_filters.is_empty();
let response = tokio::task::spawn_blocking(move || -> Result<FilterCountsResponse, String> {
let t0 = std::time::Instant::now();
@ -99,54 +98,40 @@ pub async fn get_filter_counts(
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
let base = row * num_features;
let mut fail_count: u32 = 0;
let mut fail_index: usize = 0;
let mut passes_all = true;
// Test numeric filters
for (i, f) in parsed_filters.iter().enumerate() {
let raw = feature_data[base + f.feat_idx];
if raw == NAN_U16 || raw < f.min_u16 || raw > f.max_u16 {
fail_count += 1;
fail_index = i;
if fail_count > 1 {
break;
}
impacts[i] += 1;
passes_all = false;
}
}
// Test enum filters
if fail_count <= 1 {
for (i, f) in parsed_enum_filters.iter().enumerate() {
let raw = feature_data[base + f.feat_idx];
if raw == NAN_U16 || !f.allowed.contains(&raw) {
fail_count += 1;
fail_index = parsed_filters.len() + i;
if fail_count > 1 {
break;
}
}
for (i, f) in parsed_enum_filters.iter().enumerate() {
let raw = feature_data[base + f.feat_idx];
if raw == NAN_U16 || !f.allowed.contains(&raw) {
impacts[parsed_filters.len() + i] += 1;
passes_all = false;
}
}
// Test POI filters
for (i, f) in parsed_poi_filters.iter().enumerate() {
let raw = state
.data
.poi_metrics
.raw_for_property_row(row, f.metric_idx);
if raw == NAN_U16 || raw < f.min_u16 || raw > f.max_u16 {
impacts[parsed_filters.len() + parsed_enum_filters.len() + i] += 1;
passes_all = false;
}
}
// Test travel time filters
if fail_count <= 1 && has_poi_filters {
for (i, f) in parsed_poi_filters.iter().enumerate() {
let raw = state
.data
.poi_metrics
.raw_for_property_row(row, f.metric_idx);
if raw == NAN_U16 || raw < f.min_u16 || raw > f.max_u16 {
fail_count += 1;
fail_index = parsed_filters.len() + parsed_enum_filters.len() + i;
if fail_count > 1 {
break;
}
}
}
}
// Test travel time filters
if fail_count <= 1 && has_travel {
if has_travel {
let postcode = pc_interner.resolve(&pc_keys[row]);
for (slot, &ti) in travel_filter_indices.iter().enumerate() {
let entry = &travel_entries[ti];
@ -165,19 +150,14 @@ pub async fn get_filter_counts(
_ => true,
};
if !passes {
fail_count += 1;
fail_index = num_regular + slot;
if fail_count > 1 {
break;
}
impacts[num_regular + slot] += 1;
passes_all = false;
}
}
}
match fail_count {
0 => total_passing += 1,
1 => impacts[fail_index] += 1,
_ => {}
if passes_all {
total_passing += 1;
}
});

View file

@ -6,7 +6,7 @@ use axum::response::{IntoResponse, Json};
use axum::Extension;
use metrics::histogram;
use rayon::prelude::*;
use rustc_hash::FxHashMap;
use rustc_hash::{FxHashMap, FxHashSet};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use tracing::info;
@ -32,6 +32,7 @@ type ChunkResult = (
FxHashMap<u64, Aggregator>,
FxHashMap<u64, PoiAggregator>,
Vec<FxHashMap<u64, TravelTimeAgg>>,
FxHashSet<u64>,
);
/// Maximum center-to-vertex distance in degrees per H3 resolution.
@ -82,6 +83,7 @@ pub struct HexagonParams {
fn build_feature_maps(
groups: &FxHashMap<u64, Aggregator>,
poi_groups: &FxHashMap<u64, PoiAggregator>,
selectable_cells: &FxHashSet<u64>,
min_keys: &[String],
max_keys: &[String],
avg_keys: &[String],
@ -214,6 +216,36 @@ fn build_feature_maps(
features.push(map);
}
for &cell_id in selectable_cells {
if groups.contains_key(&cell_id) {
continue;
}
let Some(cell) = h3o::CellIndex::try_from(cell_id).ok() else {
continue;
};
let center: h3o::LatLng = cell.into();
let lat = center.lat();
let lng = center.lng();
if lat < bound_south || lat > bound_north || lng < bound_west || lng > bound_east {
continue;
}
let mut map = Map::new();
map.insert("h3".into(), Value::String(cell.to_string()));
map.insert("count".into(), Value::from(0));
if let (Some(lat_num), Some(lon_num)) = (
serde_json::Number::from_f64(lat),
serde_json::Number::from_f64(lng),
) {
map.insert("lat".into(), Value::Number(lat_num));
map.insert("lon".into(), Value::Number(lon_num));
}
features.push(map);
}
features
}
@ -313,6 +345,7 @@ pub async fn get_hexagons(
let mut groups: FxHashMap<u64, Aggregator> = FxHashMap::default();
let mut poi_groups: FxHashMap<u64, PoiAggregator> = FxHashMap::default();
let mut selectable_cells: FxHashSet<u64> = FxHashSet::default();
let mut travel_aggs: Vec<FxHashMap<u64, TravelTimeAgg>> = (0..travel_entries.len())
.map(|_| FxHashMap::default())
.collect();
@ -338,12 +371,22 @@ pub async fn get_hexagons(
..travel_entries.len())
.map(|_| FxHashMap::default())
.collect();
let mut local_selectable_cells: FxHashSet<u64> = FxHashSet::default();
let mut h3_cache: FxHashMap<u64, u64> = FxHashMap::default();
let mut travel_minutes: Vec<Option<i16>> =
Vec::with_capacity(travel_entries.len());
'row: for &row_idx in chunk {
let row = row_idx as usize;
let cell_id = cell_for_row_cached(
row,
precomputed,
h3_res,
need_parent,
&mut h3_cache,
);
local_selectable_cells.insert(cell_id);
if !row_passes_filters(
row,
&parsed_filters,
@ -384,14 +427,6 @@ pub async fn get_hexagons(
}
}
let cell_id = cell_for_row_cached(
row,
precomputed,
h3_res,
need_parent,
&mut h3_cache,
);
let agg = local_groups
.entry(cell_id)
.or_insert_with(|| Aggregator::new(num_features, enum_dist_config));
@ -424,12 +459,19 @@ pub async fn get_hexagons(
}
}
(local_groups, local_poi_groups, local_travel_aggs)
(
local_groups,
local_poi_groups,
local_travel_aggs,
local_selectable_cells,
)
})
.collect();
// Merge thread-local results into the main accumulators
for (local_groups, local_poi_groups, local_travel) in thread_results {
for (local_groups, local_poi_groups, local_travel, local_selectable_cells) in
thread_results
{
for (cell_id, local_agg) in local_groups {
groups
.entry(cell_id)
@ -450,6 +492,7 @@ pub async fn get_hexagons(
.merge(&local_tt);
}
}
selectable_cells.extend(local_selectable_cells);
}
} else {
// Sequential: use for_each_in_bounds to avoid Vec<u32> allocation
@ -460,6 +503,9 @@ pub async fn get_hexagons(
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
let cell_id =
cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache);
selectable_cells.insert(cell_id);
if !row_passes_filters(
row,
@ -499,9 +545,6 @@ pub async fn get_hexagons(
}
}
let cell_id =
cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache);
let aggregation = groups
.entry(cell_id)
.or_insert_with(|| Aggregator::new(num_features, enum_dist_config));
@ -540,6 +583,7 @@ pub async fn get_hexagons(
let mut features = build_feature_maps(
&groups,
&poi_groups,
&selectable_cells,
min_keys,
max_keys,
avg_keys,
@ -564,6 +608,7 @@ pub async fn get_hexagons(
resolution,
rows = row_count,
parallel,
selectable_cells = selectable_cells.len(),
cells_before_filter = groups.len(),
cells_after_filter = features.len(),
truncated,

View file

@ -5,7 +5,7 @@ use axum::http::StatusCode;
use axum::response::{IntoResponse, Json};
use axum::Extension;
use metrics::histogram;
use rustc_hash::FxHashMap;
use rustc_hash::{FxHashMap, FxHashSet};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use tracing::info;
@ -142,11 +142,18 @@ pub async fn get_postcodes(
// Single-pass: aggregate directly into postcode_aggs while iterating properties in bounds
let mut postcode_aggs: FxHashMap<usize, Aggregator> = FxHashMap::default();
let mut poi_aggs: FxHashMap<usize, PoiAggregator> = FxHashMap::default();
let mut selectable_postcodes: FxHashSet<usize> = FxHashSet::default();
state
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
let postcode = state.data.postcode(row);
let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) else {
return;
};
selectable_postcodes.insert(pc_idx);
if !row_passes_filters(
row,
&parsed_filters,
@ -161,22 +168,19 @@ pub async fn get_postcodes(
return;
}
let postcode = state.data.postcode(row);
if let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) {
let agg = postcode_aggs
let agg = postcode_aggs
.entry(pc_idx)
.or_insert_with(|| Aggregator::new(num_features, enum_dist_config));
if has_selective {
agg.add_row_selective(feature_data, row, num_features, sel_indices, &quant);
} else {
agg.add_row(feature_data, row, num_features, &quant);
}
if has_poi_fields {
poi_aggs
.entry(pc_idx)
.or_insert_with(|| Aggregator::new(num_features, enum_dist_config));
if has_selective {
agg.add_row_selective(feature_data, row, num_features, sel_indices, &quant);
} else {
agg.add_row(feature_data, row, num_features, &quant);
}
if has_poi_fields {
poi_aggs
.entry(pc_idx)
.or_insert_with(|| PoiAggregator::new(poi_num_features))
.add_row_selective(poi_metrics, row, poi_field_indices);
}
.or_insert_with(|| PoiAggregator::new(poi_num_features))
.add_row_selective(poi_metrics, row, poi_field_indices);
}
});
@ -229,8 +233,10 @@ pub async fn get_postcodes(
let t_agg = t0.elapsed();
// Build response, filtering postcodes to only those whose polygon intersects query bounds
let mut features = Vec::with_capacity(postcode_aggs.len());
let postcodes_before_filter = postcode_aggs.len();
let mut features = Vec::with_capacity(selectable_postcodes.len());
let postcodes_before_filter = selectable_postcodes.len();
let matching_postcodes = postcode_aggs.len();
let mut included_postcodes: FxHashSet<usize> = FxHashSet::default();
let mut filtered_out = 0usize;
for (pc_idx, aggregation) in postcode_aggs {
@ -255,7 +261,7 @@ pub async fn get_postcodes(
continue;
}
let geometry = postcode_data.geometries[pc_idx].clone();
let geometry = postcode_data.geometry_geojson(pc_idx);
// Build properties
let centroid = postcode_data.centroids[pc_idx];
@ -347,18 +353,71 @@ pub async fn get_postcodes(
feature.insert("properties".into(), Value::Object(props));
features.push(feature);
included_postcodes.insert(pc_idx);
if features.len() >= MAX_CELLS_PER_REQUEST {
break;
}
}
if features.len() < MAX_CELLS_PER_REQUEST {
for pc_idx in selectable_postcodes {
if included_postcodes.contains(&pc_idx) {
continue;
}
let (pc_south, pc_west, pc_north, pc_east) = postcode_data.aabbs[pc_idx];
if !bounds_intersect(
pc_south as f64,
pc_west as f64,
pc_north as f64,
pc_east as f64,
south,
west,
north,
east,
) {
filtered_out += 1;
continue;
}
let geometry = postcode_data.geometry_geojson(pc_idx);
let centroid = postcode_data.centroids[pc_idx];
let mut props = Map::new();
props.insert(
"postcode".into(),
Value::String(postcode_data.postcodes[pc_idx].clone()),
);
props.insert("count".into(), Value::from(0));
props.insert(
"centroid".into(),
Value::Array(vec![
Value::from(centroid.1 as f64),
Value::from(centroid.0 as f64),
]),
);
let mut feature = Map::new();
feature.insert("type".into(), Value::String("Feature".into()));
feature.insert("geometry".into(), geometry);
feature.insert("properties".into(), Value::Object(props));
features.push(feature);
if features.len() >= MAX_CELLS_PER_REQUEST {
break;
}
}
}
histogram!("postcodes_response_count").record(features.len() as f64);
let truncated = features.len() >= MAX_CELLS_PER_REQUEST;
let t_total = t0.elapsed();
info!(
postcodes_before_filter,
matching_postcodes,
postcodes_after_filter = features.len(),
filtered_out,
truncated,
@ -418,7 +477,7 @@ pub async fn get_nearest_postcode(
let idx = best_idx.ok_or(StatusCode::NOT_FOUND)?;
let (lat, lon) = postcode_data.centroids[idx];
let geometry = postcode_data.geometries[idx].clone();
let geometry = postcode_data.geometry_geojson(idx);
let postcode = &postcode_data.postcodes[idx];
// Log location for authenticated users (best-effort, non-blocking)
@ -454,7 +513,7 @@ pub async fn get_postcode_lookup(
if let Some(&idx) = postcode_data.postcode_to_idx.get(&normalized) {
let (lat, lon) = postcode_data.centroids[idx];
let geometry = postcode_data.geometries[idx].clone();
let geometry = postcode_data.geometry_geojson(idx);
info!(postcode = %normalized, "GET /api/postcode/{postcode}");
Ok(Json(serde_json::json!({

View file

@ -0,0 +1,237 @@
use std::sync::Arc;
use axum::extract::{Query, State};
use axum::http::StatusCode;
use axum::response::Redirect;
use reqwest::Url;
use serde::Deserialize;
use serde_json::Value;
use tracing::warn;
use crate::state::SharedState;
use crate::utils::normalize_postcode;
/// Endpoint queried to resolve a postcode into a Rightmove location identifier.
const RIGHTMOVE_TYPEAHEAD_URL: &str = "https://los.rightmove.co.uk/typeahead";
/// The only host accepted for redirect targets.
const RIGHTMOVE_HOST: &str = "www.rightmove.co.uk";
/// The only URL path accepted for redirect targets.
const RIGHTMOVE_FIND_PATH: &str = "/property-for-sale/find.html";
/// Query-string parameters accepted by the Rightmove redirect handler.
#[derive(Deserialize)]
pub struct RightmoveRedirectParams {
/// Postcode to pin the search to; the handler rejects values that do not have the shape of a full UK postcode.
postcode: String,
/// Rightmove search URL to redirect to; the handler validates its scheme, host and path before use.
target: String,
}
/// The part of the typeahead JSON response that this module reads.
#[derive(Deserialize)]
struct RightmoveTypeaheadResponse {
/// Candidate locations; deserialises to an empty list when the field is absent.
#[serde(default)]
matches: Vec<RightmoveTypeaheadMatch>,
}
/// A single candidate location from the typeahead response.
#[derive(Deserialize)]
struct RightmoveTypeaheadMatch {
/// Identifier of the match, kept as raw JSON because both string and numeric ids are handled when it is converted.
id: Value,
/// Match category read from the JSON `type` field; compared case-insensitively against `POSTCODE`.
#[serde(rename = "type")]
match_type: String,
/// Label read from the JSON `displayName` field; empty when the field is absent.
#[serde(default, rename = "displayName")]
display_name: String,
}
pub async fn get_rightmove_redirect(
State(shared): State<Arc<SharedState>>,
Query(params): Query<RightmoveRedirectParams>,
) -> Result<Redirect, (StatusCode, String)> {
if !looks_like_full_uk_postcode(&params.postcode) {
return Err((
StatusCode::BAD_REQUEST,
"'postcode' must be a full UK postcode".to_string(),
));
}
let postcode = normalize_postcode(&params.postcode);
let mut target = parse_rightmove_target(&params.target)?;
let state = shared.load_state();
match fetch_exact_postcode_location_identifier(&state.http_client, &postcode).await {
Some(location_identifier) => {
apply_exact_postcode_location(&mut target, &postcode, &location_identifier);
}
None => warn!(
postcode,
"Could not resolve exact Rightmove postcode location"
),
}
Ok(Redirect::temporary(target.as_str()))
}
/// Look up the Rightmove location identifier for an exact postcode.
///
/// Queries the typeahead endpoint with `postcode` and returns
/// `POSTCODE^<id>` for the first match whose type is `POSTCODE`
/// (case-insensitive), whose display name equals `postcode` once whitespace
/// and ASCII case are ignored, and whose id is a usable string or number.
///
/// Returns `None` when the request fails, the response status is not a
/// success, the body cannot be parsed, or no match qualifies. The first three
/// cases are logged as warnings.
async fn fetch_exact_postcode_location_identifier(
    client: &reqwest::Client,
    postcode: &str,
) -> Option<String> {
    let encoded_query = urlencoding::encode(postcode);
    let url = format!("{RIGHTMOVE_TYPEAHEAD_URL}?query={encoded_query}&limit=5");

    let response = match client.get(url).send().await {
        Ok(response) => response,
        Err(err) => {
            warn!(postcode, "Rightmove typeahead request failed: {err}");
            return None;
        }
    };

    let status = response.status();
    if !status.is_success() {
        warn!(
            postcode,
            status = %status,
            "Rightmove typeahead returned an error"
        );
        return None;
    }

    let typeahead = match response.json::<RightmoveTypeaheadResponse>().await {
        Ok(parsed) => parsed,
        Err(err) => {
            warn!(
                postcode,
                "Failed to parse Rightmove typeahead response: {err}"
            );
            return None;
        }
    };

    let wanted = compact_postcode(postcode);
    typeahead
        .matches
        .iter()
        .filter(|candidate| candidate.match_type.eq_ignore_ascii_case("POSTCODE"))
        .filter(|candidate| compact_postcode(&candidate.display_name) == wanted)
        .find_map(|candidate| {
            rightmove_id_to_string(&candidate.id).map(|id| format!("POSTCODE^{id}"))
        })
}
/// Parse and validate the caller-supplied redirect target.
///
/// `target` comes straight from the query string and ends up in a `Location`
/// header, so it is only accepted when it is exactly an HTTPS URL for the
/// Rightmove property-for-sale search page: scheme `https`, host
/// `RIGHTMOVE_HOST`, path `RIGHTMOVE_FIND_PATH`, no explicit non-default port
/// and no embedded username or password. Query parameters are not inspected
/// here.
///
/// # Errors
///
/// Returns `400 Bad Request` with an explanatory message when `target` is not
/// a parseable URL or fails any of the checks above.
fn parse_rightmove_target(target: &str) -> Result<Url, (StatusCode, String)> {
    let url = Url::parse(target).map_err(|_| {
        (
            StatusCode::BAD_REQUEST,
            "'target' must be a valid Rightmove URL".to_string(),
        )
    })?;

    let is_rightmove_search = url.scheme() == "https"
        && url.host_str() == Some(RIGHTMOVE_HOST)
        && url.path() == RIGHTMOVE_FIND_PATH;

    // `port()` is `None` when the URL uses the scheme's default port, so any
    // `Some` here means an explicit non-standard port. Userinfo is rejected so
    // the redirect cannot carry credentials or a misleading `user@host` prefix.
    let has_unexpected_authority =
        url.port().is_some() || !url.username().is_empty() || url.password().is_some();

    if !is_rightmove_search || has_unexpected_authority {
        return Err((
            StatusCode::BAD_REQUEST,
            "'target' must be a Rightmove property search URL".to_string(),
        ));
    }

    Ok(url)
}
/// Rewrite the location parameters of a Rightmove search URL in place.
///
/// Any existing `searchLocation`, `useLocationIdentifier`,
/// `locationIdentifier` and `radius` pairs are dropped. All other query pairs
/// are kept in their original order, followed by fresh values for those four
/// keys: the given `postcode`, `true`, the given `location_identifier`, and a
/// radius of `0.0`.
fn apply_exact_postcode_location(url: &mut Url, postcode: &str, location_identifier: &str) {
    const OVERRIDDEN_KEYS: [&str; 4] = [
        "searchLocation",
        "useLocationIdentifier",
        "locationIdentifier",
        "radius",
    ];

    // Copy out the pairs we keep first: the serializer below needs `url` mutably.
    let mut rebuilt: Vec<(String, String)> = Vec::new();
    for (key, value) in url.query_pairs() {
        let is_overridden = OVERRIDDEN_KEYS.iter().any(|&overridden| key == overridden);
        if !is_overridden {
            rebuilt.push((key.into_owned(), value.into_owned()));
        }
    }

    rebuilt.extend([
        ("searchLocation".to_string(), postcode.to_string()),
        ("useLocationIdentifier".to_string(), "true".to_string()),
        (
            "locationIdentifier".to_string(),
            location_identifier.to_string(),
        ),
        ("radius".to_string(), "0.0".to_string()),
    ]);

    let mut serializer = url.query_pairs_mut();
    serializer.clear();
    rebuilt.iter().for_each(|(key, value)| {
        serializer.append_pair(key, value);
    });
}
/// Render a typeahead match id as a string.
///
/// Numbers are formatted as-is. Strings are returned unchanged (not trimmed)
/// as long as they contain at least one non-whitespace character. Every other
/// JSON value, including blank strings, yields `None`.
fn rightmove_id_to_string(value: &Value) -> Option<String> {
    match value {
        Value::Number(number) => Some(number.to_string()),
        Value::String(text) => (!text.trim().is_empty()).then(|| text.clone()),
        _ => None,
    }
}
/// Strip all whitespace from `postcode` and upper-case its ASCII letters.
///
/// Non-ASCII characters are kept unchanged.
fn compact_postcode(postcode: &str) -> String {
    let mut compact = String::with_capacity(postcode.len());
    for ch in postcode.chars() {
        if !ch.is_whitespace() {
            compact.push(ch.to_ascii_uppercase());
        }
    }
    compact
}
/// Cheap structural check that `postcode` has the shape of a full UK postcode.
///
/// Whitespace and ASCII case are ignored. The compacted value must be 5 to 7
/// bytes long. Its last three bytes (the inward code) must be an ASCII digit
/// followed by two ASCII letters. Everything before that (the outward code)
/// must be ASCII alphanumeric and start with an ASCII letter. This checks
/// shape only; it does not check that the postcode exists.
fn looks_like_full_uk_postcode(postcode: &str) -> bool {
    let compact = compact_postcode(postcode);
    let bytes = compact.as_bytes();
    if bytes.len() < 5 || bytes.len() > 7 {
        return false;
    }

    // The length check above guarantees the outward part has at least two bytes.
    let (outward, inward) = bytes.split_at(bytes.len() - 3);

    let outward_ok =
        outward[0].is_ascii_alphabetic() && outward.iter().all(u8::is_ascii_alphanumeric);
    let inward_ok = matches!(
        inward,
        [digit, first, second]
            if digit.is_ascii_digit()
                && first.is_ascii_alphabetic()
                && second.is_ascii_alphabetic()
    );

    outward_ok && inward_ok
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Location parameters are replaced while unrelated parameters survive.
    #[test]
    fn rewrites_rightmove_url_to_exact_postcode_location() {
        let original = "https://www.rightmove.co.uk/property-for-sale/find.html?searchLocation=SW1A+1AA&useLocationIdentifier=true&locationIdentifier=OUTCODE%5E2506&radius=0.25&minPrice=100000";
        let mut url = Url::parse(original).unwrap();

        apply_exact_postcode_location(&mut url, "SW1A 1AA", "POSTCODE^837246");

        let pairs: std::collections::HashMap<String, String> =
            url.query_pairs().into_owned().collect();
        let expected = [
            ("searchLocation", "SW1A 1AA"),
            ("useLocationIdentifier", "true"),
            ("locationIdentifier", "POSTCODE^837246"),
            ("radius", "0.0"),
            ("minPrice", "100000"),
        ];
        for (key, value) in expected {
            assert_eq!(pairs.get(key).map(String::as_str), Some(value));
        }
    }

    /// A wrong host, a wrong scheme and a wrong path are each refused.
    #[test]
    fn rejects_non_rightmove_redirect_targets() {
        let rejected = [
            "https://example.com/property-for-sale/find.html",
            "http://www.rightmove.co.uk/property-for-sale/find.html",
            "https://www.rightmove.co.uk/property-to-rent/find.html",
        ];
        for target in rejected {
            assert!(parse_rightmove_target(target).is_err());
        }
    }

    /// Full postcodes pass regardless of case or spacing; partial ones do not.
    #[test]
    fn validates_full_postcode_shape() {
        for accepted in ["SW1A 1AA", "e16an"] {
            assert!(looks_like_full_uk_postcode(accepted));
        }
        for refused in ["SW1A", "not a postcode"] {
            assert!(!looks_like_full_uk_postcode(refused));
        }
    }
}