seems fine

This commit is contained in:
Andras Schmelczer 2026-05-05 22:29:28 +01:00
parent 48983e3b4b
commit 7a1696541f
37 changed files with 4999 additions and 1242 deletions

View file

@ -4,7 +4,7 @@ mod postcodes;
mod property;
pub mod travel_time;
pub use places::PlaceData;
pub use places::{normalize_search_text, PlaceData};
pub use poi::{POICategoryGroup, POIData};
pub use postcodes::{OutcodeData, PostcodeData};
pub use property::{

View file

@ -11,22 +11,127 @@ use crate::utils::InternedColumn;
/// Column-oriented place table; the loader builds each column with one entry per place row.
pub struct PlaceData {
/// Place name as read from the source data.
pub name: Vec<String>,
/// `name` converted with `to_lowercase`.
pub name_lower: Vec<String>,
/// Normalized search text for each place: the normalized name plus generated aliases,
/// joined with " | " (see `build_search_text`).
pub name_search: Vec<String>,
/// Place type column built with `InternedColumn::build` from the raw type strings.
pub place_type: InternedColumn,
/// Rank derived from the raw place type by `type_rank`.
pub type_rank: Vec<u8>,
/// NOTE(review): presumably the place's population count — confirm against the loader.
pub population: Vec<u32>,
/// NOTE(review): presumably latitude in degrees — confirm against the loader.
pub lat: Vec<f32>,
/// NOTE(review): presumably longitude in degrees — confirm against the loader.
pub lon: Vec<f32>,
/// Nearest city, precomputed for non-city places according to the loader's comment.
/// NOTE(review): the computation is outside this view — confirm when it is `None`.
pub city: Vec<Option<String>>,
/// Whether the place is a travel destination: read from the `travel_destination` column
/// when present, otherwise derived from the place type via `is_travel_destination_type`.
pub travel_destination: Vec<bool>,
}
/// Maps a raw place type to its rank.
///
/// `"city"` has the lowest rank (0) and any unrecognised type the highest (6).
/// Each type is listed exactly once and the catch-all arm comes last, so no
/// specific arm is shadowed.
fn type_rank(place_type: &str) -> u8 {
    match place_type {
        "city" => 0,
        "town" => 1,
        "village" => 2,
        "suburb" | "neighbourhood" | "quarter" | "borough" | "locality" => 3,
        "station" => 4,
        "hamlet" | "isolated_dwelling" | "island" => 5,
        _ => 6,
    }
}
/// Returns `true` when `place_type` is `"city"` or `"station"`.
pub fn is_travel_destination_type(place_type: &str) -> bool {
    place_type == "city" || place_type == "station"
}
/// Normalizes free text for search matching.
///
/// - Apostrophe-like characters (`'`, U+2019 right single quotation mark, and
///   the backtick) are dropped so that "King's" becomes "kings".
/// - ASCII letters and digits are kept, lowercased.
/// - Every other run of characters collapses to a single space.
/// - The result has no leading or trailing space.
pub fn normalize_search_text(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    // Starts as `true` so that leading separators never emit a space.
    let mut last_was_space = true;

    for ch in text.chars() {
        // NOTE(review): the middle literal was an empty char literal in the reviewed
        // text (not valid Rust); U+2019 is the typographic apostrophe it most likely was.
        if matches!(ch, '\'' | '\u{2019}' | '`') {
            continue;
        }

        let lower = ch.to_ascii_lowercase();
        if lower.is_ascii_alphanumeric() {
            result.push(lower);
            last_was_space = false;
        } else if !last_was_space {
            result.push(' ');
            last_was_space = true;
        }
    }

    // At most one trailing separator can have been emitted.
    if result.ends_with(' ') {
        result.pop();
    }
    result
}
/// Replaces every whitespace-separated token equal to `from` with `to`.
///
/// Returns `None` when no token matched; otherwise the tokens re-joined with
/// single spaces.
fn replace_token(text: &str, from: &str, to: &str) -> Option<String> {
    if !text.split_whitespace().any(|word| word == from) {
        return None;
    }

    let swapped: Vec<&str> = text
        .split_whitespace()
        .map(|word| if word == from { to } else { word })
        .collect();
    Some(swapped.join(" "))
}
/// Appends `alias` to `aliases` unless it is empty or already present.
fn push_alias(aliases: &mut Vec<String>, alias: String) {
    if alias.is_empty() || aliases.contains(&alias) {
        return;
    }
    aliases.push(alias);
}
/// Builds the searchable text for a place: the normalized name followed by
/// generated aliases, joined with `" | "`.
///
/// Aliases are the "st" <-> "saint" token swaps and, for stations only,
/// alternative spellings of known station suffixes.
fn build_search_text(name: &str, place_type: &str) -> String {
    // (suffix on the normalized name, replacement suffixes to generate)
    const STATION_SUFFIX_ALIASES: [(&str, &[&str]); 5] = [
        (
            " tube station",
            &[" underground station", " station", " tube", " underground"],
        ),
        (
            " underground station",
            &[" tube station", " station", " tube", " underground"],
        ),
        (
            " railway station",
            &[" rail station", " station", " railway", " rail"],
        ),
        (
            " overground station",
            &[" station", " overground", " railway station"],
        ),
        (
            " elizabeth line station",
            &[" station", " elizabeth line", " crossrail station"],
        ),
    ];

    let primary = normalize_search_text(name);
    let mut aliases = vec![primary.clone()];

    for (from, to) in [("st", "saint"), ("saint", "st")] {
        if let Some(alias) = replace_token(&primary, from, to) {
            push_alias(&mut aliases, alias);
        }
    }

    if place_type == "station" {
        for (suffix, replacements) in STATION_SUFFIX_ALIASES {
            let Some(stem) = primary.strip_suffix(suffix) else {
                continue;
            };
            for replacement in replacements {
                push_alias(&mut aliases, format!("{stem}{replacement}"));
            }
        }
    }

    aliases.join(" | ")
}
fn extract_str_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<String>> {
let column = df
.column(name)
@ -56,6 +161,23 @@ fn extract_f32_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<f32>> {
.collect())
}
/// Reads boolean column `name` from `df`, substituting `default_value` for
/// null entries.
///
/// When the column does not exist, returns `default_value` repeated once per
/// row of `df`.
///
/// # Errors
/// Fails when the column exists but is not a boolean column.
fn extract_bool_col_or_default(
    df: &DataFrame,
    name: &str,
    default_value: bool,
) -> anyhow::Result<Vec<bool>> {
    match df.column(name) {
        Err(_) => Ok(vec![default_value; df.height()]),
        Ok(column) => {
            let flags = column
                .bool()
                .with_context(|| format!("Column '{name}' is not a boolean column"))?;
            let values: Vec<bool> = flags
                .into_iter()
                .map(|flag| flag.unwrap_or(default_value))
                .collect();
            Ok(values)
        }
    }
}
impl PlaceData {
pub fn load(parquet_path: &Path) -> anyhow::Result<Self> {
info!("Loading place data from {:?}...", parquet_path);
@ -80,8 +202,21 @@ impl PlaceData {
};
let name_lower: Vec<String> = name.iter().map(|nm| nm.to_lowercase()).collect();
let name_search: Vec<String> = name
.iter()
.zip(&place_type_raw)
.map(|(nm, pt)| build_search_text(nm, pt))
.collect();
let type_rank_vec: Vec<u8> = place_type_raw.iter().map(|pt| type_rank(pt)).collect();
let place_type = InternedColumn::build(&place_type_raw);
let travel_destination = if df.column("travel_destination").is_ok() {
extract_bool_col_or_default(&df, "travel_destination", true)?
} else {
place_type_raw
.iter()
.map(|place_type| is_travel_destination_type(place_type))
.collect()
};
// Precompute nearest city for each non-city place
let city_indices: Vec<usize> = type_rank_vec
@ -133,12 +268,14 @@ impl PlaceData {
Ok(PlaceData {
name,
name_lower,
name_search,
place_type,
type_rank: type_rank_vec,
population,
lat,
lon,
city,
travel_destination,
})
}
}
@ -149,7 +286,23 @@ mod tests {
// Checks the relative ordering of ranks returned by `type_rank`:
// city before town, town before station, station before unknown types.
#[test]
fn type_rank_ordering() {
assert!(type_rank("city") < type_rank("station"));
assert!(type_rank("city") < type_rank("town"));
assert!(type_rank("town") < type_rank("station"));
assert!(type_rank("station") < type_rank("unknown"));
}
// Checks that `build_search_text` generates the station-suffix alias
// ("tube station" -> "underground") and the "st" -> "saint" alias.
#[test]
fn search_text_handles_common_address_variants() {
assert!(build_search_text("King's Cross tube station", "station")
.contains("kings cross underground"));
assert!(build_search_text("St Albans", "city").contains("saint albans"));
}
// Checks that only "city" and "station" count as travel-destination types.
#[test]
fn travel_destination_types_match_legacy_places() {
assert!(is_travel_destination_type("city"));
assert!(is_travel_destination_type("station"));
assert!(!is_travel_destination_type("town"));
assert!(!is_travel_destination_type("suburb"));
}
}

View file

@ -5,11 +5,16 @@ use rayon::prelude::*;
use serde::Serialize;
use std::path::Path;
use rustc_hash::FxHashMap;
use rustc_hash::{FxHashMap, FxHashSet};
use crate::consts::{H3_PRECOMPUTE_MAX, HISTOGRAM_BINS, NAN_U16, QUANT_SCALE};
use crate::features::{self, Bounds};
/// Maximum number of rows taken from a token's posting list as search candidates.
const ADDRESS_SEARCH_CANDIDATE_LIMIT: usize = 50_000;
/// Tokens whose posting list is longer than this are dropped from the address token index.
const ADDRESS_SEARCH_MAX_POSTINGS_PER_TOKEN: usize = 250_000;
/// Shortest token prefix stored in the address prefix index.
const ADDRESS_SEARCH_PREFIX_MIN_LEN: usize = 4;
/// Longest token prefix stored in the address prefix index; longer query terms are
/// truncated to this length before the prefix lookup.
const ADDRESS_SEARCH_PREFIX_MAX_LEN: usize = 8;
fn is_numeric_dtype(dtype: &DataType) -> bool {
matches!(
dtype,
@ -32,6 +37,360 @@ fn is_datetime_dtype(dtype: &DataType) -> bool {
matches!(dtype, DataType::Datetime(_, _) | DataType::Date)
}
/// One text term of an address query together with its interchangeable spellings.
/// A row matches the group when any alternative matches one of the row's tokens.
#[derive(Clone, Debug)]
struct AddressTermGroup {
/// The query token itself first, followed by its aliases (for example "rd" and "road").
alternatives: Vec<String>,
}
/// Parsed form of a free-text address query, produced by `parse_address_query`.
#[derive(Debug)]
struct AddressQuery {
/// Full postcode found in the query, formatted as uppercase "OUTWARD INWARD", if any.
full_postcode: Option<String>,
/// Text term groups; a row is rejected when any group has no matching token.
text_groups: Vec<AddressTermGroup>,
/// Sorted, deduplicated numeric terms: all-digit tokens plus short digit-bearing
/// tokens that did not form a text group.
numeric_terms: Vec<String>,
/// Terms used to pick candidate rows from the token index: non-stop-word
/// alternatives of the text groups plus short digit-bearing tokens.
candidate_terms: Vec<String>,
}
/// Splits address text into lowercase ASCII alphanumeric tokens.
///
/// Apostrophe-like characters (`'`, U+2019 right single quotation mark, and
/// the backtick) are removed without splitting the token, so "O'Neil" yields
/// "oneil". Every other non-alphanumeric character ends the current token.
fn tokenize_address_text(text: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut current = String::new();

    for ch in text.chars() {
        if ch.is_ascii_alphanumeric() {
            current.push(ch.to_ascii_lowercase());
        } else if matches!(ch, '\'' | '\u{2019}' | '`') {
            // NOTE(review): the middle literal was an empty char literal in the reviewed
            // text (not valid Rust); U+2019 is the typographic apostrophe it most likely was.
            continue;
        } else if !current.is_empty() {
            // `take` hands over the finished token and leaves an empty buffer behind.
            tokens.push(std::mem::take(&mut current));
        }
    }

    if !current.is_empty() {
        tokens.push(current);
    }
    tokens
}
/// Returns `true` when `compact` (no spaces) has the shape of a full postcode:
/// an outward part of 2–4 alphanumerics that starts with a letter and contains
/// a digit, followed by an inward part of one digit and two letters.
fn is_full_postcode_compact(compact: &str) -> bool {
    let bytes = compact.as_bytes();
    if bytes.len() < 5 || bytes.len() > 7 {
        return false;
    }

    // With a total length of 5..=7 the outward part is always 2..=4 bytes long.
    let (outward, inward) = bytes.split_at(bytes.len() - 3);

    let inward_ok = matches!(
        inward,
        [digit, first, second]
            if digit.is_ascii_digit()
                && first.is_ascii_alphabetic()
                && second.is_ascii_alphabetic()
    );
    if !inward_ok {
        return false;
    }

    let starts_with_letter = outward[0].is_ascii_alphabetic();
    let all_alphanumeric = outward.iter().all(u8::is_ascii_alphanumeric);
    let has_digit = outward.iter().any(u8::is_ascii_digit);
    starts_with_letter && all_alphanumeric && has_digit
}
/// Formats a compact postcode as uppercase "OUTWARD INWARD", where the inward
/// part is the last three characters.
///
/// Callers are expected to pass a value at least three bytes long; shorter
/// input panics.
fn canonical_postcode_from_compact(compact: &str) -> String {
    let upper = compact.to_ascii_uppercase();
    let (outward, inward) = upper.split_at(upper.len() - 3);
    format!("{outward} {inward}")
}
/// Finds the first full postcode among `tokens`.
///
/// Single tokens are tried first (for example "sw1a1aa"), then each pair of
/// adjacent tokens joined together (for example "sw1a" + "1aa"). Returns the
/// canonical postcode and the indices of the tokens that formed it.
fn extract_full_postcode(tokens: &[String]) -> Option<(String, Vec<usize>)> {
    let from_single_token = tokens.iter().enumerate().find_map(|(idx, token)| {
        let compact = token.to_ascii_uppercase();
        if is_full_postcode_compact(&compact) {
            Some((canonical_postcode_from_compact(&compact), vec![idx]))
        } else {
            None
        }
    });

    from_single_token.or_else(|| {
        tokens.windows(2).enumerate().find_map(|(idx, pair)| {
            let compact = format!(
                "{}{}",
                pair[0].to_ascii_uppercase(),
                pair[1].to_ascii_uppercase()
            );
            if is_full_postcode_compact(&compact) {
                Some((
                    canonical_postcode_from_compact(&compact),
                    vec![idx, idx + 1],
                ))
            } else {
                None
            }
        })
    })
}
/// Returns `true` for tokens shaped like a partial postcode: 2–4 ASCII
/// alphanumerics that start with a letter and contain at least one digit.
fn looks_like_postcode_fragment(token: &str) -> bool {
    if token.len() < 2 || token.len() > 4 {
        return false;
    }

    let starts_with_letter = token
        .chars()
        .next()
        .map_or(false, |ch| ch.is_ascii_alphabetic());
    if !starts_with_letter {
        return false;
    }

    let mut has_digit = false;
    for ch in token.chars() {
        if !ch.is_ascii_alphanumeric() {
            return false;
        }
        has_digit |= ch.is_ascii_digit();
    }
    has_digit
}
/// Returns `true` when every character of `token` is an ASCII digit.
/// An empty token therefore also yields `true`.
fn is_numeric_address_token(token: &str) -> bool {
    !token.chars().any(|ch| !ch.is_ascii_digit())
}
/// Returns the interchangeable spellings of a street-type or unit token.
///
/// The list starts with `token` itself; it is empty when the token has no
/// known aliases.
fn address_token_aliases(token: &str) -> Vec<&'static str> {
    let aliases: &[&'static str] = match token {
        "apt" => &["apt", "apartment"],
        "apartment" => &["apartment", "apt"],
        "ave" => &["ave", "avenue"],
        "avenue" => &["avenue", "ave"],
        "blvd" => &["blvd", "boulevard"],
        "boulevard" => &["boulevard", "blvd"],
        "cl" => &["cl", "close"],
        "close" => &["close", "cl"],
        "ct" => &["ct", "court"],
        "court" => &["court", "ct"],
        "cres" => &["cres", "crescent"],
        "crescent" => &["crescent", "cres"],
        "dr" => &["dr", "drive"],
        "drive" => &["drive", "dr"],
        "fl" => &["fl", "flat"],
        "flat" => &["flat", "fl"],
        "gdns" => &["gdns", "gardens", "garden"],
        "garden" => &["garden", "gardens", "gdns"],
        "gardens" => &["gardens", "garden", "gdns"],
        "hse" => &["hse", "house"],
        "house" => &["house", "hse"],
        "ln" => &["ln", "lane"],
        "lane" => &["lane", "ln"],
        "rd" => &["rd", "road"],
        "road" => &["road", "rd"],
        "sq" => &["sq", "square"],
        "square" => &["square", "sq"],
        "st" => &["st", "street", "saint"],
        "street" => &["street", "st"],
        "saint" => &["saint", "st"],
        "terr" => &["terr", "terrace"],
        "terrace" => &["terrace", "terr"],
        _ => &[],
    };
    aliases.to_vec()
}
/// Returns `true` for tokens that are too common in addresses to be useful
/// on their own: articles, unit words, and street-type words or abbreviations.
fn is_address_stop_token(token: &str) -> bool {
    const STOP_TOKENS: &[&str] = &[
        "a", "an", "and", "apartment", "apt", "avenue", "ave", "block", "building",
        "bungalow", "close", "cl", "court", "ct", "cres", "crescent", "drive", "dr",
        "estate", "flat", "fl", "floor", "garden", "gardens", "gdns", "grove", "house",
        "hse", "lane", "ln", "lodge", "mansions", "mews", "of", "park", "place", "road",
        "rd", "room", "row", "saint", "sq", "square", "st", "street", "terr", "terrace",
        "the", "unit", "view", "villas", "walk", "way", "yard",
    ];
    STOP_TOKENS.iter().any(|stop| *stop == token)
}
/// Builds the term group for a query token, or `None` when the token should
/// not be matched as text.
///
/// A token is skipped when it is shorter than three bytes, purely numeric,
/// shaped like a postcode fragment, or when it and all of its aliases are
/// stop tokens.
fn address_term_group(token: &str) -> Option<AddressTermGroup> {
    let unsearchable = token.len() < 3
        || is_numeric_address_token(token)
        || looks_like_postcode_fragment(token);
    if unsearchable {
        return None;
    }

    // The token itself always comes first, followed by any new aliases.
    let mut alternatives = vec![token.to_string()];
    for alias in address_token_aliases(token) {
        if alternatives.iter().all(|existing| existing != alias) {
            alternatives.push(alias.to_string());
        }
    }

    let only_stop_tokens = alternatives
        .iter()
        .all(|alternative| is_address_stop_token(alternative));
    if only_stop_tokens {
        None
    } else {
        Some(AddressTermGroup { alternatives })
    }
}
/// Returns the sorted, deduplicated search tokens of an address: its tokens
/// filtered through `is_address_search_token`.
fn address_search_tokens(text: &str) -> Vec<String> {
    let mut tokens = tokenize_address_text(text);
    tokens.retain(|token| is_address_search_token(token));
    tokens.sort_unstable();
    tokens.dedup();
    tokens
}
/// Decides whether an address token is kept for per-row matching.
///
/// Postcode fragments are dropped; purely numeric tokens are always kept;
/// tokens containing a digit need at least two bytes and all other tokens at
/// least three.
fn is_address_search_token(token: &str) -> bool {
    if looks_like_postcode_fragment(token) {
        false
    } else if is_numeric_address_token(token) {
        true
    } else {
        let contains_digit = token.chars().any(|ch| ch.is_ascii_digit());
        let min_len = if contains_digit { 2 } else { 3 };
        token.len() >= min_len
    }
}
/// Decides whether an address token gets a posting list in the token index.
///
/// Purely numeric tokens and postcode fragments never do. Of the rest, tokens
/// containing a digit always qualify; other tokens must be at least three
/// bytes long and not a stop token.
fn is_address_candidate_token(token: &str) -> bool {
    if is_numeric_address_token(token) || looks_like_postcode_fragment(token) {
        return false;
    }
    if token.chars().any(|ch| ch.is_ascii_digit()) {
        return true;
    }
    token.len() >= 3 && !is_address_stop_token(token)
}
fn address_prefix_key(term: &str) -> &str {
if term.len() > ADDRESS_SEARCH_PREFIX_MAX_LEN {
&term[..ADDRESS_SEARCH_PREFIX_MAX_LEN]
} else {
term
}
}
/// Builds the lookup from token prefix to the indexed tokens that start with it.
///
/// For every key of `address_token_index`, each prefix whose length lies
/// between `ADDRESS_SEARCH_PREFIX_MIN_LEN` and `ADDRESS_SEARCH_PREFIX_MAX_LEN`
/// (capped at the token length) maps to that token. Each bucket is sorted and
/// deduplicated.
fn build_address_prefix_index(
    address_token_index: &FxHashMap<String, Vec<u32>>,
) -> FxHashMap<String, Vec<String>> {
    let mut prefix_index: FxHashMap<String, Vec<String>> = FxHashMap::default();

    for token in address_token_index.keys() {
        let longest = token.len().min(ADDRESS_SEARCH_PREFIX_MAX_LEN);
        // Tokens shorter than the minimum prefix length contribute nothing.
        if longest < ADDRESS_SEARCH_PREFIX_MIN_LEN {
            continue;
        }
        for end in ADDRESS_SEARCH_PREFIX_MIN_LEN..=longest {
            prefix_index
                .entry(token[..end].to_string())
                .or_default()
                .push(token.clone());
        }
    }

    prefix_index.values_mut().for_each(|bucket| {
        bucket.sort_unstable();
        bucket.dedup();
    });
    prefix_index
}
/// Parses a free-text address query into postcode, text, numeric and candidate terms.
///
/// Tokens that form the full postcode and tokens shaped like postcode fragments are
/// ignored. All-digit tokens become numeric terms. Remaining tokens become text groups
/// when `address_term_group` accepts them; otherwise digit-bearing tokens of at least
/// two bytes are kept as both numeric and candidate terms.
fn parse_address_query(query: &str) -> AddressQuery {
let tokens = tokenize_address_text(query);
// Split the optional postcode match into the postcode and the token indices it used.
let (full_postcode, postcode_token_indices) = extract_full_postcode(&tokens)
.map(|(postcode, indices)| (Some(postcode), indices))
.unwrap_or((None, Vec::new()));
let skip_postcode_tokens: FxHashSet<usize> = postcode_token_indices.into_iter().collect();
let mut text_groups = Vec::new();
let mut numeric_terms = Vec::new();
let mut candidate_terms = Vec::new();
for (idx, token) in tokens.iter().enumerate() {
// Postcode tokens and postcode-like fragments never take part in text matching.
if skip_postcode_tokens.contains(&idx) || looks_like_postcode_fragment(token) {
continue;
}
if is_numeric_address_token(token) {
numeric_terms.push(token.clone());
continue;
}
if let Some(group) = address_term_group(token) {
// Only non-stop-word alternatives are used to select candidate rows;
// candidate terms keep first-seen order without duplicates.
for alternative in &group.alternatives {
if !is_address_stop_token(alternative)
&& !candidate_terms.iter().any(|term| term == alternative)
{
candidate_terms.push(alternative.clone());
}
}
text_groups.push(group);
} else if token.chars().any(|ch| ch.is_ascii_digit()) && token.len() >= 2 {
// Digit-bearing tokens that did not form a text group count as numeric terms
// and also as candidate terms.
numeric_terms.push(token.clone());
if !candidate_terms.iter().any(|term| term == token) {
candidate_terms.push(token.clone());
}
}
}
// `dedup_by` removes only consecutive groups with identical alternatives.
text_groups.dedup_by(|left, right| left.alternatives == right.alternatives);
numeric_terms.sort_unstable();
numeric_terms.dedup();
AddressQuery {
full_postcode,
text_groups,
numeric_terms,
candidate_terms,
}
}
/// Returns `true` when `token` matches `query_term`: a prefix match for query
/// terms of three or more bytes, an exact match for shorter ones.
fn token_matches_query_term(token: &str, query_term: &str) -> bool {
    if query_term.len() >= 3 {
        // A prefix match also covers the exact-match case.
        token.starts_with(query_term)
    } else {
        token == query_term
    }
}
/// Returns `true` when `token` starts with `query_term`; an exact match is the
/// special case where the prefix is the whole token.
fn token_matches_numeric_term(token: &str, query_term: &str) -> bool {
    token.starts_with(query_term)
}
/// Returns `true` when any alternative of `group` matches any of `tokens`
/// according to `token_matches_query_term`.
fn address_tokens_match_group(tokens: &[String], group: &AddressTermGroup) -> bool {
    for alternative in &group.alternatives {
        for token in tokens {
            if token_matches_query_term(token, alternative) {
                return true;
            }
        }
    }
    false
}
/// Histogram with outlier buckets at the edges.
/// - Bin 0: [min, p1) — low outliers
/// - Bins 1 to n-2: [p1, p99) — main distribution, evenly divided
@ -163,6 +522,20 @@ pub struct PropertyData {
/// Interned postcodes: reader is thread-safe, keys index into it.
postcode_interner: lasso::RodeoReader,
postcode_keys: Vec<lasso::Spur>,
/// Rows for each postcode, keyed by the interned postcode key.
postcode_row_index: FxHashMap<lasso::Spur, Vec<u32>>,
/// Inverted index from address tokens to property rows.
address_token_index: FxHashMap<String, Vec<u32>>,
/// Prefix lookup from typed address-token prefix to indexed full address tokens.
address_prefix_index: FxHashMap<String, Vec<String>>,
/// Interned normalized address-search tokens used for per-row scoring.
address_search_interner: lasso::RodeoReader,
/// Flat per-row normalized address-search token keys.
address_search_token_keys: Vec<lasso::Spur>,
/// Offset into `address_search_token_keys` for each row.
address_search_token_offsets: Vec<u32>,
/// Number of normalized address-search token keys for each row.
address_search_token_lengths: Vec<u16>,
/// For enum features: maps feature index to list of possible string values.
/// Index in values list corresponds to the u16 value stored in feature_data.
pub enum_values: rustc_hash::FxHashMap<usize, Vec<String>>,
@ -197,6 +570,164 @@ impl PropertyData {
(&self.postcode_interner, &self.postcode_keys)
}
/// Returns the interned search-token keys stored for `row`, sliced out of the
/// flat key buffer using the row's offset and length.
fn row_address_search_tokens(&self, row: usize) -> &[lasso::Spur] {
    let start = self.address_search_token_offsets[row] as usize;
    let end = start + self.address_search_token_lengths[row] as usize;
    &self.address_search_token_keys[start..end]
}
/// Search individual property addresses. Full postcode queries use a direct row index;
/// free-text queries use a small inverted index over distinctive address tokens.
///
/// Returns at most `limit` row indices, best match first. Ties are broken by shorter
/// address, then lower row index. Rows with an empty address are skipped, and rows
/// sharing the same case-insensitive address and postcode are reported only once.
/// Returns an empty list when `limit` is 0 or the query has no usable terms.
pub fn search_addresses(&self, query: &str, limit: usize) -> Vec<usize> {
    if limit == 0 {
        return Vec::new();
    }

    let parsed = parse_address_query(query);
    if parsed.full_postcode.is_none()
        && parsed.text_groups.is_empty()
        && parsed.numeric_terms.is_empty()
    {
        return Vec::new();
    }

    let candidate_rows: Vec<u32> = if let Some(postcode) = parsed.full_postcode.as_deref() {
        // A full postcode narrows the search to exactly the rows of that postcode.
        self.postcode_interner
            .get(postcode)
            .and_then(|key| self.postcode_row_index.get(&key))
            .map(|rows| rows.to_vec())
            .unwrap_or_default()
    } else if let Some(rows) = self.best_address_token_rows(&parsed.candidate_terms) {
        // Otherwise start from the shortest posting list, capped to bound scoring work.
        rows.iter()
            .take(ADDRESS_SEARCH_CANDIDATE_LIMIT)
            .copied()
            .collect()
    } else {
        Vec::new()
    };

    if candidate_rows.is_empty() {
        return Vec::new();
    }

    // (score, address length, row) for every candidate that matches the query.
    let mut scored: Vec<(i32, usize, usize)> = candidate_rows
        .into_iter()
        .filter_map(|row| {
            let row = row as usize;
            self.address_match_score(row, &parsed)
                .map(|score| (score, self.address(row).len(), row))
        })
        .collect();

    scored.sort_unstable_by(|left, right| {
        right
            .0
            .cmp(&left.0)
            .then(left.1.cmp(&right.1))
            .then(left.2.cmp(&right.2))
    });

    let mut seen = FxHashSet::default();
    // `limit` is caller-supplied; never reserve more than can actually be returned,
    // so an oversized limit cannot trigger a huge allocation or a capacity panic.
    let mut results = Vec::with_capacity(limit.min(scored.len()));
    for (_, _, row) in scored {
        let address = self.address(row).trim();
        if address.is_empty() {
            continue;
        }

        let key = format!("{}\n{}", address.to_ascii_lowercase(), self.postcode(row));
        if !seen.insert(key) {
            continue;
        }

        results.push(row);
        if results.len() == limit {
            break;
        }
    }

    results
}
fn best_address_token_rows(&self, terms: &[String]) -> Option<&[u32]> {
let mut best: Option<&[u32]> = None;
for term in terms {
if let Some(rows) = self.address_token_index.get(term) {
if best.map_or(true, |current| rows.len() < current.len()) {
best = Some(rows.as_slice());
}
continue;
}
if term.len() < 4 {
continue;
}
if let Some(tokens) = self.address_prefix_index.get(address_prefix_key(term)) {
for token in tokens {
if !token.starts_with(term) {
continue;
}
if let Some(rows) = self.address_token_index.get(token) {
if best.map_or(true, |current| rows.len() < current.len()) {
best = Some(rows.as_slice());
}
}
}
}
}
best
}
/// Scores how well `row` matches the parsed query, or returns `None` when it
/// does not match.
///
/// A row is rejected when its address is blank, when any text group has no
/// matching token, or when the query has numeric terms and none of them match.
/// The score adds 1000 for a full-postcode query, 200 per text group, 90 per
/// matched numeric term, and 50 more when every numeric term matched.
fn address_match_score(&self, row: usize, parsed: &AddressQuery) -> Option<i32> {
    if self.address(row).trim().is_empty() {
        return None;
    }

    let tokens = self.row_address_search_tokens(row);

    let every_group_matches = parsed
        .text_groups
        .iter()
        .all(|group| self.address_tokens_match_group(tokens, group));
    if !every_group_matches {
        return None;
    }

    let numeric_total = parsed.numeric_terms.len();
    let numeric_matches = parsed
        .numeric_terms
        .iter()
        .filter(|term| {
            tokens.iter().any(|token| {
                token_matches_numeric_term(self.address_search_interner.resolve(token), term)
            })
        })
        .count();
    if numeric_total > 0 && numeric_matches == 0 {
        return None;
    }

    let postcode_bonus = if parsed.full_postcode.is_some() {
        1_000
    } else {
        0
    };
    let all_numeric_bonus = if numeric_matches > 0 && numeric_matches == numeric_total {
        50
    } else {
        0
    };

    Some(
        postcode_bonus
            + (parsed.text_groups.len() as i32) * 200
            + (numeric_matches as i32) * 90
            + all_numeric_bonus,
    )
}
/// Returns `true` when any alternative of `group` matches any of the row's
/// interned tokens, resolved through the address-search interner.
fn address_tokens_match_group(&self, tokens: &[lasso::Spur], group: &AddressTermGroup) -> bool {
    for alternative in &group.alternatives {
        for token in tokens {
            let resolved = self.address_search_interner.resolve(token);
            if token_matches_query_term(resolved, alternative) {
                return true;
            }
        }
    }
    false
}
/// Get the is_approx_build_date flag for a given row (bit-packed).
pub fn is_approx_build_date(&self, row: usize) -> bool {
let byte = self.approx_build_date_bits[row / 8];
@ -946,27 +1477,70 @@ impl PropertyData {
.map(|&perm_index| lon[perm_index as usize])
.collect();
// Build contiguous address buffer (permuted)
// Build contiguous address buffer and address search index (permuted)
tracing::info!("Building interned strings");
let total_addr_bytes: usize = address_raw.iter().map(|text| text.len()).sum();
let mut address_buffer = String::with_capacity(total_addr_bytes);
let mut address_offsets = Vec::with_capacity(row_count);
let mut address_lengths = Vec::with_capacity(row_count);
for &perm_index in &perm {
let mut address_token_index: FxHashMap<String, Vec<u32>> = FxHashMap::default();
let mut address_search_rodeo = lasso::Rodeo::default();
let mut address_search_token_keys: Vec<lasso::Spur> = Vec::new();
let mut address_search_token_offsets = Vec::with_capacity(row_count);
let mut address_search_token_lengths = Vec::with_capacity(row_count);
for (new_row, &perm_index) in perm.iter().enumerate() {
let addr = &address_raw[perm_index as usize];
let offset = address_buffer.len() as u32;
let length = addr.len().min(u16::MAX as usize) as u16;
address_offsets.push(offset);
address_lengths.push(length);
address_buffer.push_str(&addr[..length as usize]);
let search_tokens = address_search_tokens(addr);
let token_offset = address_search_token_keys.len() as u32;
let token_length = search_tokens.len().min(u16::MAX as usize) as u16;
address_search_token_offsets.push(token_offset);
address_search_token_lengths.push(token_length);
for token in search_tokens.iter().take(token_length as usize) {
let key = address_search_rodeo.get_or_intern(token);
address_search_token_keys.push(key);
if is_address_candidate_token(token) {
address_token_index
.entry(token.clone())
.or_default()
.push(new_row as u32);
}
}
}
let address_token_count_before_prune = address_token_index.len();
address_token_index.retain(|_, rows| rows.len() <= ADDRESS_SEARCH_MAX_POSTINGS_PER_TOKEN);
let address_prefix_index = build_address_prefix_index(&address_token_index);
let address_search_interner = address_search_rodeo.into_reader();
let address_postings_count: usize = address_token_index.values().map(Vec::len).sum();
tracing::info!(
tokens = address_token_index.len(),
prefixes = address_prefix_index.len(),
pruned_tokens =
address_token_count_before_prune.saturating_sub(address_token_index.len()),
postings = address_postings_count,
row_tokens = address_search_token_keys.len(),
"Address search index built"
);
// Intern postcodes (permuted)
let mut postcode_rodeo = lasso::Rodeo::default();
let postcode_keys: Vec<lasso::Spur> = perm
.iter()
.map(|&perm_index| postcode_rodeo.get_or_intern(&postcode_raw[perm_index as usize]))
.collect();
let mut postcode_keys: Vec<lasso::Spur> = Vec::with_capacity(row_count);
let mut postcode_row_index: FxHashMap<lasso::Spur, Vec<u32>> = FxHashMap::default();
for (new_row, &perm_index) in perm.iter().enumerate() {
let key = postcode_rodeo.get_or_intern(&postcode_raw[perm_index as usize]);
postcode_keys.push(key);
postcode_row_index
.entry(key)
.or_default()
.push(new_row as u32);
}
let postcode_interner = postcode_rodeo.into_reader();
// Pack is_approx_build_date into a bitvec (8 bools per byte)
@ -1110,6 +1684,13 @@ impl PropertyData {
address_lengths,
postcode_interner,
postcode_keys,
postcode_row_index,
address_token_index,
address_prefix_index,
address_search_interner,
address_search_token_keys,
address_search_token_offsets,
address_search_token_lengths,
enum_values,
enum_counts,
approx_build_date_bits,
@ -1133,6 +1714,120 @@ mod tests {
Bounds::Percentile { low, high }
}
// Checks `is_full_postcode_compact` against valid compact postcodes of 5, 6 and 7
// characters, and against an outcode, a plain word and a house number.
#[test]
fn full_postcode_detection_accepts_common_formats() {
assert!(is_full_postcode_compact("SW1A1AA"));
assert!(is_full_postcode_compact("E142DG"));
assert!(is_full_postcode_compact("M11AE"));
assert!(!is_full_postcode_compact("E14"));
assert!(!is_full_postcode_compact("DOWNING"));
assert!(!is_full_postcode_compact("10A"));
}
// Checks that a spaced postcode is extracted, that numeric terms come back sorted,
// and that stop words ("flat", "st") produce neither text groups nor candidate terms.
#[test]
fn address_query_parsing_skips_postcodes_and_street_suffixes() {
let parsed = parse_address_query("Flat 2, 10 Downing St, SW1A 2AA");
assert_eq!(parsed.full_postcode.as_deref(), Some("SW1A 2AA"));
assert_eq!(
parsed.numeric_terms,
vec!["10".to_string(), "2".to_string()]
);
assert_eq!(parsed.candidate_terms, vec!["downing".to_string()]);
assert_eq!(parsed.text_groups.len(), 1);
assert_eq!(
parsed.text_groups[0].alternatives,
vec!["downing".to_string()]
);
}
// Checks that a postcode typed without a space is recognised and canonicalised.
#[test]
fn address_query_parsing_handles_compact_postcodes() {
let parsed = parse_address_query("10 downing street sw1a1aa");
assert_eq!(parsed.full_postcode.as_deref(), Some("SW1A 1AA"));
assert_eq!(parsed.numeric_terms, vec!["10".to_string()]);
assert_eq!(parsed.candidate_terms, vec!["downing".to_string()]);
}
// Checks that a partially typed word ("cour") is kept as its own text group and
// candidate term rather than being dropped or expanded.
#[test]
fn address_query_parsing_keeps_partial_terms_for_row_matching() {
let parsed = parse_address_query("settlers cour");
assert_eq!(parsed.full_postcode, None);
assert_eq!(parsed.numeric_terms, Vec::<String>::new());
assert_eq!(
parsed.candidate_terms,
vec!["settlers".to_string(), "cour".to_string()]
);
assert_eq!(parsed.text_groups.len(), 2);
assert_eq!(
parsed.text_groups[0].alternatives,
vec!["settlers".to_string()]
);
assert_eq!(parsed.text_groups[1].alternatives, vec!["cour".to_string()]);
}
// Checks that `address_search_tokens` keeps numbers and stop words such as "flat",
// and returns the tokens sorted.
#[test]
fn address_search_tokens_keep_actual_address_terms_for_scoring() {
let tokens = address_search_tokens("Flat 2, 10 Downing Cour");
assert_eq!(
tokens,
vec![
"10".to_string(),
"2".to_string(),
"cour".to_string(),
"downing".to_string(),
"flat".to_string()
]
);
}
// Checks that the prefix index maps shared and distinguishing prefixes to the right
// tokens, in sorted order, and holds no prefix shorter than the minimum length.
#[test]
fn address_prefix_index_finds_partial_address_terms() {
let mut token_index: FxHashMap<String, Vec<u32>> = FxHashMap::default();
token_index.insert("downing".to_string(), vec![1]);
token_index.insert("downton".to_string(), vec![2]);
token_index.insert("market".to_string(), vec![3]);
let prefix_index = build_address_prefix_index(&token_index);
assert_eq!(
prefix_index.get("down").cloned().unwrap_or_default(),
vec!["downing".to_string(), "downton".to_string()]
);
assert_eq!(
prefix_index.get("downi").cloned().unwrap_or_default(),
vec!["downing".to_string()]
);
assert_eq!(
prefix_index.get("downt").cloned().unwrap_or_default(),
vec!["downton".to_string()]
);
assert!(!prefix_index.contains_key("do"));
}
// Checks that a group matches address tokens through a typed prefix ("down") and
// through an alias alternative ("street" listed next to "st").
#[test]
fn address_term_matching_allows_prefixes_and_aliases() {
let tokens = tokenize_address_text("10 Downing Street");
let prefix_group = address_term_group("down").expect("prefix term should be searchable");
let alias_group = AddressTermGroup {
alternatives: vec!["st".to_string(), "street".to_string()],
};
assert!(address_tokens_match_group(&tokens, &prefix_group));
assert!(address_tokens_match_group(&tokens, &alias_group));
}
// Checks that a three-letter partial term ("cou") matches the address token "court".
#[test]
fn address_term_matching_uses_actual_token_prefixes() {
let tokens = tokenize_address_text("12 Settlers Court");
let prefix_group = address_term_group("cou").expect("partial term should be searchable");
assert!(address_tokens_match_group(&tokens, &prefix_group));
}
#[test]
fn histogram_empty_data() {
let data: Vec<f32> = vec![];

View file

@ -413,7 +413,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
features: &[
Feature::Numeric(FeatureConfig {
name: "Income Score (rate)",
bounds: Bounds::Fixed { min: 0.0, max: 0.6 },
bounds: Bounds::Fixed { min: 0.0, max: 1.0 },
step: 0.01,
description: "Income deprivation rate, inverted (higher = less deprived)",
detail: "From the English Indices of Deprivation (inverted so higher = better). Higher values indicate less income deprivation. Based on Income Support, income-based Jobseeker's Allowance, income-based Employment and Support Allowance, Pension Credit, Working Tax Credit and Child Tax Credit, Universal Credit, and asylum seekers.",
@ -425,7 +425,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
}),
Feature::Numeric(FeatureConfig {
name: "Employment Score (rate)",
bounds: Bounds::Fixed { min: 0.0, max: 0.4 },
bounds: Bounds::Fixed { min: 0.0, max: 1.0 },
step: 0.01,
description: "Employment deprivation rate, inverted (higher = less deprived)",
detail: "From the English Indices of Deprivation (inverted so higher = better). Higher values indicate less employment deprivation. Based on claimants of Jobseeker's Allowance, Employment and Support Allowance, Incapacity Benefit, Severe Disablement Allowance, Carer's Allowance, and relevant Universal Credit claimants.",

View file

@ -1,16 +1,26 @@
use std::sync::Arc;
use axum::body::Body;
use axum::body::{to_bytes, Body};
use axum::extract::Request;
use axum::http::header;
use axum::http::{header, StatusCode};
use axum::middleware::Next;
use axum::response::Response;
use tracing::warn;
use crate::state::AppState;
const OG_PLACEHOLDER: &str =
r#"<meta name="x-og-placeholder" content="__PERFECT_POSTCODE_OG_TAGS__"/>"#;
const HTML_BODY_LIMIT: usize = 5 * 1024 * 1024;
/// Static SEO metadata for one known route, as returned by `seo_page_for_path`.
struct SeoPage {
/// Canonical path of the page; several request paths may share one canonical path.
canonical_path: &'static str,
/// Page title text.
title: &'static str,
/// Page description text.
description: &'static str,
/// NOTE(review): presumably controls whether the page is offered to search engines
/// for indexing — the code that reads this flag is outside this view; confirm.
indexable: bool,
}
/// Escape a string for safe inclusion inside a double-quoted HTML attribute value.
fn escape_attr(s: &str) -> String {
let mut out = String::with_capacity(s.len());
@ -26,6 +36,279 @@ fn escape_attr(s: &str) -> String {
out
}
/// Removes trailing slashes from `path` while keeping the root path intact.
///
/// Paths of length 0 or 1 are returned unchanged. A longer path made up only
/// of slashes (for example `"//"`) normalizes to `"/"` instead of collapsing
/// to an empty string, so it is treated as the root route.
fn trim_trailing_slash(path: &str) -> &str {
    if path.len() <= 1 {
        return path;
    }
    match path.trim_end_matches('/') {
        "" => "/",
        trimmed => trimmed,
    }
}
/// Looks up the SEO metadata for a request path.
///
/// Trailing slashes are removed first via `trim_trailing_slash`. Known paths map to
/// a fixed `SeoPage`; "/support" shares the "/learn" entry; any path starting with
/// "/invite/" maps to the invitation page. Every other path returns `None`.
fn seo_page_for_path(path: &str) -> Option<SeoPage> {
let path = trim_trailing_slash(path);
match path {
"/" => Some(SeoPage {
canonical_path: "/",
title: "Perfect Postcode - Find where to buy before browsing listings",
description: "Search every postcode by budget, commute, schools, safety, noise, broadband, prices and more. Build a better home-buying shortlist before viewings.",
indexable: true,
}),
"/learn" | "/support" => Some(SeoPage {
canonical_path: "/learn",
title: "How Perfect Postcode works - Data sources, FAQ and support",
description: "Learn how Perfect Postcode combines property prices, EPC records, travel times, crime, schools, broadband, noise, amenities and open data for postcode research.",
indexable: true,
}),
"/pricing" => Some(SeoPage {
canonical_path: "/pricing",
title: "Perfect Postcode pricing - Lifetime property search map access",
description: "Get lifetime access to the postcode property search map for England, including filters, saved searches, exports, and future data updates.",
indexable: true,
}),
"/property-price-map" => Some(SeoPage {
canonical_path: "/property-price-map",
title: "Property price map for England - Compare postcodes before viewing",
description: "Compare sold prices, estimated current value, price per square metre and local context across English postcodes before searching listings.",
indexable: true,
}),
"/postcode-property-search" => Some(SeoPage {
canonical_path: "/postcode-property-search",
title: "Postcode property search - Find areas that match your criteria",
description: "Search every postcode by budget, property type, floor area, tenure, commute, schools, crime, broadband, noise, parks and local amenities.",
indexable: true,
}),
"/commute-property-search" => Some(SeoPage {
canonical_path: "/commute-property-search",
title: "Commute property search - Find places to live by travel time",
description: "Filter postcodes by commute time, then compare price, schools, safety, broadband, road noise, parks and property data on one map.",
indexable: true,
}),
"/school-property-search" => Some(SeoPage {
canonical_path: "/school-property-search",
title: "School property search - Compare postcodes for family moves",
description: "Compare nearby schools, property size, prices, parks, safety, commute and local amenities before building a viewing shortlist.",
indexable: true,
}),
"/postcode-checker" => Some(SeoPage {
canonical_path: "/postcode-checker",
title: "Postcode checker - Property, crime, broadband, noise and schools",
description: "Check postcode-level property prices, EPC data, crime, broadband, road noise, schools, council tax, amenities and travel-time context.",
indexable: true,
}),
"/property-search/birmingham" => Some(SeoPage {
canonical_path: "/property-search/birmingham",
title: "Birmingham property search - Compare postcodes by price and commute",
description: "Use postcode-level data to compare Birmingham property prices, commute trade-offs, schools, crime, broadband and local amenities before viewings.",
indexable: true,
}),
"/property-search/manchester" => Some(SeoPage {
canonical_path: "/property-search/manchester",
title: "Manchester property search - Compare postcodes before viewing",
description: "Compare Manchester-area postcodes by budget, commute, property type, schools, broadband, crime, noise and amenities before booking viewings.",
indexable: true,
}),
"/property-search/bristol" => Some(SeoPage {
canonical_path: "/property-search/bristol",
title: "Bristol property search - Compare postcodes by commute and price",
description: "Compare Bristol postcodes by price, commute, property size, schools, broadband, crime, road noise, parks and amenities before viewings.",
indexable: true,
}),
"/data-sources" => Some(SeoPage {
canonical_path: "/data-sources",
title: "Perfect Postcode data sources - Property, schools, commute and local context",
description: "Review the public and official datasets used by Perfect Postcode, including property prices, EPC, schools, crime, broadband, noise and travel-time context.",
indexable: true,
}),
"/methodology" => Some(SeoPage {
canonical_path: "/methodology",
title: "Perfect Postcode methodology - How to interpret postcode property data",
description: "Understand how to use postcode filters, property estimates, travel-time data, school context and local signals as a home-buying shortlist tool.",
indexable: true,
}),
"/privacy-security" => Some(SeoPage {
canonical_path: "/privacy-security",
title: "Perfect Postcode privacy and security - Saved searches and account data",
description: "Learn how Perfect Postcode treats saved searches, account data and property research workflows with privacy and security in mind.",
indexable: true,
}),
// Application and account routes below are marked as not indexable.
"/dashboard" => Some(SeoPage {
canonical_path: "/dashboard",
title: "Perfect Postcode dashboard",
description: "Explore postcode property data, travel times, prices, schools, crime, noise, broadband and amenities on the interactive map.",
indexable: false,
}),
"/saved" => Some(SeoPage {
canonical_path: "/saved",
title: "Perfect Postcode account",
description: "Manage your Perfect Postcode account, saved searches, saved properties and invitations.",
indexable: false,
}),
"/invites" => Some(SeoPage {
canonical_path: "/invites",
title: "Perfect Postcode account",
description: "Manage your Perfect Postcode account, saved searches, saved properties and invitations.",
indexable: false,
}),
"/account" => Some(SeoPage {
canonical_path: "/account",
title: "Perfect Postcode account",
description: "Manage your Perfect Postcode account, saved searches, saved properties and invitations.",
indexable: false,
}),
// Invitation links carry a trailing segment after "/invite/"; all share one entry.
_ if path.starts_with("/invite/") => Some(SeoPage {
canonical_path: "/invite",
title: "You're invited to Perfect Postcode",
description: "Accept your invitation to explore property prices, energy ratings, crime stats, school ratings, and more across England.",
indexable: false,
}),
_ => None,
}
}
/// Reports whether `path` should bypass the SEO page lookup entirely.
///
/// A path is passed through when any of the following holds:
/// * it lives under one of the API / proxy / asset prefixes,
/// * it is one of the fixed infrastructure or static-file paths,
/// * its final segment (the text after the last `/`) contains a `.`,
///   i.e. it looks like a file name.
fn is_passthrough_path(path: &str) -> bool {
    const PASSTHROUGH_PREFIXES: [&str; 4] = ["/api/", "/pb/", "/s/", "/assets/"];
    const PASSTHROUGH_EXACT: [&str; 8] = [
        "/health",
        "/metrics",
        "/robots.txt",
        "/sitemap.xml",
        "/favicon.svg",
        "/bundle.js",
        "/main.css",
        "/house.png",
    ];

    if PASSTHROUGH_PREFIXES
        .iter()
        .any(|prefix| path.starts_with(prefix))
    {
        return true;
    }
    if PASSTHROUGH_EXACT.contains(&path) {
        return true;
    }

    // Everything after the last '/' (or the whole path when there is none).
    let last_segment = match path.rfind('/') {
        Some(slash) => &path[slash + 1..],
        None => path,
    };
    last_segment.contains('.')
}
/// Decides whether a request for `path` should be answered with the 404 page.
///
/// Passthrough paths are never rejected here; any other path is a 404 unless
/// `seo_page_for_path` knows it.
fn should_return_404(path: &str) -> bool {
    if is_passthrough_path(path) {
        return false;
    }
    seo_page_for_path(path).is_none()
}
/// Builds the standalone HTML `404 Not Found` response.
///
/// * `public_url` – site base URL used for the canonical link and the
///   "Go to Perfect Postcode" link. Trailing slashes are trimmed (as
///   `route_seo_tags` does) so a base URL configured as `https://host/`
///   does not produce `https://host//` hrefs.
/// * `path` – the requested path, echoed back to the visitor.
///
/// Both values are HTML-escaped before interpolation because the path comes
/// straight from the request. The page is marked `noindex,follow`.
fn not_found_response(public_url: &str, path: &str) -> Response {
    let public_url_e = escape_attr(public_url.trim_end_matches('/'));
    let path_e = escape_attr(path);
    let html = format!(
        r#"<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="robots" content="noindex,follow" />
<title>Page not found - Perfect Postcode</title>
<meta name="description" content="This Perfect Postcode page could not be found." />
<link rel="canonical" href="{public_url_e}/" />
</head>
<body>
<main>
<h1>Page not found</h1>
<p>The requested path was not found: {path_e}</p>
<p><a href="{public_url_e}/">Go to Perfect Postcode</a></p>
</main>
</body>
</html>"#
    );
    let mut response = Response::new(Body::from(html));
    *response.status_mut() = StatusCode::NOT_FOUND;
    response.headers_mut().insert(
        header::CONTENT_TYPE,
        header::HeaderValue::from_static("text/html; charset=utf-8"),
    );
    response
}
/// Renders the per-route robots, canonical, Open Graph and Twitter tags as a
/// single HTML fragment.
///
/// * `page` – static SEO metadata for the route (title, description,
///   canonical path, indexability).
/// * `path` / `query_string` – the request path and raw query string; both
///   are HTML-escaped before being placed into attribute values.
/// * `public_url` – site base URL; trailing slashes are trimmed.
///
/// The preview image always points at `/api/screenshot?og=1` and forwards the
/// request's query string. Invite links (`/invite/...`) additionally pass
/// `path=` so the screenshot is taken of the invite page itself.
fn route_seo_tags(page: &SeoPage, path: &str, query_string: &str, public_url: &str) -> String {
    let public_url_e = escape_attr(public_url.trim_end_matches('/'));
    let title_e = escape_attr(page.title);
    let description_e = escape_attr(page.description);
    let escaped_path = escape_attr(path);
    let escaped_canonical_path = escape_attr(page.canonical_path);

    // The query string, when present, is appended to og:url after `?` and to
    // the screenshot URL after an (already HTML-escaped) `&`.
    let (og_url_query, screenshot_query) = if query_string.is_empty() {
        (String::new(), String::new())
    } else {
        let escaped_query = escape_attr(query_string);
        (
            format!("?{escaped_query}"),
            format!("&amp;{escaped_query}"),
        )
    };
    let screenshot_path = if path.starts_with("/invite/") {
        format!("&amp;path={escaped_path}")
    } else {
        String::new()
    };

    let og_image_url =
        format!("{public_url_e}/api/screenshot?og=1{screenshot_path}{screenshot_query}");
    let og_url = format!("{public_url_e}{escaped_path}{og_url_query}");
    let canonical_url = format!("{public_url_e}{escaped_canonical_path}");
    let robots = if page.indexable {
        "index,follow"
    } else {
        "noindex,follow"
    };

    format!(
        r#"<meta name="robots" content="{robots}" />
<link rel="canonical" href="{canonical_url}" />
<meta property="og:title" content="{title_e}" />
<meta property="og:description" content="{description_e}" />
<meta property="og:type" content="website" />
<meta property="og:url" content="{og_url}" />
<meta property="og:site_name" content="Perfect Postcode" />
<meta property="og:logo" content="{public_url_e}/favicon.svg" />
<meta property="og:image" content="{og_image_url}" />
<meta property="og:image:width" content="1200" />
<meta property="og:image:height" content="630" />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:title" content="{title_e}" />
<meta name="twitter:description" content="{description_e}" />
<meta name="twitter:image" content="{og_image_url}" />"#
    )
}
/// Rewrites the served HTML document with route-specific SEO metadata.
///
/// Steps, in order:
/// 1. the first `<title>…</title>` element is replaced with the page title,
/// 2. the first `<meta name="description" …>` tag is replaced with the page
///    description,
/// 3. `tags` is substituted for every `OG_PLACEHOLDER`; when the placeholder
///    is absent the tags are inserted right before `</head>` instead.
///
/// Any element that cannot be located is left untouched.
fn inject_tags(mut html: String, page: &SeoPage, tags: &str) -> String {
    const TITLE_CLOSE: &str = "</title>";

    let title_span = html.find("<title>").and_then(|start| {
        html[start..]
            .find(TITLE_CLOSE)
            .map(|offset| start..start + offset + TITLE_CLOSE.len())
    });
    if let Some(span) = title_span {
        let title_element = format!("<title>{}</title>", escape_attr(page.title));
        html.replace_range(span, &title_element);
    }

    // The description tag ends at the first '>' after its opening.
    let description_span = html
        .find(r#"<meta name="description""#)
        .and_then(|start| {
            html[start..]
                .find('>')
                .map(|offset| start..start + offset + 1)
        });
    if let Some(span) = description_span {
        let description_tag = format!(
            r#"<meta name="description" content="{}" />"#,
            escape_attr(page.description)
        );
        html.replace_range(span, &description_tag);
    }

    if html.contains(OG_PLACEHOLDER) {
        return html.replace(OG_PLACEHOLDER, tags);
    }
    match html.find("</head>") {
        Some(head_end) => html.insert_str(head_end, tags),
        None => {}
    }
    html
}
pub async fn og_middleware(request: Request, next: Next) -> Response {
let path = request.uri().path().to_string();
// Capture the query string before passing the request through
@ -34,6 +317,12 @@ pub async fn og_middleware(request: Request, next: Next) -> Response {
// Get state from extensions
let state = request.extensions().get::<Arc<AppState>>().cloned();
if let Some(st) = &state {
if !st.is_dev && should_return_404(&path) {
return not_found_response(&st.public_url, &path);
}
}
let response = next.run(request).await;
// Only inject OG tags into SPA HTML responses, not proxied PocketBase responses
@ -56,68 +345,25 @@ pub async fn og_middleware(request: Request, next: Next) -> Response {
None => return response,
};
let index_html = match &state.index_html {
Some(html) => html,
let page = match seo_page_for_path(&path) {
Some(page) => page,
None => return response,
};
// Build OG-injected HTML (og=1 triggers heading overlay on screenshot).
// All URL components are HTML-escaped before interpolation into attributes
// because path/query are attacker-controlled.
let is_invite = path.starts_with("/invite/");
let path_e = escape_attr(&path);
let query_e = escape_attr(&query_string);
let public_url_e = escape_attr(&state.public_url);
let og_image_url = if is_invite {
// Include path= so the screenshot service navigates to /invite/CODE
if query_string.is_empty() {
format!("{public_url_e}/api/screenshot?og=1&amp;path={path_e}")
} else {
format!("{public_url_e}/api/screenshot?og=1&amp;path={path_e}&amp;{query_e}")
let (mut parts, body) = response.into_parts();
let bytes = match to_bytes(body, HTML_BODY_LIMIT).await {
Ok(bytes) => bytes,
Err(err) => {
warn!("Failed to buffer HTML body for SEO tag injection: {err}");
let mut response = Response::from_parts(parts, Body::empty());
*response.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
return response;
}
} else if query_string.is_empty() {
format!("{public_url_e}/api/screenshot?og=1")
} else {
format!("{public_url_e}/api/screenshot?og=1&amp;{query_e}")
};
let og_url = if query_string.is_empty() {
format!("{public_url_e}{path_e}")
} else {
format!("{public_url_e}{path_e}?{query_e}")
};
let og_logo = format!("{public_url_e}/favicon.svg");
let (og_title, og_description) = if is_invite {
(
"You\u{2019}re invited to Perfect Postcode",
"Accept your invitation to explore property prices, energy ratings, crime stats, school ratings, and more across England.",
)
} else {
(
"Perfect Postcode \u{2014} Every neighbourhood in England",
"Explore property prices, energy ratings, crime stats, school ratings, and more across England on one interactive map.",
)
};
let og_tags = format!(
r#"<meta property="og:title" content="{og_title}" />
<meta property="og:description" content="{og_description}" />
<meta property="og:type" content="website" />
<meta property="og:url" content="{og_url}" />
<meta property="og:logo" content="{og_logo}" />
<meta property="og:image" content="{og_image_url}" />
<meta property="og:image:width" content="1200" />
<meta property="og:image:height" content="630" />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:title" content="{og_title}" />
<meta name="twitter:description" content="{og_description}" />"#
);
let html = index_html.replace(OG_PLACEHOLDER, &og_tags);
let (parts, _body) = response.into_parts();
let html = String::from_utf8_lossy(&bytes).into_owned();
let tags = route_seo_tags(&page, &path, &query_string, &state.public_url);
let html = inject_tags(html, &page, &tags);
parts.headers.remove(header::CONTENT_LENGTH);
Response::from_parts(parts, Body::from(html))
}

View file

@ -143,6 +143,9 @@ fn execute_destination_search(state: &AppState, query: &str, mode: &str) -> Valu
.iter()
.enumerate()
.filter_map(|(idx, name_lower)| {
if !pd.travel_destination[idx] {
return None;
}
let words_match = query_words.iter().all(|word| name_lower.contains(word));
let slug = slugify(&pd.name[idx]);
let slug_match = slug.contains(&query_slug) || query_slug.contains(&slug);
@ -169,6 +172,9 @@ fn execute_destination_search(state: &AppState, query: &str, mode: &str) -> Valu
.iter()
.enumerate()
.find_map(|(idx, name_lower)| {
if !pd.travel_destination[idx] {
return None;
}
let words_match = query_words.iter().all(|word| name_lower.contains(word));
let slug = slugify(&pd.name[idx]);
let slug_match = slug.contains(&query_slug) || query_slug.contains(&slug);
@ -186,6 +192,9 @@ fn execute_destination_search(state: &AppState, query: &str, mode: &str) -> Valu
.iter()
.enumerate()
.filter_map(|(idx, city_opt)| {
if !pd.travel_destination[idx] {
return None;
}
let city = city_opt.as_deref()?;
if city.to_lowercase() != city_lower {
return None;

View file

@ -6,7 +6,7 @@ use axum::response::Json;
use serde::{Deserialize, Serialize};
use tracing::info;
use crate::data::slugify;
use crate::data::{normalize_search_text, slugify};
use crate::state::SharedState;
#[derive(Serialize)]
@ -20,9 +20,21 @@ pub struct PlaceResult {
city: Option<String>,
}
/// A single address hit returned alongside place results by `get_places`.
#[derive(Serialize)]
pub struct AddressResult {
/// Address text of the matched property row (trimmed by `get_places`).
address: String,
/// Postcode of the matched property row.
postcode: String,
/// Latitude of the matched property row.
lat: f32,
/// Longitude of the matched property row.
lon: f32,
}
/// JSON body of `GET /api/places`.
#[derive(Serialize)]
pub struct PlacesResponse {
/// Matching places; always serialized, even when empty.
places: Vec<PlaceResult>,
/// Postcodes matching the query as a prefix; omitted from the JSON when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
postcodes: Vec<String>,
/// Matching property addresses; omitted from the JSON when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
addresses: Vec<AddressResult>,
}
#[derive(Deserialize)]
@ -34,6 +46,53 @@ pub struct PlacesParams {
mode: Option<String>,
}
/// Normalizes a postcode-like query for prefix matching: every whitespace
/// character is dropped and ASCII letters are upper-cased.
fn compact_postcode_query(query: &str) -> String {
    let mut compact = String::with_capacity(query.len());
    for ch in query.chars() {
        if !ch.is_whitespace() {
            compact.push(ch.to_ascii_uppercase());
        }
    }
    compact
}
/// Heuristic check for whether `query` could be the start of a postcode.
///
/// Whitespace is ignored (the same stripping `compact_postcode_query`
/// applies). The remaining characters must:
/// * number between 2 and 7,
/// * start with an ASCII letter,
/// * be ASCII letters or digits only,
/// * include at least one digit.
fn looks_like_postcode_prefix(query: &str) -> bool {
    let mut significant = 0usize;
    let mut saw_digit = false;

    for ch in query.chars().filter(|ch| !ch.is_whitespace()) {
        let allowed = if significant == 0 {
            ch.is_ascii_alphabetic()
        } else {
            ch.is_ascii_alphanumeric()
        };
        if !allowed {
            return false;
        }
        saw_digit |= ch.is_ascii_digit();
        significant += 1;
        if significant > 7 {
            return false;
        }
    }

    significant >= 2 && saw_digit
}
/// Tests whether `postcode`, with its whitespace ignored and ASCII letters
/// upper-cased, begins with `compact_query`.
///
/// `compact_query` is compared as given (it is not re-normalized here). An
/// empty query never matches, and neither does a query longer than the
/// postcode's non-whitespace characters.
fn postcode_starts_with_compact(postcode: &str, compact_query: &str) -> bool {
    if compact_query.is_empty() {
        return false;
    }

    let mut postcode_chars = postcode.chars().filter(|ch| !ch.is_whitespace());
    // Each query character must equal the next significant postcode character.
    compact_query.chars().all(|query_char| {
        postcode_chars
            .next()
            .is_some_and(|postcode_char| postcode_char.to_ascii_uppercase() == query_char)
    })
}
pub async fn get_places(
State(shared): State<Arc<SharedState>>,
Query(params): Query<PlacesParams>,
@ -51,31 +110,39 @@ pub async fn get_places(
let places = tokio::task::spawn_blocking(move || {
let t0 = std::time::Instant::now();
let query_lower = query.to_lowercase();
let query_search = normalize_search_text(&query);
let pd = &state.place_data;
let od = &state.outcode_data;
let postcode_data = &state.postcode_data;
let tt_store = &state.travel_time_store;
let property_data = &state.data;
// Linear scan — ~50-100k rows, <1ms
// Tuple: (row_idx, is_exact, is_prefix, type_rank, population, name_len, slug)
let mut matches: Vec<(usize, bool, bool, u8, u32, usize, String)> = pd
.name_lower
.name_search
.iter()
.enumerate()
.filter_map(|(idx, name)| {
if !name.contains(&query_lower) {
.filter_map(|(idx, search_text)| {
if query_search.is_empty() || !search_text.contains(&query_search) {
return None;
}
let slug = slugify(&pd.name[idx]);
// If mode filter is set, only include places with travel data
// If mode filter is set, keep the historical travel destination set only.
if let Some(ref mode) = mode_filter {
if !tt_store.has_destination(mode, &slug) {
if !pd.travel_destination[idx] || !tt_store.has_destination(mode, &slug) {
return None;
}
}
let is_exact = name.len() == query_lower.len();
let is_prefix = name.starts_with(&query_lower);
let is_exact = search_text
.split(" | ")
.any(|alias| alias == query_search || pd.name_lower[idx] == query_lower);
let is_prefix = search_text
.split(" | ")
.any(|alias| alias.starts_with(&query_search))
|| pd.name_lower[idx].starts_with(&query_lower);
Some((
idx,
is_exact,
@ -153,20 +220,76 @@ pub async fn get_places(
results = outcode_results;
}
let postcodes: Vec<String> = if mode_filter.is_none() && looks_like_postcode_prefix(&query)
{
let compact_query = compact_postcode_query(&query);
postcode_data
.postcodes
.iter()
.filter(|postcode| postcode_starts_with_compact(postcode, &compact_query))
.take(limit)
.cloned()
.collect()
} else {
Vec::new()
};
let addresses: Vec<AddressResult> = if mode_filter.is_none() {
property_data
.search_addresses(&query, limit)
.into_iter()
.map(|row| AddressResult {
address: property_data.address(row).trim().to_string(),
postcode: property_data.postcode(row).to_string(),
lat: property_data.lat[row],
lon: property_data.lon[row],
})
.collect()
} else {
Vec::new()
};
let elapsed = t0.elapsed();
info!(
query = query.as_str(),
results = results.len(),
postcodes = postcodes.len(),
addresses = addresses.len(),
scanned = pd.name_lower.len(),
mode = mode_filter.as_deref().unwrap_or("-"),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/places"
);
results
(results, postcodes, addresses)
})
.await
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
Ok(Json(PlacesResponse { places }))
Ok(Json(PlacesResponse {
places: places.0,
postcodes: places.1,
addresses: places.2,
}))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Queries paired with whether they should be treated as postcode prefixes.
    #[test]
    fn detects_postcode_prefixes() {
        let cases = [
            ("EC2R", true),
            ("sw1a 1", true),
            ("M4", true),
            ("London", false),
            ("E", false),
        ];
        for (query, expected) in cases {
            assert_eq!(
                looks_like_postcode_prefix(query),
                expected,
                "query: {query:?}"
            );
        }
    }

    /// Stored postcodes contain a space; compact queries do not.
    #[test]
    fn postcode_prefix_match_ignores_spaces() {
        let cases = [
            ("EC2R 8AH", "EC2R8", true),
            ("SW1A 1AA", "SW1A1", true),
            ("SW1A 1AA", "SW1A2", false),
        ];
        for (postcode, compact_query, expected) in cases {
            assert_eq!(
                postcode_starts_with_compact(postcode, compact_query),
                expected,
                "postcode: {postcode:?}, query: {compact_query:?}"
            );
        }
    }
}

View file

@ -22,6 +22,8 @@ pub struct PostcodePropertiesParams {
pub filters: Option<String>,
pub limit: Option<usize>,
pub offset: Option<usize>,
/// Exact address to rank first when opening properties from address search.
pub focus_address: Option<String>,
/// Share-link code; grants bbox-scoped access for unlicensed users.
pub share: Option<String>,
}
@ -67,6 +69,12 @@ pub async fn get_postcode_properties(
let filters_str = params.filters;
let postcode_str = normalized;
let focus_address = params
.focus_address
.as_deref()
.map(str::trim)
.filter(|address| !address.is_empty())
.map(str::to_ascii_lowercase);
let result = tokio::task::spawn_blocking(move || {
let t0 = std::time::Instant::now();
@ -100,7 +108,20 @@ pub async fn get_postcode_properties(
}
});
matching_rows.sort_unstable_by_key(|&row| state.data.address(row).trim().is_empty());
matching_rows.sort_unstable_by(|&left, &right| {
let left_address = state.data.address(left).trim();
let right_address = state.data.address(right).trim();
let left_focused = focus_address
.as_ref()
.is_some_and(|address| left_address.eq_ignore_ascii_case(address));
let right_focused = focus_address
.as_ref()
.is_some_and(|address| right_address.eq_ignore_ascii_case(address));
right_focused
.cmp(&left_focused)
.then(left_address.is_empty().cmp(&right_address.is_empty()))
});
let total = matching_rows.len();
let limit = params

View file

@ -54,6 +54,9 @@ pub async fn get_travel_destinations(
.iter()
.enumerate()
.filter_map(|(idx, name)| {
if !pd.travel_destination[idx] {
return None;
}
let slug = slugify(name);
if slug_set.contains(&slug) {
Some((idx, slug, pd.type_rank[idx], pd.population[idx], name.len()))