Fix crime & add actual listings
This commit is contained in:
parent
017902b8e6
commit
ebe7bbb51d
34 changed files with 2014 additions and 172754 deletions
|
|
@ -1,3 +1,4 @@
|
|||
mod actual_listings;
|
||||
mod places;
|
||||
mod poi;
|
||||
mod postcodes;
|
||||
|
|
@ -33,6 +34,7 @@ where
|
|||
})
|
||||
}
|
||||
|
||||
pub use actual_listings::{ActualListing, ActualListingData};
|
||||
pub use places::{normalize_search_text, PlaceData};
|
||||
pub use poi::{resolve_poi_category_filter, POICategoryGroup, POIData};
|
||||
pub use postcodes::{OutcodeData, PostcodeData};
|
||||
|
|
|
|||
326
server-rs/src/data/actual_listings.rs
Normal file
326
server-rs/src/data/actual_listings.rs
Normal file
|
|
@ -0,0 +1,326 @@
|
|||
use std::path::Path;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use polars::lazy::frame::LazyFrame;
|
||||
use polars::prelude::*;
|
||||
use serde::Serialize;
|
||||
use tracing::info;
|
||||
|
||||
use crate::utils::{normalize_postcode, GridIndex, InternedColumn};
|
||||
|
||||
/// Cell size handed to `GridIndex::build` together with the lat/lon columns.
/// NOTE(review): presumably expressed in degrees — confirm against `GridIndex`.
const GRID_CELL_SIZE: f32 = 0.01;
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub struct ActualListing {
|
||||
pub lat: f32,
|
||||
pub lon: f32,
|
||||
pub postcode: String,
|
||||
pub address: Option<String>,
|
||||
pub property_type: Option<String>,
|
||||
pub property_sub_type: Option<String>,
|
||||
pub leasehold_freehold: Option<String>,
|
||||
pub price_qualifier: Option<String>,
|
||||
pub bedrooms: Option<i32>,
|
||||
pub bathrooms: Option<i32>,
|
||||
pub rooms_total: Option<i32>,
|
||||
pub floor_area_sqm: Option<f32>,
|
||||
pub asking_price: Option<i64>,
|
||||
pub asking_price_per_sqm: Option<f32>,
|
||||
pub listing_url: String,
|
||||
pub listing_status: Option<String>,
|
||||
pub listing_date_iso: Option<String>,
|
||||
pub features: Vec<String>,
|
||||
}
|
||||
|
||||
pub struct ActualListingData {
|
||||
pub lat: Vec<f32>,
|
||||
pub lon: Vec<f32>,
|
||||
/// Normalized (uppercase, canonical spacing) postcode per row.
|
||||
pub postcode: Vec<String>,
|
||||
pub address: Vec<Option<String>>,
|
||||
pub property_type: InternedColumn,
|
||||
pub property_sub_type: InternedColumn,
|
||||
pub leasehold_freehold: InternedColumn,
|
||||
pub price_qualifier: InternedColumn,
|
||||
pub bedrooms: Vec<Option<i32>>,
|
||||
pub bathrooms: Vec<Option<i32>>,
|
||||
pub rooms_total: Vec<Option<i32>>,
|
||||
pub floor_area_sqm: Vec<Option<f32>>,
|
||||
pub asking_price: Vec<Option<i64>>,
|
||||
pub asking_price_per_sqm: Vec<Option<f32>>,
|
||||
pub listing_url: Vec<String>,
|
||||
pub listing_status: InternedColumn,
|
||||
pub listing_date_iso: Vec<Option<String>>,
|
||||
pub features: Vec<Vec<String>>,
|
||||
pub grid: GridIndex,
|
||||
}
|
||||
|
||||
impl ActualListingData {
|
||||
pub fn load(parquet_path: &Path) -> Result<Self> {
|
||||
super::run_polars_io(|| Self::load_inner(parquet_path))
|
||||
}
|
||||
|
||||
fn load_inner(parquet_path: &Path) -> Result<Self> {
|
||||
info!("Loading actual listings from {:?}", parquet_path);
|
||||
let pl_path = PlRefPath::try_from_path(parquet_path)
|
||||
.context("Failed to normalize actual listings parquet path")?;
|
||||
let df = LazyFrame::scan_parquet(pl_path, Default::default())
|
||||
.context("Failed to scan actual listings parquet")?
|
||||
.collect()
|
||||
.context("Failed to read actual listings parquet")?;
|
||||
|
||||
let row_count = df.height();
|
||||
info!(rows = row_count, "Actual listings parquet read");
|
||||
|
||||
let lat = extract_f32(&df, "lat")?;
|
||||
let lon = extract_f32(&df, "lon")?;
|
||||
let postcode_raw = extract_str(&df, "Postcode")?;
|
||||
let address = extract_opt_str(&df, "Address per Property Register")?;
|
||||
let property_type_raw = extract_opt_str(&df, "Property type")?;
|
||||
let property_sub_type_raw = extract_opt_str(&df, "Property sub-type")?;
|
||||
let leasehold_freehold_raw = extract_opt_str(&df, "Leasehold/Freehold")?;
|
||||
let price_qualifier_raw = extract_opt_str(&df, "Price qualifier")?;
|
||||
let bedrooms = extract_opt_i32(&df, "Bedrooms")?;
|
||||
let bathrooms = extract_opt_i32(&df, "Bathrooms")?;
|
||||
let rooms_total = extract_opt_i32(&df, "Number of bedrooms & living rooms")?;
|
||||
let floor_area_sqm = extract_opt_f32(&df, "Total floor area (sqm)")?;
|
||||
let asking_price = extract_opt_i64(&df, "Asking price")?;
|
||||
let asking_price_per_sqm = extract_opt_f32(&df, "Asking price per sqm")?;
|
||||
let listing_url = extract_str(&df, "Listing URL")?;
|
||||
let listing_status_raw = extract_opt_str(&df, "Listing status")?;
|
||||
let listing_date_iso = extract_opt_datetime_iso(&df, "Listing date")?;
|
||||
let features = extract_str_list(&df, "Listing features")?;
|
||||
|
||||
let postcode: Vec<String> = postcode_raw.iter().map(|s| normalize_postcode(s)).collect();
|
||||
|
||||
let property_type = InternedColumn::build(&opt_to_string(&property_type_raw));
|
||||
let property_sub_type = InternedColumn::build(&opt_to_string(&property_sub_type_raw));
|
||||
let leasehold_freehold = InternedColumn::build(&opt_to_string(&leasehold_freehold_raw));
|
||||
let price_qualifier = InternedColumn::build(&opt_to_string(&price_qualifier_raw));
|
||||
let listing_status = InternedColumn::build(&opt_to_string(&listing_status_raw));
|
||||
|
||||
let grid = GridIndex::build(&lat, &lon, GRID_CELL_SIZE);
|
||||
|
||||
info!(rows = row_count, "Actual listings loaded");
|
||||
|
||||
Ok(Self {
|
||||
lat,
|
||||
lon,
|
||||
postcode,
|
||||
address,
|
||||
property_type,
|
||||
property_sub_type,
|
||||
leasehold_freehold,
|
||||
price_qualifier,
|
||||
bedrooms,
|
||||
bathrooms,
|
||||
rooms_total,
|
||||
floor_area_sqm,
|
||||
asking_price,
|
||||
asking_price_per_sqm,
|
||||
listing_url,
|
||||
listing_status,
|
||||
listing_date_iso,
|
||||
features,
|
||||
grid,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn listing_at(&self, row: usize) -> ActualListing {
|
||||
ActualListing {
|
||||
lat: self.lat[row],
|
||||
lon: self.lon[row],
|
||||
postcode: self.postcode[row].clone(),
|
||||
address: self.address[row].clone(),
|
||||
property_type: opt_from_interned(&self.property_type, row),
|
||||
property_sub_type: opt_from_interned(&self.property_sub_type, row),
|
||||
leasehold_freehold: opt_from_interned(&self.leasehold_freehold, row),
|
||||
price_qualifier: opt_from_interned(&self.price_qualifier, row),
|
||||
bedrooms: self.bedrooms[row],
|
||||
bathrooms: self.bathrooms[row],
|
||||
rooms_total: self.rooms_total[row],
|
||||
floor_area_sqm: self.floor_area_sqm[row],
|
||||
asking_price: self.asking_price[row],
|
||||
asking_price_per_sqm: self.asking_price_per_sqm[row],
|
||||
listing_url: self.listing_url[row].clone(),
|
||||
listing_status: opt_from_interned(&self.listing_status, row),
|
||||
listing_date_iso: self.listing_date_iso[row].clone(),
|
||||
features: self.features[row].clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Flattens optional strings into owned strings, turning every `None` into
/// the empty string. The output has the same length and order as `values`.
fn opt_to_string(values: &[Option<String>]) -> Vec<String> {
    let mut flattened = Vec::with_capacity(values.len());
    for entry in values {
        let text = match entry {
            Some(present) => present.clone(),
            None => String::new(),
        };
        flattened.push(text);
    }
    flattened
}
|
||||
|
||||
fn opt_from_interned(column: &InternedColumn, row: usize) -> Option<String> {
|
||||
let value = column.get(row);
|
||||
if value.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(value.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_f32(df: &DataFrame, name: &str) -> Result<Vec<f32>> {
|
||||
let cast = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Missing column '{name}'"))?
|
||||
.cast(&DataType::Float32)
|
||||
.with_context(|| format!("Failed to cast '{name}' to Float32"))?;
|
||||
let column = cast
|
||||
.f32()
|
||||
.with_context(|| format!("Column '{name}' is not Float32"))?;
|
||||
column
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(row, value)| value.with_context(|| format!("Column '{name}' has null at row {row}")))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Extracts string column `name` as a dense `Vec<String>`.
///
/// # Errors
/// Fails when the column is missing, is not a string column, or holds a null
/// in any row (the offending row index is included in the message).
fn extract_str(df: &DataFrame, name: &str) -> Result<Vec<String>> {
    let source = df
        .column(name)
        .with_context(|| format!("Missing column '{name}'"))?;
    let texts = source
        .str()
        .with_context(|| format!("Column '{name}' is not a string column"))?;

    let mut out = Vec::with_capacity(texts.len());
    for (row, cell) in texts.into_iter().enumerate() {
        let text = cell
            .map(ToString::to_string)
            .with_context(|| format!("Column '{name}' has null at row {row}"))?;
        out.push(text);
    }
    Ok(out)
}
|
||||
|
||||
/// Extracts string column `name`, keeping nulls as `None`.
///
/// Empty strings are treated like nulls and also become `None`.
///
/// # Errors
/// Fails when the column is missing or is not a string column.
fn extract_opt_str(df: &DataFrame, name: &str) -> Result<Vec<Option<String>>> {
    let source = df
        .column(name)
        .with_context(|| format!("Missing column '{name}'"))?;
    let texts = source
        .str()
        .with_context(|| format!("Column '{name}' is not a string column"))?;

    let values = texts
        .into_iter()
        .map(|cell| match cell {
            Some(text) if !text.is_empty() => Some(text.to_string()),
            _ => None,
        })
        .collect();
    Ok(values)
}
|
||||
|
||||
fn extract_opt_i32(df: &DataFrame, name: &str) -> Result<Vec<Option<i32>>> {
|
||||
let cast = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Missing column '{name}'"))?
|
||||
.cast(&DataType::Int32)
|
||||
.with_context(|| format!("Failed to cast '{name}' to Int32"))?;
|
||||
let column = cast
|
||||
.i32()
|
||||
.with_context(|| format!("Column '{name}' is not Int32"))?;
|
||||
Ok(column.into_iter().collect())
|
||||
}
|
||||
|
||||
fn extract_opt_i64(df: &DataFrame, name: &str) -> Result<Vec<Option<i64>>> {
|
||||
let cast = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Missing column '{name}'"))?
|
||||
.cast(&DataType::Int64)
|
||||
.with_context(|| format!("Failed to cast '{name}' to Int64"))?;
|
||||
let column = cast
|
||||
.i64()
|
||||
.with_context(|| format!("Column '{name}' is not Int64"))?;
|
||||
Ok(column.into_iter().collect())
|
||||
}
|
||||
|
||||
fn extract_opt_f32(df: &DataFrame, name: &str) -> Result<Vec<Option<f32>>> {
|
||||
let cast = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Missing column '{name}'"))?
|
||||
.cast(&DataType::Float32)
|
||||
.with_context(|| format!("Failed to cast '{name}' to Float32"))?;
|
||||
let column = cast
|
||||
.f32()
|
||||
.with_context(|| format!("Column '{name}' is not Float32"))?;
|
||||
Ok(column
|
||||
.into_iter()
|
||||
.map(|value| value.filter(|v| v.is_finite()))
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn extract_opt_datetime_iso(df: &DataFrame, name: &str) -> Result<Vec<Option<String>>> {
|
||||
let column = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Missing column '{name}'"))?;
|
||||
let cast = column
|
||||
.cast(&DataType::Datetime(TimeUnit::Microseconds, None))
|
||||
.with_context(|| format!("Failed to cast '{name}' to Datetime(us)"))?;
|
||||
let datetime = cast
|
||||
.datetime()
|
||||
.with_context(|| format!("Column '{name}' is not a Datetime column"))?;
|
||||
Ok(datetime
|
||||
.as_datetime_iter()
|
||||
.map(|value| value.map(|date| date.format("%Y-%m-%dT%H:%M:%SZ").to_string()))
|
||||
.collect())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn sample_path() -> Option<PathBuf> {
|
||||
let path = PathBuf::from("../finder/data/online_listings_buy.parquet");
|
||||
path.exists().then_some(path)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn loads_sample_listings_when_available() {
|
||||
let Some(path) = sample_path() else {
|
||||
eprintln!("sample parquet not present; skipping");
|
||||
return;
|
||||
};
|
||||
|
||||
let data = ActualListingData::load(&path).expect("listings load");
|
||||
assert!(!data.lat.is_empty());
|
||||
assert_eq!(data.lat.len(), data.lon.len());
|
||||
assert_eq!(data.lat.len(), data.postcode.len());
|
||||
assert_eq!(data.lat.len(), data.listing_url.len());
|
||||
assert_eq!(data.lat.len(), data.features.len());
|
||||
|
||||
let any_listing = data.listing_at(0);
|
||||
assert!(any_listing.lat.is_finite());
|
||||
assert!(any_listing.lon.is_finite());
|
||||
assert!(!any_listing.listing_url.is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_str_list(df: &DataFrame, name: &str) -> Result<Vec<Vec<String>>> {
|
||||
let column = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Missing column '{name}'"))?;
|
||||
let list = column
|
||||
.list()
|
||||
.with_context(|| format!("Column '{name}' is not a list column"))?;
|
||||
let mut out = Vec::with_capacity(list.len());
|
||||
for series_opt in list.into_iter() {
|
||||
let entries = match series_opt {
|
||||
Some(series) => {
|
||||
let strings = series.str().with_context(|| {
|
||||
format!("Column '{name}' list inner is not a string column")
|
||||
})?;
|
||||
strings
|
||||
.into_iter()
|
||||
.filter_map(|value| value.map(ToString::to_string))
|
||||
.collect()
|
||||
}
|
||||
None => Vec::new(),
|
||||
};
|
||||
out.push(entries);
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
|
@ -21,15 +21,21 @@ pub struct PlaceData {
|
|||
pub travel_destination: Vec<bool>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub(super) struct CityCandidate<'a> {
|
||||
pub(super) name: &'a str,
|
||||
pub(super) lat: f32,
|
||||
pub(super) lon: f32,
|
||||
name: &'a str,
|
||||
lat: f32,
|
||||
lon: f32,
|
||||
population: u32,
|
||||
max_dist_sq: f32,
|
||||
}
|
||||
|
||||
const PARENT_CITY_MAX_DIST_SQ: f32 = 0.81;
|
||||
const LONDON_DISPLAY_MAX_DEGREES: f32 = 30.0 / 111.0;
|
||||
const LONDON_DISPLAY_MAX_DIST_SQ: f32 = LONDON_DISPLAY_MAX_DEGREES * LONDON_DISPLAY_MAX_DEGREES;
|
||||
const SUBSUMED_CITY_MAX_DEGREES: f32 = 5.0 / 111.0;
|
||||
const SUBSUMED_CITY_MAX_DIST_SQ: f32 = SUBSUMED_CITY_MAX_DEGREES * SUBSUMED_CITY_MAX_DEGREES;
|
||||
const SUBSUMED_CITY_MIN_POPULATION_RATIO: u32 = 10;
|
||||
|
||||
fn type_rank(place_type: &str) -> u8 {
|
||||
match place_type {
|
||||
|
|
@ -47,15 +53,80 @@ pub fn is_travel_destination_type(place_type: &str) -> bool {
|
|||
matches!(place_type, "city" | "station" | "university")
|
||||
}
|
||||
|
||||
fn distance_sq(lat: f32, lon: f32, city: &CityCandidate<'_>) -> f32 {
|
||||
let cos_lat = lat.to_radians().cos();
|
||||
let dlat = city.lat - lat;
|
||||
let dlon = (city.lon - lon) * cos_lat;
|
||||
dlat * dlat + dlon * dlon
|
||||
impl<'a> CityCandidate<'a> {
|
||||
fn from_place(name: &'a str, lat: f32, lon: f32, population: u32) -> Self {
|
||||
let max_dist_sq = if name == "London" {
|
||||
LONDON_DISPLAY_MAX_DIST_SQ
|
||||
} else {
|
||||
PARENT_CITY_MAX_DIST_SQ
|
||||
};
|
||||
|
||||
Self {
|
||||
name,
|
||||
lat,
|
||||
lon,
|
||||
population,
|
||||
max_dist_sq,
|
||||
}
|
||||
}
|
||||
|
||||
fn distance_sq(&self, lat: f32, lon: f32, cos_lat: f32) -> f32 {
|
||||
let dlat = self.lat - lat;
|
||||
let dlon = (self.lon - lon) * cos_lat;
|
||||
dlat * dlat + dlon * dlon
|
||||
}
|
||||
|
||||
fn is_subsumed_by(&self, other: &Self) -> bool {
|
||||
if self.population == 0 {
|
||||
return false;
|
||||
}
|
||||
|
||||
let min_parent_population =
|
||||
u64::from(self.population) * u64::from(SUBSUMED_CITY_MIN_POPULATION_RATIO);
|
||||
if u64::from(other.population) < min_parent_population {
|
||||
return false;
|
||||
}
|
||||
|
||||
other.distance_sq(self.lat, self.lon, self.lat.to_radians().cos())
|
||||
< SUBSUMED_CITY_MAX_DIST_SQ
|
||||
}
|
||||
}
|
||||
|
||||
fn is_london_city_name(name: &str) -> bool {
|
||||
matches!(name, "London" | "Westminster" | "City of London")
|
||||
pub(super) fn display_city_candidates<'a>(
|
||||
names: &'a [String],
|
||||
type_rank: &[u8],
|
||||
population: &[u32],
|
||||
lat: &[f32],
|
||||
lon: &[f32],
|
||||
) -> Vec<CityCandidate<'a>> {
|
||||
let cities: Vec<CityCandidate<'_>> = type_rank
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, &rank)| {
|
||||
if rank == 0 {
|
||||
Some(CityCandidate::from_place(
|
||||
&names[idx],
|
||||
lat[idx],
|
||||
lon[idx],
|
||||
population[idx],
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
cities
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, city)| {
|
||||
let is_subsumed = cities
|
||||
.iter()
|
||||
.enumerate()
|
||||
.any(|(other_idx, other)| other_idx != idx && city.is_subsumed_by(other));
|
||||
(!is_subsumed).then_some(*city)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub(super) fn nearest_display_city<'a>(
|
||||
|
|
@ -63,35 +134,13 @@ pub(super) fn nearest_display_city<'a>(
|
|||
lon: f32,
|
||||
cities: &'a [CityCandidate<'a>],
|
||||
) -> Option<&'a str> {
|
||||
let mut best_dist_sq = f32::MAX;
|
||||
let mut best_city: Option<&CityCandidate<'_>> = None;
|
||||
let mut london_dist_sq: Option<f32> = None;
|
||||
let cos_lat = lat.to_radians().cos();
|
||||
let (best_city, best_dist_sq) = cities
|
||||
.iter()
|
||||
.map(|city| (city, city.distance_sq(lat, lon, cos_lat)))
|
||||
.min_by(|(_, lhs), (_, rhs)| lhs.total_cmp(rhs))?;
|
||||
|
||||
for city in cities {
|
||||
let dist_sq = distance_sq(lat, lon, city);
|
||||
if city.name == "London" {
|
||||
london_dist_sq = Some(dist_sq);
|
||||
}
|
||||
if dist_sq < best_dist_sq {
|
||||
best_dist_sq = dist_sq;
|
||||
best_city = Some(city);
|
||||
}
|
||||
}
|
||||
|
||||
let best_city = best_city?;
|
||||
if best_dist_sq >= PARENT_CITY_MAX_DIST_SQ {
|
||||
return None;
|
||||
}
|
||||
|
||||
if is_london_city_name(best_city.name) {
|
||||
if london_dist_sq.is_some_and(|dist_sq| dist_sq < LONDON_DISPLAY_MAX_DIST_SQ) {
|
||||
Some("London")
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
Some(best_city.name)
|
||||
}
|
||||
(best_dist_sq < best_city.max_dist_sq).then_some(best_city.name)
|
||||
}
|
||||
|
||||
pub fn normalize_search_text(text: &str) -> String {
|
||||
|
|
@ -306,19 +355,8 @@ impl PlaceData {
|
|||
let display_city_override = extract_optional_str_col(&df, "display_city")?;
|
||||
|
||||
// Precompute nearest city for each non-city place
|
||||
let city_indices: Vec<usize> = type_rank_vec
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, &rank)| if rank == 0 { Some(idx) } else { None })
|
||||
.collect();
|
||||
let city_candidates: Vec<CityCandidate<'_>> = city_indices
|
||||
.iter()
|
||||
.map(|&idx| CityCandidate {
|
||||
name: &name[idx],
|
||||
lat: lat[idx],
|
||||
lon: lon[idx],
|
||||
})
|
||||
.collect();
|
||||
let city_candidates =
|
||||
display_city_candidates(&name, &type_rank_vec, &population, &lat, &lon);
|
||||
|
||||
let fallback_city: Vec<Option<String>> = (0..row_count)
|
||||
.map(|idx| {
|
||||
|
|
@ -379,36 +417,41 @@ impl PlaceData {
|
|||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn test_city_candidates() -> Vec<CityCandidate<'static>> {
|
||||
vec![
|
||||
CityCandidate {
|
||||
name: "London",
|
||||
lat: 51.5074456,
|
||||
lon: -0.1277653,
|
||||
},
|
||||
CityCandidate {
|
||||
name: "Westminster",
|
||||
lat: 51.4973206,
|
||||
lon: -0.137149,
|
||||
},
|
||||
CityCandidate {
|
||||
name: "City of London",
|
||||
lat: 51.5156177,
|
||||
lon: -0.0919983,
|
||||
},
|
||||
CityCandidate {
|
||||
name: "Cambridge",
|
||||
lat: 52.2055314,
|
||||
lon: 0.1186637,
|
||||
},
|
||||
CityCandidate {
|
||||
name: "Oxford",
|
||||
lat: 51.7520131,
|
||||
lon: -1.2578499,
|
||||
},
|
||||
fn test_city_rows() -> [(&'static str, f32, f32, u32); 5] {
|
||||
[
|
||||
("London", 51.5074456, -0.1277653, 8_908_083),
|
||||
("Westminster", 51.4973206, -0.137149, 211_365),
|
||||
("City of London", 51.5156177, -0.0919983, 10_847),
|
||||
("Cambridge", 52.2055314, 0.1186637, 145_818),
|
||||
("Oxford", 51.7520131, -1.2578499, 165_000),
|
||||
]
|
||||
}
|
||||
|
||||
fn all_test_city_candidates() -> Vec<CityCandidate<'static>> {
|
||||
test_city_rows()
|
||||
.into_iter()
|
||||
.map(|(name, lat, lon, population)| {
|
||||
CityCandidate::from_place(name, lat, lon, population)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn test_city_candidates() -> Vec<CityCandidate<'static>> {
|
||||
let cities = all_test_city_candidates();
|
||||
|
||||
cities
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, city)| {
|
||||
let is_subsumed = cities
|
||||
.iter()
|
||||
.enumerate()
|
||||
.any(|(other_idx, other)| other_idx != idx && city.is_subsumed_by(other));
|
||||
(!is_subsumed).then_some(*city)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn type_rank_ordering() {
|
||||
assert!(type_rank("city") < type_rank("town"));
|
||||
|
|
@ -433,7 +476,30 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn nearest_display_city_canonicalizes_greater_london_aliases() {
|
||||
fn display_city_candidates_drop_city_nodes_subsumed_by_much_larger_nearby_city() {
|
||||
let rows = test_city_rows();
|
||||
let names: Vec<String> = rows
|
||||
.iter()
|
||||
.map(|(name, _, _, _)| name.to_string())
|
||||
.collect();
|
||||
let type_rank: Vec<u8> = vec![0; rows.len()];
|
||||
let population: Vec<u32> = rows
|
||||
.iter()
|
||||
.map(|(_, _, _, population)| *population)
|
||||
.collect();
|
||||
let lat: Vec<f32> = rows.iter().map(|(_, lat, _, _)| *lat).collect();
|
||||
let lon: Vec<f32> = rows.iter().map(|(_, _, lon, _)| *lon).collect();
|
||||
|
||||
let cities = display_city_candidates(&names, &type_rank, &population, &lat, &lon);
|
||||
|
||||
assert_eq!(
|
||||
cities.iter().map(|city| city.name).collect::<Vec<_>>(),
|
||||
["London", "Cambridge", "Oxford"]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn nearest_display_city_labels_inner_greater_london_from_london_candidate() {
|
||||
let cities = test_city_candidates();
|
||||
|
||||
assert_eq!(
|
||||
|
|
@ -453,7 +519,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn nearest_display_city_does_not_leak_westminster_label_past_london_guard() {
|
||||
fn nearest_display_city_does_not_extend_london_past_its_display_radius() {
|
||||
let cities = test_city_candidates();
|
||||
|
||||
assert_eq!(nearest_display_city(51.5093, -0.5954, &cities), None);
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ use std::fs;
|
|||
use std::path::Path;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use super::places::{nearest_display_city, CityCandidate};
|
||||
use super::places::{display_city_candidates, nearest_display_city};
|
||||
use super::PlaceData;
|
||||
|
||||
/// Precomputed outcode data derived from postcode boundaries.
|
||||
|
|
@ -53,20 +53,13 @@ impl OutcodeData {
|
|||
let centroids: Vec<(f32, f32)> = entries.iter().map(|(_, c)| *c).collect();
|
||||
|
||||
// Compute nearest city for each outcode (same algorithm as PlaceData)
|
||||
let city_indices: Vec<usize> = place_data
|
||||
.type_rank
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, &rank)| if rank == 0 { Some(idx) } else { None })
|
||||
.collect();
|
||||
let city_candidates: Vec<CityCandidate<'_>> = city_indices
|
||||
.iter()
|
||||
.map(|&idx| CityCandidate {
|
||||
name: &place_data.name[idx],
|
||||
lat: place_data.lat[idx],
|
||||
lon: place_data.lon[idx],
|
||||
})
|
||||
.collect();
|
||||
let city_candidates = display_city_candidates(
|
||||
&place_data.name,
|
||||
&place_data.type_rank,
|
||||
&place_data.population,
|
||||
&place_data.lat,
|
||||
&place_data.lon,
|
||||
);
|
||||
|
||||
let cities: Vec<Option<String>> = centroids
|
||||
.iter()
|
||||
|
|
|
|||
|
|
@ -202,6 +202,10 @@ struct Cli {
|
|||
#[arg(long, env = "TRAVEL_TIMES")]
|
||||
travel_times: PathBuf,
|
||||
|
||||
/// Optional path to a parquet of live online listings (Rightmove etc.) to overlay on the map.
|
||||
#[arg(long, env = "ACTUAL_LISTINGS_PATH")]
|
||||
actual_listings_path: Option<PathBuf>,
|
||||
|
||||
/// Google Maps API key for Street View metadata lookups
|
||||
#[arg(long, env = "GOOGLE_MAPS_API_KEY")]
|
||||
google_maps_api_key: String,
|
||||
|
|
@ -531,6 +535,20 @@ async fn main() -> anyhow::Result<()> {
|
|||
let superuser_token_cache = Arc::new(pocketbase::SuperuserTokenCache::new());
|
||||
let share_cache = Arc::new(licensing::ShareBoundsCache::new());
|
||||
|
||||
let actual_listings = if let Some(path) = cli.actual_listings_path.as_ref() {
|
||||
if !path.exists() {
|
||||
bail!("Actual listings parquet not found: {}", path.display());
|
||||
}
|
||||
info!("Loading actual listings from {}", path.display());
|
||||
let listings = data::ActualListingData::load(path)?;
|
||||
trim_allocator("actual listings load");
|
||||
info!(rows = listings.lat.len(), "Actual listings loaded");
|
||||
Some(Arc::new(listings))
|
||||
} else {
|
||||
info!("ACTUAL_LISTINGS_PATH not set; live listings overlay disabled");
|
||||
None
|
||||
};
|
||||
|
||||
let app_state = AppState {
|
||||
data: property_data,
|
||||
grid,
|
||||
|
|
@ -556,6 +574,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
gemini_api_key: cli.gemini_api_key,
|
||||
gemini_model: cli.gemini_model,
|
||||
travel_time_store,
|
||||
actual_listings,
|
||||
token_cache,
|
||||
superuser_token_cache,
|
||||
share_cache,
|
||||
|
|
@ -617,6 +636,10 @@ async fn main() -> anyhow::Result<()> {
|
|||
"/api/pois",
|
||||
get(routes::get_pois).layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route(
|
||||
"/api/actual-listings",
|
||||
get(routes::get_actual_listings).layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route(
|
||||
"/api/poi-categories",
|
||||
get(routes::get_poi_categories).layer(ConcurrencyLimitLayer::new(20)),
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
mod actual_listings;
|
||||
mod ai_filters;
|
||||
mod checkout;
|
||||
mod export;
|
||||
|
|
@ -29,6 +30,7 @@ mod travel_destinations;
|
|||
mod travel_modes;
|
||||
pub(crate) mod travel_time;
|
||||
|
||||
pub use actual_listings::get_actual_listings;
|
||||
pub use ai_filters::{build_system_prompt, post_ai_filters};
|
||||
pub use checkout::post_checkout;
|
||||
pub use export::get_export;
|
||||
|
|
|
|||
80
server-rs/src/routes/actual_listings.rs
Normal file
80
server-rs/src/routes/actual_listings.rs
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::{Query, State};
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::Json;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::info;
|
||||
|
||||
use crate::data::ActualListing;
|
||||
use crate::parsing::require_bounds;
|
||||
use crate::state::SharedState;
|
||||
|
||||
/// Upper bound on the number of listings returned by a single request;
/// responses that hit it are flagged as truncated.
const MAX_RESULTS: usize = 5000;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct ActualListingsParams {
|
||||
bounds: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct ActualListingsResponse {
|
||||
pub listings: Vec<ActualListing>,
|
||||
pub total: usize,
|
||||
pub truncated: bool,
|
||||
}
|
||||
|
||||
pub async fn get_actual_listings(
|
||||
State(shared): State<Arc<SharedState>>,
|
||||
Query(params): Query<ActualListingsParams>,
|
||||
) -> Result<Json<ActualListingsResponse>, (StatusCode, String)> {
|
||||
let state = shared.load_state();
|
||||
let Some(actual_listings) = state.actual_listings.clone() else {
|
||||
return Ok(Json(ActualListingsResponse {
|
||||
listings: Vec::new(),
|
||||
total: 0,
|
||||
truncated: false,
|
||||
}));
|
||||
};
|
||||
let (south, west, north, east) = require_bounds(params.bounds)?;
|
||||
|
||||
let response = tokio::task::spawn_blocking(move || {
|
||||
let t0 = std::time::Instant::now();
|
||||
let row_indices = actual_listings.grid.query(south, west, north, east);
|
||||
let total = row_indices.len();
|
||||
let truncated = total > MAX_RESULTS;
|
||||
|
||||
let mut listings: Vec<ActualListing> = row_indices
|
||||
.iter()
|
||||
.take(MAX_RESULTS)
|
||||
.map(|&row| actual_listings.listing_at(row as usize))
|
||||
.collect();
|
||||
|
||||
// Sort newest first so the most relevant pins win when the viewport is busy.
|
||||
listings.sort_by(|left, right| {
|
||||
right
|
||||
.listing_date_iso
|
||||
.cmp(&left.listing_date_iso)
|
||||
.then_with(|| right.asking_price.cmp(&left.asking_price))
|
||||
});
|
||||
|
||||
let elapsed = t0.elapsed();
|
||||
info!(
|
||||
results = listings.len(),
|
||||
total,
|
||||
truncated,
|
||||
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
|
||||
"GET /api/actual-listings"
|
||||
);
|
||||
|
||||
ActualListingsResponse {
|
||||
listings,
|
||||
total,
|
||||
truncated,
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
|
||||
|
||||
Ok(Json(response))
|
||||
}
|
||||
|
|
@ -6,7 +6,8 @@ use rustc_hash::FxHashMap;
|
|||
use crate::auth::TokenCache;
|
||||
use crate::bugsink::FrontendConfig as BugsinkFrontendConfig;
|
||||
use crate::data::{
|
||||
OutcodeData, POICategoryGroup, POIData, PlaceData, PostcodeData, PropertyData, TravelTimeStore,
|
||||
ActualListingData, OutcodeData, POICategoryGroup, POIData, PlaceData, PostcodeData,
|
||||
PropertyData, TravelTimeStore,
|
||||
};
|
||||
use crate::licensing::ShareBoundsCache;
|
||||
use crate::pocketbase::SuperuserTokenCache;
|
||||
|
|
@ -43,6 +44,8 @@ pub struct AppState {
|
|||
pub poi_category_groups: Arc<Vec<POICategoryGroup>>,
|
||||
/// Precomputed travel time data store
|
||||
pub travel_time_store: Arc<TravelTimeStore>,
|
||||
/// Optional real-world listings (e.g. Rightmove / Zoopla data) loaded from ACTUAL_LISTINGS_PATH.
|
||||
pub actual_listings: Option<Arc<ActualListingData>>,
|
||||
/// Token validation cache (60s TTL)
|
||||
pub token_cache: Arc<TokenCache>,
|
||||
/// Cached PocketBase superuser token (10min TTL) to avoid rate-limiting
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue