Lots of improvements
This commit is contained in:
parent
ef921361ec
commit
80a5a2a774
21 changed files with 489 additions and 337 deletions
|
|
@ -464,7 +464,7 @@ impl PropertyData {
|
|||
tracing::info!("Concatenating all data sources");
|
||||
let buy_count = listings_buy.height();
|
||||
let rent_count = listings_rent.height();
|
||||
let mut combined = concat(
|
||||
let combined = concat(
|
||||
[
|
||||
properties_joined.lazy(),
|
||||
listings_buy.lazy(),
|
||||
|
|
@ -495,36 +495,8 @@ impl PropertyData {
|
|||
let numeric_names = features::all_numeric_feature_names();
|
||||
let enum_names = features::all_enum_feature_names();
|
||||
|
||||
// Fill in NaN/empty placeholder columns for features that don't exist in all
|
||||
// sources (e.g. Listing date only comes from listings, Estimated current price
|
||||
// only from properties). Without this, diagonal concat leaves them absent.
|
||||
{
|
||||
let schema = combined.schema();
|
||||
let mut fill_exprs: Vec<Expr> = Vec::new();
|
||||
for &name in &numeric_names {
|
||||
if schema.get(name).is_none() {
|
||||
tracing::info!(feature = %name, "Adding NaN placeholder for missing numeric feature");
|
||||
fill_exprs.push(lit(f32::NAN).alias(name));
|
||||
}
|
||||
}
|
||||
for &name in &enum_names {
|
||||
if schema.get(name).is_none() {
|
||||
tracing::info!(feature = %name, "Adding empty placeholder for missing enum feature");
|
||||
fill_exprs.push(lit("").alias(name));
|
||||
}
|
||||
}
|
||||
if !fill_exprs.is_empty() {
|
||||
combined = combined
|
||||
.lazy()
|
||||
.with_columns(fill_exprs)
|
||||
.collect()
|
||||
.context("Failed to add placeholder columns for missing features")?;
|
||||
}
|
||||
}
|
||||
|
||||
let schema = combined.schema();
|
||||
|
||||
// Validate: every configured feature exists in combined schema
|
||||
for name in &numeric_names {
|
||||
match schema.get(name) {
|
||||
Some(dtype) if is_numeric_dtype(dtype) => {}
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 10000.0,
|
||||
description: "Inflation-adjusted estimate of the current property value",
|
||||
detail: "Estimated by applying a repeat-sales price index to the last known sale price, plus a renovation premium for properties with post-sale improvements detected from EPC records (extensions, renovations, remodeling). The index tracks price changes within each postcode sector and property type. Renovation premiums are estimated per area from observed repeat-sale pairs and decay over time. Properties sold recently will have estimates close to their sale price; older sales are adjusted more.",
|
||||
detail: "Estimated by applying a repeat-sales price index to the last known sale price, plus a renovation premium for properties with post-sale improvements detected from EPC records (extensions, renovations, remodelling). The index tracks price changes within each postcode sector and property type. Renovation premiums are estimated per area from observed repeat-sale pairs and decay over time. Properties sold recently will have estimates close to their sale price; older sales are adjusted more.",
|
||||
source: "price-paid",
|
||||
prefix: "£",
|
||||
suffix: "",
|
||||
|
|
@ -259,7 +259,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 50.0,
|
||||
description: "Listed monthly rent for properties currently for rent",
|
||||
detail: "The advertised rental price normalized to monthly for properties currently listed for rent on online property portals. Weekly rents are converted (×52/12), yearly (/12), daily (×365.25/12), and quarterly (/3). Only populated for 'For rent' listings.",
|
||||
detail: "The advertised rental price normalised to monthly for properties currently listed for rent on online property portals. Weekly rents are converted (×52/12), yearly (/12), daily (×365.25/12), and quarterly (/3). Only populated for 'For rent' listings.",
|
||||
source: "online-listings",
|
||||
prefix: "£",
|
||||
suffix: "/mo",
|
||||
|
|
@ -325,82 +325,14 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
name: "Transport",
|
||||
features: &[
|
||||
FeatureConfig {
|
||||
name: "Public transport to Bank (mins)",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 180.0,
|
||||
},
|
||||
step: 2.0,
|
||||
description: "Public transport journey time to Bank station",
|
||||
detail: "Journey time in minutes by public transport to Bank station in the City of London, using TfL's Journey Planner API. Calculated for weekday morning commute times.",
|
||||
source: "tfl-journey-times",
|
||||
prefix: "",
|
||||
suffix: " mins",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Public transport to Fitzrovia (mins)",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 180.0,
|
||||
},
|
||||
step: 2.0,
|
||||
description: "Public transport journey time to Fitzrovia",
|
||||
detail: "Journey time in minutes by public transport to Fitzrovia in central London, using TfL's Journey Planner API. Calculated for weekday morning commute times.",
|
||||
source: "tfl-journey-times",
|
||||
prefix: "",
|
||||
suffix: " mins",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Cycling to Bank (mins)",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 180.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Cycling time to Bank station",
|
||||
detail: "Cycling journey time in minutes to Bank station, as calculated by the TfL Journey Planner API. Uses TfL's default cycling speed and route preferences.",
|
||||
source: "tfl-journey-times",
|
||||
prefix: "",
|
||||
suffix: " mins",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Cycling to Fitzrovia (mins)",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 180.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Cycling time to Fitzrovia",
|
||||
detail: "Cycling journey time in minutes to Fitzrovia, as calculated by the TfL Journey Planner API. Uses TfL's default cycling speed and route preferences.",
|
||||
source: "tfl-journey-times",
|
||||
prefix: "",
|
||||
suffix: " mins",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Number of public transport stations within 2km",
|
||||
name: "Train or tube stations within 1km",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 5.0,
|
||||
high: 95.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Number of public transport stops within 2km",
|
||||
detail: "Count of bus stops, rail stations, tube stations, tram stops, and other public transport access points within a 2km radius of the property's postcode. Derived from the NaPTAN (National Public Transport Access Nodes) dataset.",
|
||||
description: "Number of train or tube stations within 1km",
|
||||
detail: "Count of rail stations and Tube/metro/tram stops within a 1km radius of the property's postcode. Derived from the NaPTAN (National Public Transport Access Nodes) dataset. Does not include bus stops.",
|
||||
source: "naptan",
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
|
|
@ -409,6 +341,23 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Distance to nearest train or tube station (km)",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 2.0,
|
||||
high: 98.0,
|
||||
},
|
||||
step: 0.1,
|
||||
description: "Distance to the closest train or tube station",
|
||||
detail: "Straight-line distance in kilometres from the property's postcode centroid to the nearest rail station or Tube/metro/tram stop. Derived from the NaPTAN (National Public Transport Access Nodes) dataset.",
|
||||
source: "naptan",
|
||||
prefix: "",
|
||||
suffix: " km",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
],
|
||||
},
|
||||
FeatureGroup {
|
||||
|
|
@ -906,14 +855,31 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "% Asian",
|
||||
name: "% South Asian",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 100.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Percentage of population identifying as Asian",
|
||||
detail: "From the 2021 Census. Percentage of the local authority population identifying as Asian or Asian British (Indian, Pakistani, Bangladeshi, Chinese, or any other Asian background).",
|
||||
description: "Percentage of population identifying as South Asian",
|
||||
detail: "From the 2021 Census. Percentage of the local authority population identifying as Indian, Pakistani, Bangladeshi, or any other Asian background.",
|
||||
source: "ethnicity",
|
||||
prefix: "",
|
||||
suffix: "%",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "% East Asian",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 100.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Percentage of population identifying as East Asian",
|
||||
detail: "From the 2021 Census. Percentage of the local authority population identifying as Chinese.",
|
||||
source: "ethnicity",
|
||||
prefix: "",
|
||||
suffix: "%",
|
||||
|
|
@ -1074,7 +1040,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
|
||||
pub static ENUM_FEATURE_GROUPS: &[EnumFeatureGroup] = &[
|
||||
EnumFeatureGroup {
|
||||
name: "Property",
|
||||
name: "Properties in the area",
|
||||
features: &[
|
||||
EnumFeatureConfig {
|
||||
name: "Listing status",
|
||||
|
|
@ -1084,7 +1050,7 @@ pub static ENUM_FEATURE_GROUPS: &[EnumFeatureGroup] = &[
|
|||
source: "online-listings",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Leashold/Freehold",
|
||||
name: "Leasehold/Freehold",
|
||||
order: Some(&["Freehold", "Leasehold"]),
|
||||
description: "Whether the property is leasehold or freehold",
|
||||
detail: "From HM Land Registry Price Paid data. Freehold means you own the building and the land it stands on. Leasehold means you own the building but not the land — you have a lease from the freeholder for a set number of years.",
|
||||
|
|
|
|||
|
|
@ -417,16 +417,16 @@ async fn main() -> anyhow::Result<()> {
|
|||
let state_short_url = state.clone();
|
||||
let state_ai_filters = state.clone();
|
||||
let state_streetview = state.clone();
|
||||
let state_subscription = state.clone();
|
||||
let state_newsletter = state.clone();
|
||||
let state_travel_modes = state.clone();
|
||||
let state_travel_destinations = state.clone();
|
||||
let state_checkout = state.clone();
|
||||
let state_stripe_webhook = state.clone();
|
||||
let state_pricing = state.clone();
|
||||
let state_invites_create = state.clone();
|
||||
let state_invite_get = state.clone();
|
||||
let state_redeem_invite = state.clone();
|
||||
let state_rightmove = state.clone();
|
||||
let state_journey = state.clone();
|
||||
|
||||
let api = Router::new()
|
||||
.route(
|
||||
|
|
@ -461,6 +461,14 @@ async fn main() -> anyhow::Result<()> {
|
|||
"/api/travel-modes",
|
||||
get(move || routes::get_travel_modes(state_travel_modes.clone())),
|
||||
)
|
||||
.route(
|
||||
"/api/travel-destinations",
|
||||
get(move |query| routes::get_travel_destinations(state_travel_destinations.clone(), query)),
|
||||
)
|
||||
.route(
|
||||
"/api/journey",
|
||||
get(move |query| routes::get_journey(state_journey.clone(), query)),
|
||||
)
|
||||
.route(
|
||||
"/api/hexagon-properties",
|
||||
get(move |ext, query| {
|
||||
|
|
@ -502,16 +510,6 @@ async fn main() -> anyhow::Result<()> {
|
|||
"/api/streetview",
|
||||
get(move |query| routes::get_streetview(state_streetview.clone(), query)),
|
||||
)
|
||||
.route(
|
||||
"/api/rightmove-location",
|
||||
get(move |query| routes::get_rightmove_typeahead(state_rightmove.clone(), query)),
|
||||
)
|
||||
.route(
|
||||
"/api/subscription",
|
||||
patch(move |ext, body| {
|
||||
routes::patch_subscription(state_subscription.clone(), ext, body)
|
||||
}),
|
||||
)
|
||||
.route(
|
||||
"/api/newsletter",
|
||||
patch(move |ext, body| {
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ mod features;
|
|||
mod hexagon_stats;
|
||||
pub(crate) mod hexagons;
|
||||
mod invites;
|
||||
mod journey;
|
||||
mod me;
|
||||
mod pb_proxy;
|
||||
mod places;
|
||||
|
|
@ -20,10 +21,9 @@ mod streetview;
|
|||
mod stripe_webhook;
|
||||
mod newsletter;
|
||||
pub(crate) mod pricing;
|
||||
mod rightmove_typeahead;
|
||||
mod subscription;
|
||||
mod tiles;
|
||||
pub(crate) mod travel_time;
|
||||
mod travel_destinations;
|
||||
mod travel_modes;
|
||||
|
||||
pub use ai_filters::{build_ollama_schema, build_system_prompt, post_ai_filters};
|
||||
|
|
@ -44,10 +44,10 @@ pub use screenshot::{fetch_screenshot_bytes, get_screenshot};
|
|||
pub use shorten::{get_short_url, post_shorten};
|
||||
pub use streetview::get_streetview;
|
||||
pub use invites::{get_invite, post_invites, post_redeem_invite};
|
||||
pub use journey::get_journey;
|
||||
pub use newsletter::patch_newsletter;
|
||||
pub use pricing::get_pricing;
|
||||
pub use stripe_webhook::post_stripe_webhook;
|
||||
pub use subscription::patch_subscription;
|
||||
pub use tiles::{get_style, get_tile, init_tile_reader};
|
||||
pub use rightmove_typeahead::get_rightmove_typeahead;
|
||||
pub use travel_destinations::get_travel_destinations;
|
||||
pub use travel_modes::get_travel_modes;
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
|
|||
parts.push(
|
||||
"User: \"cheap freehold house under 400k\"\n\
|
||||
Output: {\"numeric_filters\": [{\"name\": \"Last known price\", \"bound\": \"max\", \"value\": 400000}], \
|
||||
\"enum_filters\": [{\"name\": \"Leashold/Freehold\", \"values\": [\"Freehold\"]}, \
|
||||
\"enum_filters\": [{\"name\": \"Leasehold/Freehold\", \"values\": [\"Freehold\"]}, \
|
||||
{\"name\": \"Property type\", \"values\": [\"Detached\", \"Semi-Detached\", \"Terraced\"]}], \
|
||||
\"notes\": \"\"}"
|
||||
.to_string(),
|
||||
|
|
@ -252,13 +252,13 @@ pub async fn post_ai_filters(
|
|||
/// ```json
|
||||
/// {
|
||||
/// "numeric_filters": [{"name": "Last known price", "bound": "max", "value": 300000}],
|
||||
/// "enum_filters": [{"name": "Leashold/Freehold", "values": ["Freehold"]}]
|
||||
/// "enum_filters": [{"name": "Leasehold/Freehold", "values": ["Freehold"]}]
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Output format (FeatureFilters):
|
||||
/// ```json
|
||||
/// { "Last known price": [0, 300000], "Leashold/Freehold": ["Freehold"] }
|
||||
/// { "Last known price": [0, 300000], "Leasehold/Freehold": ["Freehold"] }
|
||||
/// ```
|
||||
fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
|
||||
let mut result = serde_json::Map::new();
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ use crate::parsing::{
|
|||
bounds_intersect, cell_for_row, h3_cell_bounds, needs_parent, parse_field_indices,
|
||||
parse_filters, require_bounds, row_passes_filters, validate_h3_resolution,
|
||||
};
|
||||
use crate::routes::travel_time::TravelTimeAgg;
|
||||
use crate::routes::travel_time::{parse_travel_entries, TravelTimeAgg};
|
||||
use crate::state::AppState;
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
@ -40,62 +40,6 @@ pub struct HexagonParams {
|
|||
travel: Option<String>,
|
||||
}
|
||||
|
||||
/// One parsed entry of the `travel` query parameter.
#[derive(Debug)]
struct TravelEntry {
    /// First `:`-separated field of the entry, trimmed.
    mode: String,
    /// Second `:`-separated field of the entry, trimmed.
    slug: String,
    /// `true` when the third field is the literal `best`.
    use_best: bool,
    /// Lower bound of the optional filter range; `None` when no range was given.
    filter_min: Option<f32>,
    /// Upper bound of the optional filter range; `None` when no range was given.
    filter_max: Option<f32>,
}

/// Parse `travel` param into a list of travel entries.
///
/// Entries are separated by `|`. Each entry has one of these shapes:
/// `mode:slug`, `mode:slug:best`, `mode:slug:min:max` or `mode:slug:best:min:max`.
///
/// Parsing is deliberately lenient about surplus fields, matching the historical
/// behaviour: a lone third field that is not `best` (e.g. `mode:slug:10`) and any
/// fields after the max bound are ignored rather than rejected.
///
/// # Errors
///
/// Returns a human-readable message when
/// - an entry has fewer than two `:`-separated fields,
/// - a filter bound is not a number (this includes `NaN`, which would otherwise
///   produce a range that no value can satisfy), or
/// - the same `mode:slug` pair appears more than once.
fn parse_travel_entries(travel_str: &str) -> Result<Vec<TravelEntry>, String> {
    /// Parse one bound of a filter range; `which` is `"min"` or `"max"` and is
    /// only used to build the error message.
    fn parse_bound(raw: &str, which: &str, segment: &str) -> Result<f32, String> {
        raw.trim()
            .parse::<f32>()
            .ok()
            // `f32::from_str` accepts "NaN"; a NaN bound makes every comparison
            // false, so treat it as invalid input instead of a usable filter.
            .filter(|value| !value.is_nan())
            .ok_or_else(|| format!("invalid travel filter {} in '{}'", which, segment))
    }

    let mut entries = Vec::new();
    // Hash set instead of a Vec so duplicate detection stays O(1) per entry.
    let mut seen_keys = std::collections::HashSet::new();

    for segment in travel_str.split('|') {
        let parts: Vec<&str> = segment.split(':').collect();
        if parts.len() < 2 {
            return Err(format!(
                "each travel entry must be 'mode:slug' or 'mode:slug:min:max', got '{}'",
                segment
            ));
        }
        let mode = parts[0].trim().to_string();
        let slug = parts[1].trim().to_string();

        // An optional `best` marker shifts the position of the min/max fields by one.
        let use_best = parts.len() >= 3 && parts[2].trim() == "best";
        let filter_offset = usize::from(use_best);

        // Both bounds must be present for a filter to apply; min is validated first.
        let (filter_min, filter_max) =
            match (parts.get(2 + filter_offset), parts.get(3 + filter_offset)) {
                (Some(raw_min), Some(raw_max)) => (
                    Some(parse_bound(raw_min, "min", segment)?),
                    Some(parse_bound(raw_max, "max", segment)?),
                ),
                _ => (None, None),
            };

        // `replace` hands back the previously stored key when one already existed.
        if let Some(duplicate) = seen_keys.replace(format!("{}:{}", mode, slug)) {
            return Err(format!("duplicate travel entry '{}'", duplicate));
        }

        entries.push(TravelEntry {
            mode,
            slug,
            use_best,
            filter_min,
            filter_max,
        });
    }
    Ok(entries)
}
|
||||
|
||||
/// Build feature maps from aggregated cell data, filtering to only cells that intersect the query bounds.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue