Can't even keep track anymore

This commit is contained in:
Andras Schmelczer 2026-02-13 09:16:28 +00:00
parent dccc1e439d
commit 3a3f899ea2
50 changed files with 1144 additions and 560 deletions

View file

@ -17,3 +17,7 @@ pub const POSTCODE_SEARCH_OFFSET: f64 = 0.02;
/// System prompt for area summaries: instructs the model to act as an estate agent and
/// describe the supplied area statistics in at most one plain-prose sentence.
pub const AREA_SUMMARY_SYSTEM_PROMPT: &str = "You are an experienced estate agent with an expertise in area analysis. Help the user find his/her dream area or perfect postcode to settle in. The user is looking to buy a property based on the filters they provide. Given area statistics, write at most a single concise sentence summarising the key characteristics of the area. Be factual and highlight notable values. Do not use bullet points or headers — just flowing prose. Do not use markdown formatting. Highlight unusual facts that stand out from the average, but do not exaggerate. If there are no notable characteristics, say so. Always write at most a single sentence! Reason about the relation of different statistics to each other.";
/// Token limit paired with the area summary prompt.
/// NOTE(review): presumably the generation cap sent to the model — confirm at the call site.
pub const AREA_SUMMARY_MAX_TOKENS: usize = 300;
/// Sampling temperature paired with the area summary prompt.
pub const AREA_SUMMARY_TEMPERATURE: f32 = 0.3;
/// System prompt for AI filters: instructs the model to translate a natural-language
/// description into filter settings, emitted as JSON matching a provided schema.
pub const AI_FILTERS_SYSTEM_PROMPT: &str = "You are a property search assistant. The user will describe their ideal property or area in natural language. Your job is to translate their description into filter settings. ONLY set filters the user explicitly mentioned or clearly implied. Leave everything else out. Do not guess or add extra filters. If a request is ambiguous, prefer a wider range. Output valid JSON matching the provided schema.";
/// Token limit paired with the AI filters prompt.
/// NOTE(review): presumably the generation cap sent to the model — confirm at the call site.
pub const AI_FILTERS_MAX_TOKENS: usize = 2000;
/// Sampling temperature paired with the AI filters prompt.
/// NOTE(review): 0.0 is presumably chosen for repeatable structured output — confirm.
pub const AI_FILTERS_TEMPERATURE: f32 = 0.0;

View file

@ -1,7 +1,9 @@
mod places;
mod poi;
mod postcodes;
mod property;
pub use places::PlaceData;
pub use poi::{POICategoryGroup, POIData};
pub use postcodes::PostcodeData;
pub use property::{precompute_h3, FeatureStats, Histogram, PropertyData};
pub use property::{precompute_h3, FeatureStats, Histogram, PropertyData, RenovationEvent};

View file

@ -156,6 +156,17 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: " rooms",
raw: false,
},
FeatureConfig {
name: "Estimated monthly rent",
bounds: Bounds::Percentile { low: 2.0, high: 98.0 },
step: 25.0,
description: "Median monthly private rent for the local area and bedroom count",
detail: "Median monthly rental price from ONS Private Rental Market Summary Statistics (Oct 2022 - Sep 2023). Matched by local authority district and estimated bedroom count (habitable rooms minus 1). Based on Valuation Office Agency lettings data.",
source: "ons-rental",
prefix: "£",
suffix: "/mo",
raw: false,
},
FeatureConfig {
name: "Date of last transaction",
bounds: Bounds::Fixed {

View file

@ -1,3 +1,4 @@
mod ai_filters;
mod area_summary;
mod export;
mod features;
@ -5,6 +6,7 @@ mod hexagon_stats;
pub(crate) mod hexagons;
mod me;
mod pb_proxy;
mod places;
mod pois;
mod postcode_stats;
mod postcodes;
@ -15,6 +17,7 @@ mod stats;
mod tiles;
pub(crate) mod travel_time;
pub use ai_filters::{build_feature_prompt, build_ollama_schema, post_ai_filters};
pub use area_summary::post_area_summary;
pub use export::get_export;
pub use features::{build_features_response, get_features, FeatureInfo, FeaturesResponse};
@ -22,6 +25,7 @@ pub use hexagon_stats::get_hexagon_stats;
pub use hexagons::get_hexagons;
pub use me::get_me;
pub use pb_proxy::proxy_to_pocketbase;
pub use places::get_places;
pub use pois::{get_poi_categories, get_pois};
pub use postcode_stats::get_postcode_stats;
pub use postcodes::{get_postcode_lookup, get_postcodes};

View file

@ -90,7 +90,7 @@ fn build_prompt(req: &AreaSummaryRequest) -> String {
}
/// Strip `<think>...</think>` blocks from model output
fn strip_think_blocks(text: &str) -> String {
pub(crate) fn strip_think_blocks(text: &str) -> String {
let mut result = String::new();
let mut remaining = text;
while let Some(start) = remaining.find("<think>") {

View file

@ -160,7 +160,8 @@ pub async fn get_export(
params.filters.as_deref(),
&state.feature_name_to_index,
&state.data.enum_values,
);
)
.map_err(|err| (StatusCode::BAD_REQUEST, err))?;
let public_url = state.public_url.clone();

View file

@ -48,7 +48,7 @@ pub enum FeatureInfo {
#[derive(Clone, Serialize)]
pub struct FeatureGroupResponse {
name: String,
pub(crate) name: String,
pub(crate) features: Vec<FeatureInfo>,
}

View file

@ -135,7 +135,8 @@ pub async fn get_hexagons(
params.filters.as_deref(),
&state.feature_name_to_index,
&state.data.enum_values,
);
)
.map_err(|err| (StatusCode::BAD_REQUEST, err))?;
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let field_indices = parse_field_indices(params.fields.as_deref(), &state.feature_name_to_index);
@ -147,7 +148,7 @@ pub async fn get_hexagons(
.map(parse_destination)
.transpose()
.map_err(|e| (StatusCode::BAD_REQUEST, e))?;
let mode = params.mode.clone().unwrap_or_else(|| "transit".into());
let mode = params.mode.clone().unwrap_or_else(|| "car".into());
// Capture what we need for the R5 call before moving state into spawn_blocking
let r5_url = state.r5_url.clone();
@ -249,16 +250,16 @@ pub async fn get_hexagons(
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
// If a destination was requested and R5 is configured, fetch travel times
// If a destination was requested and R5 is configured, fetch travel times.
if let Some(dest) = destination {
if r5_url.is_empty() {
return Err((
StatusCode::SERVICE_UNAVAILABLE,
"Travel time queries require R5 service (R5_URL not configured)".into(),
"Travel time queries require routing service (R5_URL not configured)".into(),
));
}
// Collect hex centroids from the response
// Collect hex centroids
let origins: Vec<[f64; 2]> = response
.features
.iter()
@ -297,8 +298,7 @@ pub async fn get_hexagons(
);
}
Err(err) => {
warn!("R5 travel time query failed, returning hexagons without travel_time: {}", err);
// Don't fail the whole request — just omit travel_time
warn!("Travel time query failed, returning hexagons without travel_time: {}", err);
}
}
}

View file

@ -0,0 +1,97 @@
use std::sync::Arc;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::Json;
use serde::{Deserialize, Serialize};
use tracing::info;
use crate::state::AppState;
/// A single matching place, serialized as one element of `PlacesResponse::places`.
#[derive(Serialize)]
pub struct PlaceResult {
/// Place name, cloned from the place data's `name` column.
name: String,
/// String form of the place's entry in the `place_type` column.
place_type: String,
/// Copied from the place data's `lat` column (presumably latitude in degrees — confirm).
lat: f32,
/// Copied from the place data's `lon` column (presumably longitude in degrees — confirm).
lon: f32,
}
/// JSON body returned by `get_places`.
#[derive(Serialize)]
pub struct PlacesResponse {
/// Matches in ranked order, already truncated to the effective limit.
places: Vec<PlaceResult>,
}
/// Query-string parameters accepted by `get_places`.
#[derive(Deserialize)]
// The one-character field name `q` is the query-string key, hence the lint exemption.
#[allow(clippy::min_ident_chars)]
pub struct PlacesParams {
/// Search text; `get_places` rejects a missing or empty value with 400 Bad Request.
q: Option<String>,
/// Requested maximum number of results; `get_places` defaults to 7 and caps at 20.
limit: Option<usize>,
}
pub async fn get_places(
state: Arc<AppState>,
Query(params): Query<PlacesParams>,
) -> Result<Json<PlacesResponse>, (StatusCode, String)> {
let query = params
.q
.filter(|val| !val.is_empty())
.ok_or((StatusCode::BAD_REQUEST, "Missing 'q' parameter".to_string()))?;
let limit = params.limit.unwrap_or(7).min(20);
let places = tokio::task::spawn_blocking(move || {
let t0 = std::time::Instant::now();
let query_lower = query.to_lowercase();
let pd = &state.place_data;
// Linear scan — ~50-100k rows, <1ms
let mut matches: Vec<(usize, bool, u8, usize)> = pd
.name_lower
.iter()
.enumerate()
.filter_map(|(idx, name)| {
if name.contains(&query_lower) {
let is_prefix = name.starts_with(&query_lower);
Some((idx, is_prefix, pd.type_rank[idx], pd.name[idx].len()))
} else {
None
}
})
.collect();
// Sort: prefix first, then by type rank (cities before hamlets), then shorter names first
matches.sort_unstable_by(|lhs, rhs| {
rhs.1
.cmp(&lhs.1)
.then(lhs.2.cmp(&rhs.2))
.then(lhs.3.cmp(&rhs.3))
});
matches.truncate(limit);
let results: Vec<PlaceResult> = matches
.iter()
.map(|&(idx, ..)| PlaceResult {
name: pd.name[idx].clone(),
place_type: pd.place_type.get(idx).to_string(),
lat: pd.lat[idx],
lon: pd.lon[idx],
})
.collect();
let elapsed = t0.elapsed();
info!(
query = query.as_str(),
results = results.len(),
scanned = pd.name_lower.len(),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/places"
);
results
})
.await
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
Ok(Json(PlacesResponse { places }))
}

View file

@ -69,7 +69,8 @@ pub async fn get_postcodes(
params.filters.as_deref(),
&state.feature_name_to_index,
&state.data.enum_values,
);
)
.map_err(|err| (StatusCode::BAD_REQUEST, err))?;
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let field_indices = parse_field_indices(params.fields.as_deref(), &state.feature_name_to_index);

View file

@ -13,6 +13,7 @@ use crate::parsing::{
cell_for_row, h3_cell_bounds, needs_parent, parse_filters, row_passes_filters,
validate_h3_resolution,
};
use crate::data::RenovationEvent;
use crate::state::AppState;
#[derive(Deserialize)]
@ -41,6 +42,9 @@ pub struct Property {
pub is_construction_date_approximate: Option<bool>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub renovation_history: Vec<RenovationEvent>,
#[serde(flatten)]
pub features: FxHashMap<String, f32>,
}
@ -214,6 +218,7 @@ pub async fn get_hexagon_properties(
),
lat: state.data.lat[row],
lon: state.data.lon[row],
renovation_history: state.data.renovation_history(row).to_vec(),
features,
}
})

View file

@ -2,20 +2,25 @@ use serde::{Deserialize, Serialize};
use tracing::warn;
#[derive(Serialize)]
struct TravelTimeRequest {
origins: Vec<[f64; 2]>,
destination: [f64; 2],
struct R5Request {
origin: [f64; 2],
destinations: Vec<[f64; 2]>,
mode: String,
}
#[derive(Deserialize)]
struct TravelTimeResponse {
travel_times: Vec<Option<f64>>,
struct R5Response {
travel_times: Vec<f64>,
}
/// Call the R5 service to compute many-to-one travel times.
/// Call the R5 Java service to compute one-to-many travel times.
///
/// Returns a Vec of travel times in minutes (one per origin), with None for unreachable origins.
/// `origins` are hex centroids as `[lat, lon]`.
/// `destination` is the user-chosen point as `[lat, lon]`.
/// `mode` is one of "car", "bicycle", "walking", "transit".
///
/// R5 computes from destination to all origins (one-to-many from the user's chosen point).
/// Returns a Vec of travel times in minutes (one per origin), with None for unreachable.
pub async fn fetch_travel_times(
client: &reqwest::Client,
r5_url: &str,
@ -23,36 +28,45 @@ pub async fn fetch_travel_times(
destination: [f64; 2],
mode: &str,
) -> Result<Vec<Option<f64>>, String> {
let url = format!("{}/travel-times", r5_url);
if origins.is_empty() {
return Ok(vec![]);
}
let request_body = TravelTimeRequest {
origins,
destination,
let body = R5Request {
origin: destination,
destinations: origins,
mode: mode.to_string(),
};
let resp = client
.post(&url)
.json(&request_body)
.timeout(std::time::Duration::from_secs(60))
.post(format!("{}/travel-times", r5_url))
.json(&body)
.timeout(std::time::Duration::from_secs(30))
.send()
.await
.map_err(|e| {
warn!("R5 request failed: {}", e);
format!("R5 service error: {}", e)
format!("R5 routing error: {}", e)
})?;
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
warn!("R5 returned {}: {}", status, body);
return Err(format!("R5 service returned {}: {}", status, body));
return Err(format!("R5 returned {}: {}", status, body));
}
let body: TravelTimeResponse = resp.json().await.map_err(|e| {
let r5_resp: R5Response = resp.json().await.map_err(|e| {
warn!("Failed to parse R5 response: {}", e);
format!("Failed to parse R5 response: {}", e)
})?;
Ok(body.travel_times)
// R5 returns -1 for unreachable destinations
let travel_times: Vec<Option<f64>> = r5_resp
.travel_times
.into_iter()
.map(|t| if t < 0.0 { None } else { Some(t) })
.collect();
Ok(travel_times)
}

View file

@ -3,7 +3,7 @@ use std::sync::Arc;
use rustc_hash::FxHashMap;
use crate::auth::TokenCache;
use crate::data::{POICategoryGroup, POIData, PostcodeData, PropertyData};
use crate::data::{POICategoryGroup, POIData, PlaceData, PostcodeData, PropertyData};
use crate::routes::FeaturesResponse;
use crate::utils::GridIndex;
@ -15,6 +15,7 @@ pub struct AppState {
pub h3_cells: Vec<u64>,
pub poi_data: POIData,
pub poi_grid: GridIndex,
pub place_data: PlaceData,
/// Postcode boundary data for high-zoom rendering
pub postcode_data: PostcodeData,
/// O(1) lookup: feature name → index in feature_names/feature_data
@ -43,8 +44,12 @@ pub struct AppState {
pub ollama_url: String,
/// Ollama model name for area summaries (e.g. gemma3:12b)
pub ollama_model: String,
/// R5 routing service URL for real-time travel times (empty = disabled)
/// R5 routing service URL for all travel times (empty = disabled)
pub r5_url: String,
/// Token validation cache (60s TTL)
pub token_cache: Arc<TokenCache>,
/// JSON schema for Ollama structured output in AI filters
pub ai_filters_schema: serde_json::Value,
/// Feature listing portion of the AI filters prompt
pub ai_filters_feature_prompt: String,
}

View file

@ -1,7 +1,9 @@
mod grid_index;
mod hash;
mod interned_column;
mod llm;
pub use grid_index::GridIndex;
pub use hash::{generate_priorities, splitmix64_hash};
pub use interned_column::InternedColumn;
pub use llm::strip_think_blocks;

View file

@ -0,0 +1,15 @@
/// Remove every `<think>...</think>` section from model output.
///
/// Text outside the tags is kept verbatim. If a `<think>` tag is never closed,
/// everything from that tag onward is discarded.
pub fn strip_think_blocks(text: &str) -> String {
    const OPEN_TAG: &str = "<think>";
    const CLOSE_TAG: &str = "</think>";

    let mut cleaned = String::new();
    let mut rest = text;
    loop {
        match rest.split_once(OPEN_TAG) {
            // No further opening tag: the remainder is ordinary output.
            None => {
                cleaned.push_str(rest);
                return cleaned;
            }
            Some((visible, after_open)) => {
                cleaned.push_str(visible);
                match after_open.split_once(CLOSE_TAG) {
                    // Resume scanning right after the closing tag.
                    Some((_hidden, tail)) => rest = tail,
                    // Unclosed block: drop everything that follows it.
                    None => return cleaned,
                }
            }
        }
    }
}