Can't even keep track anymore
This commit is contained in:
parent
dccc1e439d
commit
3a3f899ea2
50 changed files with 1144 additions and 560 deletions
|
|
@ -17,3 +17,7 @@ pub const POSTCODE_SEARCH_OFFSET: f64 = 0.02;
|
|||
/// System prompt for area-summary requests: asks the model for at most one
/// factual, plain-prose sentence describing the supplied area statistics.
pub const AREA_SUMMARY_SYSTEM_PROMPT: &str = "You are an experienced estate agent with an expertise in area analysis. Help the user find his/her dream area or perfect postcode to settle in. The user is looking to buy a property based on the filters they provide. Given area statistics, write at most a single concise sentence summarising the key characteristics of the area. Be factual and highlight notable values. Do not use bullet points or headers — just flowing prose. Do not use markdown formatting. Highlight unusual facts that stand out from the average, but do not exaggerate. If there are no notable characteristics, say so. Always write at most a single sentence! Reason about the relation of different statistics to each other.";
/// Token limit for area summaries (presumably the generation cap sent to the
/// model — confirm at the call site).
pub const AREA_SUMMARY_MAX_TOKENS: usize = 300;
/// Sampling temperature for area summaries.
pub const AREA_SUMMARY_TEMPERATURE: f32 = 0.3;

/// System prompt for AI-filter requests: asks the model to translate a
/// natural-language description into filter settings, emitted as JSON that
/// matches a provided schema, without guessing filters the user did not ask for.
pub const AI_FILTERS_SYSTEM_PROMPT: &str = "You are a property search assistant. The user will describe their ideal property or area in natural language. Your job is to translate their description into filter settings. ONLY set filters the user explicitly mentioned or clearly implied. Leave everything else out. Do not guess or add extra filters. If a request is ambiguous, prefer a wider range. Output valid JSON matching the provided schema.";
/// Token limit for AI-filter responses (presumably the generation cap sent to
/// the model — confirm at the call site).
pub const AI_FILTERS_MAX_TOKENS: usize = 2000;
/// Sampling temperature for AI-filter requests.
pub const AI_FILTERS_TEMPERATURE: f32 = 0.0;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
mod places;
|
||||
mod poi;
|
||||
mod postcodes;
|
||||
mod property;
|
||||
|
||||
pub use places::PlaceData;
|
||||
pub use poi::{POICategoryGroup, POIData};
|
||||
pub use postcodes::PostcodeData;
|
||||
pub use property::{precompute_h3, FeatureStats, Histogram, PropertyData};
|
||||
pub use property::{precompute_h3, FeatureStats, Histogram, PropertyData, RenovationEvent};
|
||||
|
|
|
|||
|
|
@ -156,6 +156,17 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
suffix: " rooms",
|
||||
raw: false,
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Estimated monthly rent",
|
||||
bounds: Bounds::Percentile { low: 2.0, high: 98.0 },
|
||||
step: 25.0,
|
||||
description: "Median monthly private rent for the local area and bedroom count",
|
||||
detail: "Median monthly rental price from ONS Private Rental Market Summary Statistics (Oct 2022 - Sep 2023). Matched by local authority district and estimated bedroom count (habitable rooms minus 1). Based on Valuation Office Agency lettings data.",
|
||||
source: "ons-rental",
|
||||
prefix: "£",
|
||||
suffix: "/mo",
|
||||
raw: false,
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Date of last transaction",
|
||||
bounds: Bounds::Fixed {
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
mod ai_filters;
|
||||
mod area_summary;
|
||||
mod export;
|
||||
mod features;
|
||||
|
|
@ -5,6 +6,7 @@ mod hexagon_stats;
|
|||
pub(crate) mod hexagons;
|
||||
mod me;
|
||||
mod pb_proxy;
|
||||
mod places;
|
||||
mod pois;
|
||||
mod postcode_stats;
|
||||
mod postcodes;
|
||||
|
|
@ -15,6 +17,7 @@ mod stats;
|
|||
mod tiles;
|
||||
pub(crate) mod travel_time;
|
||||
|
||||
pub use ai_filters::{build_feature_prompt, build_ollama_schema, post_ai_filters};
|
||||
pub use area_summary::post_area_summary;
|
||||
pub use export::get_export;
|
||||
pub use features::{build_features_response, get_features, FeatureInfo, FeaturesResponse};
|
||||
|
|
@ -22,6 +25,7 @@ pub use hexagon_stats::get_hexagon_stats;
|
|||
pub use hexagons::get_hexagons;
|
||||
pub use me::get_me;
|
||||
pub use pb_proxy::proxy_to_pocketbase;
|
||||
pub use places::get_places;
|
||||
pub use pois::{get_poi_categories, get_pois};
|
||||
pub use postcode_stats::get_postcode_stats;
|
||||
pub use postcodes::{get_postcode_lookup, get_postcodes};
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ fn build_prompt(req: &AreaSummaryRequest) -> String {
|
|||
}
|
||||
|
||||
/// Strip `<think>...</think>` blocks from model output
|
||||
fn strip_think_blocks(text: &str) -> String {
|
||||
pub(crate) fn strip_think_blocks(text: &str) -> String {
|
||||
let mut result = String::new();
|
||||
let mut remaining = text;
|
||||
while let Some(start) = remaining.find("<think>") {
|
||||
|
|
|
|||
|
|
@ -160,7 +160,8 @@ pub async fn get_export(
|
|||
params.filters.as_deref(),
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
)
|
||||
.map_err(|err| (StatusCode::BAD_REQUEST, err))?;
|
||||
|
||||
let public_url = state.public_url.clone();
|
||||
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ pub enum FeatureInfo {
|
|||
|
||||
#[derive(Clone, Serialize)]
|
||||
pub struct FeatureGroupResponse {
|
||||
name: String,
|
||||
pub(crate) name: String,
|
||||
pub(crate) features: Vec<FeatureInfo>,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -135,7 +135,8 @@ pub async fn get_hexagons(
|
|||
params.filters.as_deref(),
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
)
|
||||
.map_err(|err| (StatusCode::BAD_REQUEST, err))?;
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
||||
let field_indices = parse_field_indices(params.fields.as_deref(), &state.feature_name_to_index);
|
||||
|
|
@ -147,7 +148,7 @@ pub async fn get_hexagons(
|
|||
.map(parse_destination)
|
||||
.transpose()
|
||||
.map_err(|e| (StatusCode::BAD_REQUEST, e))?;
|
||||
let mode = params.mode.clone().unwrap_or_else(|| "transit".into());
|
||||
let mode = params.mode.clone().unwrap_or_else(|| "car".into());
|
||||
|
||||
// Capture what we need for the R5 call before moving state into spawn_blocking
|
||||
let r5_url = state.r5_url.clone();
|
||||
|
|
@ -249,16 +250,16 @@ pub async fn get_hexagons(
|
|||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
|
||||
|
||||
// If a destination was requested and R5 is configured, fetch travel times
|
||||
// If a destination was requested and R5 is configured, fetch travel times.
|
||||
if let Some(dest) = destination {
|
||||
if r5_url.is_empty() {
|
||||
return Err((
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
"Travel time queries require R5 service (R5_URL not configured)".into(),
|
||||
"Travel time queries require routing service (R5_URL not configured)".into(),
|
||||
));
|
||||
}
|
||||
|
||||
// Collect hex centroids from the response
|
||||
// Collect hex centroids
|
||||
let origins: Vec<[f64; 2]> = response
|
||||
.features
|
||||
.iter()
|
||||
|
|
@ -297,8 +298,7 @@ pub async fn get_hexagons(
|
|||
);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("R5 travel time query failed, returning hexagons without travel_time: {}", err);
|
||||
// Don't fail the whole request — just omit travel_time
|
||||
warn!("Travel time query failed, returning hexagons without travel_time: {}", err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
97
server-rs/src/routes/places.rs
Normal file
97
server-rs/src/routes/places.rs
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::Query;
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::Json;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::info;
|
||||
|
||||
use crate::state::AppState;
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct PlaceResult {
|
||||
name: String,
|
||||
place_type: String,
|
||||
lat: f32,
|
||||
lon: f32,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct PlacesResponse {
|
||||
places: Vec<PlaceResult>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[allow(clippy::min_ident_chars)]
|
||||
pub struct PlacesParams {
|
||||
q: Option<String>,
|
||||
limit: Option<usize>,
|
||||
}
|
||||
|
||||
pub async fn get_places(
|
||||
state: Arc<AppState>,
|
||||
Query(params): Query<PlacesParams>,
|
||||
) -> Result<Json<PlacesResponse>, (StatusCode, String)> {
|
||||
let query = params
|
||||
.q
|
||||
.filter(|val| !val.is_empty())
|
||||
.ok_or((StatusCode::BAD_REQUEST, "Missing 'q' parameter".to_string()))?;
|
||||
|
||||
let limit = params.limit.unwrap_or(7).min(20);
|
||||
|
||||
let places = tokio::task::spawn_blocking(move || {
|
||||
let t0 = std::time::Instant::now();
|
||||
let query_lower = query.to_lowercase();
|
||||
let pd = &state.place_data;
|
||||
|
||||
// Linear scan — ~50-100k rows, <1ms
|
||||
let mut matches: Vec<(usize, bool, u8, usize)> = pd
|
||||
.name_lower
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, name)| {
|
||||
if name.contains(&query_lower) {
|
||||
let is_prefix = name.starts_with(&query_lower);
|
||||
Some((idx, is_prefix, pd.type_rank[idx], pd.name[idx].len()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort: prefix first, then by type rank (cities before hamlets), then shorter names first
|
||||
matches.sort_unstable_by(|lhs, rhs| {
|
||||
rhs.1
|
||||
.cmp(&lhs.1)
|
||||
.then(lhs.2.cmp(&rhs.2))
|
||||
.then(lhs.3.cmp(&rhs.3))
|
||||
});
|
||||
|
||||
matches.truncate(limit);
|
||||
|
||||
let results: Vec<PlaceResult> = matches
|
||||
.iter()
|
||||
.map(|&(idx, ..)| PlaceResult {
|
||||
name: pd.name[idx].clone(),
|
||||
place_type: pd.place_type.get(idx).to_string(),
|
||||
lat: pd.lat[idx],
|
||||
lon: pd.lon[idx],
|
||||
})
|
||||
.collect();
|
||||
|
||||
let elapsed = t0.elapsed();
|
||||
info!(
|
||||
query = query.as_str(),
|
||||
results = results.len(),
|
||||
scanned = pd.name_lower.len(),
|
||||
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
|
||||
"GET /api/places"
|
||||
);
|
||||
|
||||
results
|
||||
})
|
||||
.await
|
||||
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
|
||||
|
||||
Ok(Json(PlacesResponse { places }))
|
||||
}
|
||||
|
|
@ -69,7 +69,8 @@ pub async fn get_postcodes(
|
|||
params.filters.as_deref(),
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
)
|
||||
.map_err(|err| (StatusCode::BAD_REQUEST, err))?;
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
||||
let field_indices = parse_field_indices(params.fields.as_deref(), &state.feature_name_to_index);
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ use crate::parsing::{
|
|||
cell_for_row, h3_cell_bounds, needs_parent, parse_filters, row_passes_filters,
|
||||
validate_h3_resolution,
|
||||
};
|
||||
use crate::data::RenovationEvent;
|
||||
use crate::state::AppState;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
|
|
@ -41,6 +42,9 @@ pub struct Property {
|
|||
|
||||
pub is_construction_date_approximate: Option<bool>,
|
||||
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub renovation_history: Vec<RenovationEvent>,
|
||||
|
||||
#[serde(flatten)]
|
||||
pub features: FxHashMap<String, f32>,
|
||||
}
|
||||
|
|
@ -214,6 +218,7 @@ pub async fn get_hexagon_properties(
|
|||
),
|
||||
lat: state.data.lat[row],
|
||||
lon: state.data.lon[row],
|
||||
renovation_history: state.data.renovation_history(row).to_vec(),
|
||||
features,
|
||||
}
|
||||
})
|
||||
|
|
|
|||
|
|
@ -2,20 +2,25 @@ use serde::{Deserialize, Serialize};
|
|||
use tracing::warn;
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct TravelTimeRequest {
|
||||
origins: Vec<[f64; 2]>,
|
||||
destination: [f64; 2],
|
||||
struct R5Request {
|
||||
origin: [f64; 2],
|
||||
destinations: Vec<[f64; 2]>,
|
||||
mode: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct TravelTimeResponse {
|
||||
travel_times: Vec<Option<f64>>,
|
||||
struct R5Response {
|
||||
travel_times: Vec<f64>,
|
||||
}
|
||||
|
||||
/// Call the R5 service to compute many-to-one travel times.
|
||||
/// Call the R5 Java service to compute one-to-many travel times.
|
||||
///
|
||||
/// Returns a Vec of travel times in minutes (one per origin), with None for unreachable origins.
|
||||
/// `origins` are hex centroids as `[lat, lon]`.
|
||||
/// `destination` is the user-chosen point as `[lat, lon]`.
|
||||
/// `mode` is one of "car", "bicycle", "walking", "transit".
|
||||
///
|
||||
/// R5 computes from destination to all origins (one-to-many from the user's chosen point).
|
||||
/// Returns a Vec of travel times in minutes (one per origin), with None for unreachable.
|
||||
pub async fn fetch_travel_times(
|
||||
client: &reqwest::Client,
|
||||
r5_url: &str,
|
||||
|
|
@ -23,36 +28,45 @@ pub async fn fetch_travel_times(
|
|||
destination: [f64; 2],
|
||||
mode: &str,
|
||||
) -> Result<Vec<Option<f64>>, String> {
|
||||
let url = format!("{}/travel-times", r5_url);
|
||||
if origins.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let request_body = TravelTimeRequest {
|
||||
origins,
|
||||
destination,
|
||||
let body = R5Request {
|
||||
origin: destination,
|
||||
destinations: origins,
|
||||
mode: mode.to_string(),
|
||||
};
|
||||
|
||||
let resp = client
|
||||
.post(&url)
|
||||
.json(&request_body)
|
||||
.timeout(std::time::Duration::from_secs(60))
|
||||
.post(format!("{}/travel-times", r5_url))
|
||||
.json(&body)
|
||||
.timeout(std::time::Duration::from_secs(30))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
warn!("R5 request failed: {}", e);
|
||||
format!("R5 service error: {}", e)
|
||||
format!("R5 routing error: {}", e)
|
||||
})?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
warn!("R5 returned {}: {}", status, body);
|
||||
return Err(format!("R5 service returned {}: {}", status, body));
|
||||
return Err(format!("R5 returned {}: {}", status, body));
|
||||
}
|
||||
|
||||
let body: TravelTimeResponse = resp.json().await.map_err(|e| {
|
||||
let r5_resp: R5Response = resp.json().await.map_err(|e| {
|
||||
warn!("Failed to parse R5 response: {}", e);
|
||||
format!("Failed to parse R5 response: {}", e)
|
||||
})?;
|
||||
|
||||
Ok(body.travel_times)
|
||||
// R5 returns -1 for unreachable destinations
|
||||
let travel_times: Vec<Option<f64>> = r5_resp
|
||||
.travel_times
|
||||
.into_iter()
|
||||
.map(|t| if t < 0.0 { None } else { Some(t) })
|
||||
.collect();
|
||||
|
||||
Ok(travel_times)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use std::sync::Arc;
|
|||
use rustc_hash::FxHashMap;
|
||||
|
||||
use crate::auth::TokenCache;
|
||||
use crate::data::{POICategoryGroup, POIData, PostcodeData, PropertyData};
|
||||
use crate::data::{POICategoryGroup, POIData, PlaceData, PostcodeData, PropertyData};
|
||||
use crate::routes::FeaturesResponse;
|
||||
use crate::utils::GridIndex;
|
||||
|
||||
|
|
@ -15,6 +15,7 @@ pub struct AppState {
|
|||
pub h3_cells: Vec<u64>,
|
||||
pub poi_data: POIData,
|
||||
pub poi_grid: GridIndex,
|
||||
pub place_data: PlaceData,
|
||||
/// Postcode boundary data for high-zoom rendering
|
||||
pub postcode_data: PostcodeData,
|
||||
/// O(1) lookup: feature name → index in feature_names/feature_data
|
||||
|
|
@ -43,8 +44,12 @@ pub struct AppState {
|
|||
pub ollama_url: String,
|
||||
/// Ollama model name for area summaries (e.g. gemma3:12b)
|
||||
pub ollama_model: String,
|
||||
/// R5 routing service URL for real-time travel times (empty = disabled)
|
||||
/// R5 routing service URL for all travel times (empty = disabled)
|
||||
pub r5_url: String,
|
||||
/// Token validation cache (60s TTL)
|
||||
pub token_cache: Arc<TokenCache>,
|
||||
/// JSON schema for Ollama structured output in AI filters
|
||||
pub ai_filters_schema: serde_json::Value,
|
||||
/// Feature listing portion of the AI filters prompt
|
||||
pub ai_filters_feature_prompt: String,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
mod grid_index;
|
||||
mod hash;
|
||||
mod interned_column;
|
||||
mod llm;
|
||||
|
||||
pub use grid_index::GridIndex;
|
||||
pub use hash::{generate_priorities, splitmix64_hash};
|
||||
pub use interned_column::InternedColumn;
|
||||
pub use llm::strip_think_blocks;
|
||||
|
|
|
|||
15
server-rs/src/utils/llm.rs
Normal file
15
server-rs/src/utils/llm.rs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
/// Strip `<think>...</think>` blocks from model output.
///
/// Text outside the blocks is copied through unchanged (no trimming). Each
/// `<think>` is paired with the first `</think>` that follows it, so blocks
/// are not treated as nested. If a `<think>` is never closed, everything from
/// that tag to the end of the input is dropped. A `</think>` with no
/// preceding `<think>` is left in place.
pub fn strip_think_blocks(text: &str) -> String {
    const OPEN_TAG: &str = "<think>";
    const CLOSE_TAG: &str = "</think>";

    // The output can never be longer than the input.
    let mut result = String::with_capacity(text.len());
    let mut remaining = text;
    while let Some((before, after_open)) = remaining.split_once(OPEN_TAG) {
        result.push_str(before);
        match after_open.split_once(CLOSE_TAG) {
            // Resume scanning right after the closing tag.
            Some((_, after_close)) => remaining = after_close,
            // Unterminated block: discard the rest of the input.
            None => return result,
        }
    }
    result.push_str(remaining);
    result
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue