Deploy again

This commit is contained in:
Andras Schmelczer 2026-02-19 22:24:06 +00:00
parent ffe080adef
commit 787428f1a5
18 changed files with 717 additions and 223 deletions

View file

@ -19,7 +19,7 @@ pub const AI_FILTERS_TEMPERATURE: f32 = 0.0;
/// Inner London free zone bounds (south, west, north, east) — roughly zones 1–2.
/// Users without a license can only query data within these bounds.
pub const FREE_ZONE_BOUNDS: (f64, f64, f64, f64) = (51.48, -0.18, 51.54, -0.02);
pub const FREE_ZONE_BOUNDS: (f64, f64, f64, f64) = (51.42, -0.34, 51.60, 0.14);
/// Homepage demo center (lat, lng). Unlicensed hexagon requests are allowed
/// when the center of the requested bounds is within DEMO_CENTER_TOLERANCE of this point.

View file

@ -437,82 +437,11 @@ impl PropertyData {
tracing::info!(rows = prop_count, "Properties joined with postcodes");
// Load online listings (buy + rent) — these have their own lat/lon.
// Normalize column names from finder output to server-expected names.
// strict=false: columns already using the new name are silently skipped.
// Expects the new finder parquet format with human-readable column names.
let load_listings = |path: &Path, label: &str| -> anyhow::Result<DataFrame> {
tracing::info!("Loading {} listings from {:?}", label, path);
let mut lf = LazyFrame::scan_parquet(path, Default::default())
let lf = LazyFrame::scan_parquet(path, Default::default())
.with_context(|| format!("Failed to scan {label} listings parquet"))?;
let schema = lf
.collect_schema()
.with_context(|| format!("Failed to read {label} listings schema"))?;
// Rename raw finder columns → server-expected names (no-op if already renamed)
let lf = lf.rename(
[
"postcode",
"address",
"latitude",
"longitude",
"bedrooms",
"bathrooms",
"total_rooms",
"tenure",
"property_type",
"property_sub_type",
"price_qualifier",
"floorspace_sqm",
"url",
"features",
],
[
"Postcode",
"Address per Property Register",
"lat",
"lon",
"Bedrooms",
"Bathrooms",
"Number of bedrooms & living rooms",
"Leashold/Freehold",
"Property type",
"Property sub-type",
"Price qualifier",
"Total floor area (sqm)",
"Listing URL",
"Listing features",
],
false,
);
// Derive missing columns for raw finder output that doesn't have them
let listing_status = if label == "buy" {
"For sale"
} else {
"For rent"
};
let lf = if schema.get("Listing status").is_none() {
lf.with_column(lit(listing_status).alias("Listing status"))
} else {
lf
};
let lf = if schema.get("Asking price").is_none() && schema.get("price").is_some() {
if label == "buy" {
lf.with_column(col("price").alias("Asking price"))
} else {
// Normalize rent to monthly: weekly×52/12, yearly÷12
lf.with_column(
when(col("price_frequency").eq(lit("weekly")))
.then(col("price").cast(DataType::Float64) * lit(52.0 / 12.0))
.when(col("price_frequency").eq(lit("yearly")))
.then(col("price").cast(DataType::Float64) / lit(12.0))
.otherwise(col("price").cast(DataType::Float64))
.cast(DataType::Int64)
.alias("Asking rent (monthly)"),
)
}
} else {
lf
};
// Join with postcodes for area features (listings have their own lat/lon)
let pc_no_coords = postcode_df.clone().lazy().drop(["lat", "lon"]);

View file

@ -478,7 +478,7 @@ async fn main() -> anyhow::Result<()> {
)
.route(
"/api/export",
get(move |ext, query| routes::get_export(state_export.clone(), ext, query))
get(move |headers, ext, query| routes::get_export(state_export.clone(), headers, ext, query))
.layer(ConcurrencyLimitLayer::new(3)),
)
.route("/api/me", get(routes::get_me))
@ -592,6 +592,16 @@ async fn main() -> anyhow::Result<()> {
.layer(CompressionLayer::new().zstd(true).gzip(true))
.layer(TraceLayer::new_for_http());
// Lock all current and future memory pages to prevent swapping
unsafe {
if libc::mlockall(libc::MCL_CURRENT | libc::MCL_FUTURE) != 0 {
let err = std::io::Error::last_os_error();
tracing::warn!("mlockall failed (need CAP_IPC_LOCK or sufficient RLIMIT_MEMLOCK): {err}");
} else {
info!("All memory pages locked (mlockall)");
}
}
let addr = consts::SERVER_ADDRESS;
let listener = tokio::net::TcpListener::bind(addr)
.await

View file

@ -11,6 +11,7 @@ use crate::state::AppState;
const OG_PLACEHOLDER: &str = r#"<meta name="x-og-placeholder" content="__PERFECT_POSTCODE_OG_TAGS__"/>"#;
pub async fn og_middleware(request: Request, next: Next) -> Response {
let path = request.uri().path().to_string();
// Capture the query string before passing the request through
let query_string = request.uri().query().unwrap_or("").to_string();
@ -19,6 +20,11 @@ pub async fn og_middleware(request: Request, next: Next) -> Response {
let response = next.run(request).await;
// Only inject OG tags into SPA HTML responses, not proxied PocketBase responses
if path.starts_with("/pb/") || path.starts_with("/api/") {
return response;
}
let content_type = response
.headers()
.get(header::CONTENT_TYPE)

View file

@ -1,6 +1,6 @@
use reqwest::Client;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use tracing::info;
#[derive(Deserialize)]
struct AuthResponse {
@ -432,29 +432,25 @@ pub async fn ensure_oauth_providers(
// Update OAuth2 providers
let providers = settings
.pointer_mut("/oauth2/providers")
.and_then(|v| v.as_array_mut());
.and_then(|v| v.as_array_mut())
.ok_or_else(|| anyhow::anyhow!("PocketBase settings missing oauth2.providers array — cannot configure OAuth"))?;
if let Some(providers) = providers {
for provider in providers.iter_mut() {
let name = provider
.get("name")
.and_then(|n| n.as_str())
.unwrap_or("");
match name {
"google" => {
provider["clientId"] = serde_json::json!(google_client_id);
provider["clientSecret"] = serde_json::json!(google_client_secret);
provider["enabled"] = serde_json::json!(true);
info!("Configured Google OAuth provider");
}
_ => {}
}
let google = match providers
.iter()
.position(|p| p.get("name").and_then(|n| n.as_str()) == Some("google"))
{
Some(idx) => &mut providers[idx],
None => {
info!("Google provider not found in PocketBase settings — adding it");
providers.push(serde_json::json!({"name": "google"}));
providers.last_mut().expect("just pushed")
}
} else {
warn!("PocketBase settings missing oauth2.providers array — cannot configure OAuth");
return Ok(());
}
};
google["clientId"] = serde_json::json!(google_client_id);
google["clientSecret"] = serde_json::json!(google_client_secret);
google["enabled"] = serde_json::json!(true);
info!("Configured Google OAuth provider");
// PATCH settings back
let patch_resp = client

View file

@ -37,7 +37,7 @@ pub use pois::{get_poi_categories, get_pois};
pub use postcode_stats::get_postcode_stats;
pub use postcodes::{get_postcode_lookup, get_postcodes};
pub use properties::get_hexagon_properties;
pub use screenshot::get_screenshot;
pub use screenshot::{fetch_screenshot_bytes, get_screenshot};
pub use shorten::{get_short_url, post_shorten};
pub use streetview::get_streetview;
pub use invites::{get_invite, post_invites, post_redeem_invite};

View file

@ -40,10 +40,10 @@ pub fn build_ollama_schema(_features: &FeaturesResponse) -> Value {
"type": "object",
"properties": {
"name": { "type": "string" },
"min": { "type": "number" },
"max": { "type": "number" }
"bound": { "type": "string", "enum": ["min", "max"] },
"value": { "type": "number" }
},
"required": ["name"]
"required": ["name", "bound", "value"]
}
},
"enum_filters": {
@ -80,11 +80,11 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
Rules:\n\
- ONLY set filters the user explicitly mentioned or clearly implied.\n\
- Leave out any filter the user did not mention. Empty arrays are fine.\n\
- For numeric filters, omit \"min\" to leave the lower bound open, \
omit \"max\" to leave the upper bound open.\n\
- Each numeric filter sets ONE bound only: \"min\" (at least this value) \
or \"max\" (at most this value). Never set two filters on the same feature.\n\
- Use EXACT feature names from the list spelling, capitalisation, and punctuation must match.\n\
- \"cheap\" / \"affordable\" = lower price range. \"expensive\" = higher price range.\n\
- \"low crime\" / \"safe\" = low values on crime features. \
- \"low crime\" / \"safe\" = low values on Serious crime and Minor crime summary features. \
\"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of parks within 2km.\n\
- When the user says a number like \"under 400k\", interpret it as 400000.\n\
- When the user says \"3 bed\" or \"3 bedroom\", use Number of bedrooms & living rooms \
@ -98,6 +98,10 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
// Feature catalogue
parts.push("\n--- AVAILABLE FEATURES ---\n".to_string());
for group in &features.groups {
// Skip individual crime features — only expose "Crime summary" aggregates
if group.name == "Crime" {
continue;
}
parts.push(format!("## {}", group.name));
for feature in &group.features {
match feature {
@ -141,7 +145,7 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
parts.push(
"User: \"cheap freehold house under 400k\"\n\
Output: {\"numeric_filters\": [{\"name\": \"Last known price\", \"max\": 400000}], \
Output: {\"numeric_filters\": [{\"name\": \"Last known price\", \"bound\": \"max\", \"value\": 400000}], \
\"enum_filters\": [{\"name\": \"Leashold/Freehold\", \"values\": [\"Freehold\"]}, \
{\"name\": \"Property type\", \"values\": [\"Detached\", \"Semi-Detached\", \"Terraced\"]}], \
\"notes\": \"\"}"
@ -151,12 +155,12 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
parts.push(
"\nUser: \"safe quiet area with good schools and parks\"\n\
Output: {\"numeric_filters\": [\
{\"name\": \"Violence and sexual offences (avg/yr)\", \"max\": 20}, \
{\"name\": \"Burglary (avg/yr)\", \"max\": 10}, \
{\"name\": \"Noise (dB)\", \"max\": 55}, \
{\"name\": \"Good+ primary schools within 5km\", \"min\": 5}, \
{\"name\": \"Good+ secondary schools within 5km\", \"min\": 2}, \
{\"name\": \"Number of parks within 2km\", \"min\": 3}], \
{\"name\": \"Serious crime (avg/yr)\", \"bound\": \"max\", \"value\": 20}, \
{\"name\": \"Minor crime (avg/yr)\", \"bound\": \"max\", \"value\": 50}, \
{\"name\": \"Noise (dB)\", \"bound\": \"max\", \"value\": 55}, \
{\"name\": \"Good+ primary schools within 5km\", \"bound\": \"min\", \"value\": 5}, \
{\"name\": \"Good+ secondary schools within 5km\", \"bound\": \"min\", \"value\": 2}, \
{\"name\": \"Number of parks within 2km\", \"bound\": \"min\", \"value\": 3}], \
\"enum_filters\": [], \"notes\": \"\"}"
.to_string(),
);
@ -164,9 +168,9 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
parts.push(
"\nUser: \"3 bed flat under 300k with fast broadband near the beach\"\n\
Output: {\"numeric_filters\": [\
{\"name\": \"Last known price\", \"max\": 300000}, \
{\"name\": \"Number of bedrooms & living rooms\", \"min\": 4}, \
{\"name\": \"Max available download speed (Mbps)\", \"min\": 100}], \
{\"name\": \"Last known price\", \"bound\": \"max\", \"value\": 300000}, \
{\"name\": \"Number of bedrooms & living rooms\", \"bound\": \"min\", \"value\": 4}, \
{\"name\": \"Max available download speed (Mbps)\", \"bound\": \"min\", \"value\": 100}], \
\"enum_filters\": [{\"name\": \"Property type\", \"values\": [\"Flat\"]}], \
\"notes\": \"No filter for: beach proximity\"}"
.to_string(),
@ -175,9 +179,9 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
parts.push(
"\nUser: \"large family home with a garden near restaurants\"\n\
Output: {\"numeric_filters\": [\
{\"name\": \"Total floor area (sqm)\", \"min\": 100}, \
{\"name\": \"Number of bedrooms & living rooms\", \"min\": 5}, \
{\"name\": \"Number of restaurants within 2km\", \"min\": 10}], \
{\"name\": \"Total floor area (sqm)\", \"bound\": \"min\", \"value\": 100}, \
{\"name\": \"Number of bedrooms & living rooms\", \"bound\": \"min\", \"value\": 5}, \
{\"name\": \"Number of restaurants within 2km\", \"bound\": \"min\", \"value\": 10}], \
\"enum_filters\": [{\"name\": \"Property type\", \
\"values\": [\"Detached\", \"Semi-Detached\"]}], \
\"notes\": \"No filter for: garden\"}"
@ -187,7 +191,7 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
// Output format reminder
parts.push(
"\n--- OUTPUT FORMAT ---\n\
{\"numeric_filters\": [...], \"enum_filters\": [...], \"notes\": \"...\"}\n\
{\"numeric_filters\": [{\"name\": \"...\", \"bound\": \"min\"|\"max\", \"value\": N}, ...], \"enum_filters\": [...], \"notes\": \"...\"}\n\
Respond with ONLY the JSON object. No explanation."
.to_string(),
);
@ -244,10 +248,10 @@ pub async fn post_ai_filters(
/// Validate LLM output against feature metadata and convert to FeatureFilters format.
///
/// Input format (array-based, grammar-friendly):
/// Input format (array-based, each numeric filter sets one bound):
/// ```json
/// {
/// "numeric_filters": [{"name": "Last known price", "min": 0, "max": 300000}],
/// "numeric_filters": [{"name": "Last known price", "bound": "max", "value": 300000}],
/// "enum_filters": [{"name": "Leashold/Freehold", "values": ["Freehold"]}]
/// }
/// ```
@ -278,7 +282,7 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
}
}
// Process numeric filters
// Process numeric filters — each sets one bound (min or max)
if let Some(arr) = raw.get("numeric_filters").and_then(|val| val.as_array()) {
for item in arr {
let name = match item.get("name").and_then(|val| val.as_str()) {
@ -289,16 +293,19 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
Some(range) => *range,
None => continue,
};
let filter_min = item
.get("min")
.and_then(|val| val.as_f64())
.map(|num| num.max(feat_min as f64).min(feat_max as f64) as f32)
.unwrap_or(feat_min);
let filter_max = item
.get("max")
.and_then(|val| val.as_f64())
.map(|num| num.max(feat_min as f64).min(feat_max as f64) as f32)
.unwrap_or(feat_max);
let bound = match item.get("bound").and_then(|val| val.as_str()) {
Some(b) => b,
None => continue,
};
let value = match item.get("value").and_then(|val| val.as_f64()) {
Some(v) => v.max(feat_min as f64).min(feat_max as f64) as f32,
None => continue,
};
let (filter_min, filter_max) = match bound {
"min" => (value, feat_max),
"max" => (feat_min, value),
_ => continue,
};
// Only include if range is narrower than full range
if filter_min > feat_min || filter_max < feat_max {
result.insert(name.to_string(), json!([filter_min, filter_max]));

View file

@ -3,7 +3,7 @@ use std::hash::{Hash, Hasher};
use std::sync::Arc;
use axum::extract::Query;
use axum::http::{header, StatusCode};
use axum::http::{header, HeaderMap, StatusCode};
use axum::response::IntoResponse;
use axum::Extension;
use rust_xlsxwriter::{Format, FormatAlign, FormatBorder, Image, Url, Workbook};
@ -14,7 +14,7 @@ use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::licensing::check_license_bounds;
use crate::parsing::{parse_field_indices, parse_filters, require_bounds, row_passes_filters};
use crate::routes::FeatureInfo;
use crate::routes::{fetch_screenshot_bytes, FeatureInfo};
use crate::state::AppState;
const MAX_EXPORT_POSTCODES: usize = 250;
@ -120,39 +120,9 @@ fn build_frontend_params(
parts.join("&")
}
/// Fetch a screenshot image from the screenshot service for Excel export.
async fn fetch_screenshot(
state: &AppState,
frontend_params: &str,
) -> Option<Vec<u8>> {
let screenshot_base = &state.screenshot_url;
let url = format!("{}/screenshot?{}", screenshot_base, frontend_params);
match state.http_client.get(&url).send().await {
Ok(resp) if resp.status().is_success() => match resp.bytes().await {
Ok(bytes) => {
info!(bytes = bytes.len(), "Fetched screenshot for export");
Some(bytes.to_vec())
}
Err(err) => {
warn!("Failed to read screenshot response for export: {err}");
None
}
},
Ok(resp) => {
warn!(status = %resp.status(), "Screenshot service returned error for export");
None
}
Err(err) => {
warn!("Failed to reach screenshot service for export: {err}");
None
}
}
}
pub async fn get_export(
state: Arc<AppState>,
headers: HeaderMap,
Extension(user): Extension<OptionalUser>,
Query(params): Query<ExportParams>,
) -> Result<impl IntoResponse, axum::response::Response> {
@ -186,7 +156,18 @@ pub async fn get_export(
build_frontend_params(center_lat, center_lon, zoom, filters_str.as_deref());
// Fetch screenshot (async, before spawn_blocking)
let screenshot_bytes = fetch_screenshot(&state, &frontend_params).await;
let auth_header = headers.get(header::AUTHORIZATION);
let screenshot_bytes = match fetch_screenshot_bytes(&state, &frontend_params, auth_header).await
{
Ok(bytes) => {
info!(bytes = bytes.len(), "Fetched screenshot for export");
Some(bytes)
}
Err(err) => {
warn!("Screenshot failed for export: {err}");
None
}
};
// Build feature name → description map from the precomputed features response
let feature_descriptions: FxHashMap<String, String> = state

View file

@ -1,4 +1,5 @@
use std::sync::Arc;
use std::sync::{Arc, LazyLock};
use std::time::Duration;
use axum::body::Body;
use axum::extract::Request;
@ -8,6 +9,17 @@ use tracing::warn;
use crate::state::AppState;
/// Constructs the HTTP client backing [`PROXY_CLIENT`].
///
/// Redirects are never followed, requests are capped at 30 seconds overall,
/// and establishing a connection is capped at 5 seconds.
///
/// # Panics
/// Panics if the underlying client cannot be built.
fn build_proxy_client() -> reqwest::Client {
    let request_timeout = Duration::from_secs(30);
    let connect_timeout = Duration::from_secs(5);

    reqwest::Client::builder()
        .connect_timeout(connect_timeout)
        .timeout(request_timeout)
        .redirect(reqwest::redirect::Policy::none())
        .build()
        .expect("Failed to build proxy HTTP client")
}

/// HTTP client reserved for proxying. Because it does not follow redirects,
/// 3xx responses reach the browser untouched (needed for OAuth flows).
/// Built lazily on first use.
static PROXY_CLIENT: LazyLock<reqwest::Client> = LazyLock::new(build_proxy_client);
pub async fn proxy_to_pocketbase(state: Arc<AppState>, req: Request) -> impl IntoResponse {
let pb_url = state.pocketbase_url.trim_end_matches('/');
@ -21,7 +33,7 @@ pub async fn proxy_to_pocketbase(state: Arc<AppState>, req: Request) -> impl Int
let url = format!("{pb_url}{target_path}{query}");
let method = req.method().clone();
let mut builder = state.http_client.request(method, &url);
let mut builder = PROXY_CLIENT.request(method, &url);
// Forward only safe headers (allowlist)
const ALLOWED_HEADERS: &[&str] = &[
@ -37,6 +49,21 @@ pub async fn proxy_to_pocketbase(state: Arc<AppState>, req: Request) -> impl Int
}
}
// Forward client IP so PocketBase rate-limits per-user, not per-server.
// Prefer existing X-Forwarded-For (from reverse proxy), fall back to X-Real-IP.
// NOTE(review): both headers are client-controlled unless a trusted reverse proxy
// overwrites them — confirm this service is never exposed without one.
if let Some(xff) = req.headers().get("x-forwarded-for") {
    builder = builder.header("X-Forwarded-For", xff.clone());
    // The first entry in the chain is the original client. `split` always yields
    // at least one item, so blank entries are filtered out explicitly: an empty
    // or malformed header must not produce an empty X-Real-IP.
    let client_ip = xff
        .to_str()
        .ok()
        .and_then(|chain| chain.split(',').next())
        .map(str::trim)
        .filter(|ip| !ip.is_empty());
    if let Some(client_ip) = client_ip {
        builder = builder.header("X-Real-IP", client_ip);
    }
} else if let Some(real_ip) = req.headers().get("x-real-ip") {
    // No forwarding chain present: reuse X-Real-IP for both headers.
    builder = builder.header("X-Forwarded-For", real_ip.clone());
    builder = builder.header("X-Real-IP", real_ip.clone());
}
// Forward body
let body_bytes = match axum::body::to_bytes(req.into_body(), 10 * 1024 * 1024).await {
Ok(bytes) => bytes,

View file

@ -1,55 +1,65 @@
use std::sync::Arc;
use axum::http::header::HeaderValue;
use axum::http::{header, HeaderMap, StatusCode, Uri};
use axum::response::IntoResponse;
use tracing::{info, warn};
use crate::state::AppState;
/// Retrieve a screenshot image from the screenshot service as raw bytes.
/// Shared by the `/api/screenshot` proxy and the xlsx export.
///
/// `query_string` is appended verbatim after `/screenshot?`. When
/// `auth_header` is present it is forwarded as the `Authorization` header.
///
/// # Errors
/// Returns a human-readable message when the service cannot be reached,
/// answers with a non-success status (the response body is included), or
/// its body cannot be read.
pub async fn fetch_screenshot_bytes(
    state: &AppState,
    query_string: &str,
    auth_header: Option<&HeaderValue>,
) -> Result<Vec<u8>, String> {
    let url = format!("{}/screenshot?{}", state.screenshot_url, query_string);
    info!("Fetching screenshot from: {}", url);

    let request = state.http_client.get(&url);
    let request = match auth_header {
        Some(auth) => request.header(header::AUTHORIZATION, auth),
        None => request,
    };

    let response = request
        .send()
        .await
        .map_err(|err| format!("Failed to reach screenshot service: {err}"))?;

    // Any non-2xx answer is surfaced together with whatever body came back.
    let status = response.status();
    if !status.is_success() {
        let body = response.text().await.unwrap_or_default();
        return Err(format!("Screenshot service returned {status}: {body}"));
    }

    response
        .bytes()
        .await
        .map(|payload| payload.to_vec())
        .map_err(|err| format!("Failed to read screenshot response: {err}"))
}
pub async fn get_screenshot(
state: Arc<AppState>,
headers: HeaderMap,
uri: Uri,
) -> impl IntoResponse {
let screenshot_base = &state.screenshot_url;
let qs = uri.query().unwrap_or_default();
let auth = headers.get(header::AUTHORIZATION);
let qs = uri
.query()
.map(|q| format!("?{q}"))
.unwrap_or_default();
let url = format!("{screenshot_base}/screenshot{qs}");
info!("Proxying screenshot request to: {}", url);
let mut req = state.http_client.get(&url);
if let Some(auth) = headers.get(header::AUTHORIZATION) {
req = req.header(header::AUTHORIZATION, auth);
}
match req.send().await {
Ok(resp) if resp.status().is_success() => match resp.bytes().await {
Ok(bytes) => (
StatusCode::OK,
[
(header::CONTENT_TYPE, "image/png"),
(header::CACHE_CONTROL, "public, max-age=86400"),
],
bytes,
)
.into_response(),
Err(err) => {
warn!("Failed to read screenshot response: {}", err);
(StatusCode::BAD_GATEWAY, "Failed to read screenshot").into_response()
}
},
Ok(resp) => {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
warn!("Screenshot service returned status {}: {}", status, body);
(StatusCode::BAD_GATEWAY, "Screenshot service error").into_response()
}
match fetch_screenshot_bytes(&state, qs, auth).await {
Ok(bytes) => (
StatusCode::OK,
[
(header::CONTENT_TYPE, "image/png"),
(header::CACHE_CONTROL, "public, max-age=86400"),
],
bytes,
)
.into_response(),
Err(err) => {
warn!("Failed to reach screenshot service: {}", err);
(StatusCode::BAD_GATEWAY, "Screenshot service unavailable").into_response()
warn!("{err}");
(StatusCode::BAD_GATEWAY, "Screenshot service error").into_response()
}
}
}