Deploy again

This commit is contained in:
Andras Schmelczer 2026-02-19 22:24:06 +00:00
parent ffe080adef
commit 787428f1a5
18 changed files with 717 additions and 223 deletions

View file

@ -40,10 +40,10 @@ pub fn build_ollama_schema(_features: &FeaturesResponse) -> Value {
"type": "object",
"properties": {
"name": { "type": "string" },
"min": { "type": "number" },
"max": { "type": "number" }
"bound": { "type": "string", "enum": ["min", "max"] },
"value": { "type": "number" }
},
"required": ["name"]
"required": ["name", "bound", "value"]
}
},
"enum_filters": {
@ -80,11 +80,11 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
Rules:\n\
- ONLY set filters the user explicitly mentioned or clearly implied.\n\
- Leave out any filter the user did not mention. Empty arrays are fine.\n\
- For numeric filters, omit \"min\" to leave the lower bound open, \
omit \"max\" to leave the upper bound open.\n\
- Each numeric filter sets ONE bound only: \"min\" (at least this value) \
or \"max\" (at most this value). Never set two filters on the same feature.\n\
- Use EXACT feature names from the list spelling, capitalisation, and punctuation must match.\n\
- \"cheap\" / \"affordable\" = lower price range. \"expensive\" = higher price range.\n\
- \"low crime\" / \"safe\" = low values on crime features. \
- \"low crime\" / \"safe\" = low values on Serious crime and Minor crime summary features. \
\"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of parks within 2km.\n\
- When the user says a number like \"under 400k\", interpret it as 400000.\n\
- When the user says \"3 bed\" or \"3 bedroom\", use Number of bedrooms & living rooms \
@ -98,6 +98,10 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
// Feature catalogue
parts.push("\n--- AVAILABLE FEATURES ---\n".to_string());
for group in &features.groups {
// Skip individual crime features — only expose "Crime summary" aggregates
if group.name == "Crime" {
continue;
}
parts.push(format!("## {}", group.name));
for feature in &group.features {
match feature {
@ -141,7 +145,7 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
parts.push(
"User: \"cheap freehold house under 400k\"\n\
Output: {\"numeric_filters\": [{\"name\": \"Last known price\", \"max\": 400000}], \
Output: {\"numeric_filters\": [{\"name\": \"Last known price\", \"bound\": \"max\", \"value\": 400000}], \
\"enum_filters\": [{\"name\": \"Leashold/Freehold\", \"values\": [\"Freehold\"]}, \
{\"name\": \"Property type\", \"values\": [\"Detached\", \"Semi-Detached\", \"Terraced\"]}], \
\"notes\": \"\"}"
@ -151,12 +155,12 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
parts.push(
"\nUser: \"safe quiet area with good schools and parks\"\n\
Output: {\"numeric_filters\": [\
{\"name\": \"Violence and sexual offences (avg/yr)\", \"max\": 20}, \
{\"name\": \"Burglary (avg/yr)\", \"max\": 10}, \
{\"name\": \"Noise (dB)\", \"max\": 55}, \
{\"name\": \"Good+ primary schools within 5km\", \"min\": 5}, \
{\"name\": \"Good+ secondary schools within 5km\", \"min\": 2}, \
{\"name\": \"Number of parks within 2km\", \"min\": 3}], \
{\"name\": \"Serious crime (avg/yr)\", \"bound\": \"max\", \"value\": 20}, \
{\"name\": \"Minor crime (avg/yr)\", \"bound\": \"max\", \"value\": 50}, \
{\"name\": \"Noise (dB)\", \"bound\": \"max\", \"value\": 55}, \
{\"name\": \"Good+ primary schools within 5km\", \"bound\": \"min\", \"value\": 5}, \
{\"name\": \"Good+ secondary schools within 5km\", \"bound\": \"min\", \"value\": 2}, \
{\"name\": \"Number of parks within 2km\", \"bound\": \"min\", \"value\": 3}], \
\"enum_filters\": [], \"notes\": \"\"}"
.to_string(),
);
@ -164,9 +168,9 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
parts.push(
"\nUser: \"3 bed flat under 300k with fast broadband near the beach\"\n\
Output: {\"numeric_filters\": [\
{\"name\": \"Last known price\", \"max\": 300000}, \
{\"name\": \"Number of bedrooms & living rooms\", \"min\": 4}, \
{\"name\": \"Max available download speed (Mbps)\", \"min\": 100}], \
{\"name\": \"Last known price\", \"bound\": \"max\", \"value\": 300000}, \
{\"name\": \"Number of bedrooms & living rooms\", \"bound\": \"min\", \"value\": 4}, \
{\"name\": \"Max available download speed (Mbps)\", \"bound\": \"min\", \"value\": 100}], \
\"enum_filters\": [{\"name\": \"Property type\", \"values\": [\"Flat\"]}], \
\"notes\": \"No filter for: beach proximity\"}"
.to_string(),
@ -175,9 +179,9 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
parts.push(
"\nUser: \"large family home with a garden near restaurants\"\n\
Output: {\"numeric_filters\": [\
{\"name\": \"Total floor area (sqm)\", \"min\": 100}, \
{\"name\": \"Number of bedrooms & living rooms\", \"min\": 5}, \
{\"name\": \"Number of restaurants within 2km\", \"min\": 10}], \
{\"name\": \"Total floor area (sqm)\", \"bound\": \"min\", \"value\": 100}, \
{\"name\": \"Number of bedrooms & living rooms\", \"bound\": \"min\", \"value\": 5}, \
{\"name\": \"Number of restaurants within 2km\", \"bound\": \"min\", \"value\": 10}], \
\"enum_filters\": [{\"name\": \"Property type\", \
\"values\": [\"Detached\", \"Semi-Detached\"]}], \
\"notes\": \"No filter for: garden\"}"
@ -187,7 +191,7 @@ pub fn build_system_prompt(features: &FeaturesResponse) -> String {
// Output format reminder
parts.push(
"\n--- OUTPUT FORMAT ---\n\
{\"numeric_filters\": [...], \"enum_filters\": [...], \"notes\": \"...\"}\n\
{\"numeric_filters\": [{\"name\": \"...\", \"bound\": \"min\"|\"max\", \"value\": N}, ...], \"enum_filters\": [...], \"notes\": \"...\"}\n\
Respond with ONLY the JSON object. No explanation."
.to_string(),
);
@ -244,10 +248,10 @@ pub async fn post_ai_filters(
/// Validate LLM output against feature metadata and convert to FeatureFilters format.
///
/// Input format (array-based, grammar-friendly):
/// Input format (array-based, each numeric filter sets one bound):
/// ```json
/// {
/// "numeric_filters": [{"name": "Last known price", "min": 0, "max": 300000}],
/// "numeric_filters": [{"name": "Last known price", "bound": "max", "value": 300000}],
/// "enum_filters": [{"name": "Leashold/Freehold", "values": ["Freehold"]}]
/// }
/// ```
@ -278,7 +282,7 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
}
}
// Process numeric filters
// Process numeric filters — each sets one bound (min or max)
if let Some(arr) = raw.get("numeric_filters").and_then(|val| val.as_array()) {
for item in arr {
let name = match item.get("name").and_then(|val| val.as_str()) {
@ -289,16 +293,19 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
Some(range) => *range,
None => continue,
};
let filter_min = item
.get("min")
.and_then(|val| val.as_f64())
.map(|num| num.max(feat_min as f64).min(feat_max as f64) as f32)
.unwrap_or(feat_min);
let filter_max = item
.get("max")
.and_then(|val| val.as_f64())
.map(|num| num.max(feat_min as f64).min(feat_max as f64) as f32)
.unwrap_or(feat_max);
let bound = match item.get("bound").and_then(|val| val.as_str()) {
Some(b) => b,
None => continue,
};
let value = match item.get("value").and_then(|val| val.as_f64()) {
Some(v) => v.max(feat_min as f64).min(feat_max as f64) as f32,
None => continue,
};
let (filter_min, filter_max) = match bound {
"min" => (value, feat_max),
"max" => (feat_min, value),
_ => continue,
};
// Only include if range is narrower than full range
if filter_min > feat_min || filter_max < feat_max {
result.insert(name.to_string(), json!([filter_min, filter_max]));

View file

@ -3,7 +3,7 @@ use std::hash::{Hash, Hasher};
use std::sync::Arc;
use axum::extract::Query;
use axum::http::{header, StatusCode};
use axum::http::{header, HeaderMap, StatusCode};
use axum::response::IntoResponse;
use axum::Extension;
use rust_xlsxwriter::{Format, FormatAlign, FormatBorder, Image, Url, Workbook};
@ -14,7 +14,7 @@ use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::licensing::check_license_bounds;
use crate::parsing::{parse_field_indices, parse_filters, require_bounds, row_passes_filters};
use crate::routes::FeatureInfo;
use crate::routes::{fetch_screenshot_bytes, FeatureInfo};
use crate::state::AppState;
const MAX_EXPORT_POSTCODES: usize = 250;
@ -120,39 +120,9 @@ fn build_frontend_params(
parts.join("&")
}
/// Fetch a screenshot image from the screenshot service for Excel export.
async fn fetch_screenshot(
state: &AppState,
frontend_params: &str,
) -> Option<Vec<u8>> {
let screenshot_base = &state.screenshot_url;
let url = format!("{}/screenshot?{}", screenshot_base, frontend_params);
match state.http_client.get(&url).send().await {
Ok(resp) if resp.status().is_success() => match resp.bytes().await {
Ok(bytes) => {
info!(bytes = bytes.len(), "Fetched screenshot for export");
Some(bytes.to_vec())
}
Err(err) => {
warn!("Failed to read screenshot response for export: {err}");
None
}
},
Ok(resp) => {
warn!(status = %resp.status(), "Screenshot service returned error for export");
None
}
Err(err) => {
warn!("Failed to reach screenshot service for export: {err}");
None
}
}
}
pub async fn get_export(
state: Arc<AppState>,
headers: HeaderMap,
Extension(user): Extension<OptionalUser>,
Query(params): Query<ExportParams>,
) -> Result<impl IntoResponse, axum::response::Response> {
@ -186,7 +156,18 @@ pub async fn get_export(
build_frontend_params(center_lat, center_lon, zoom, filters_str.as_deref());
// Fetch screenshot (async, before spawn_blocking)
let screenshot_bytes = fetch_screenshot(&state, &frontend_params).await;
let auth_header = headers.get(header::AUTHORIZATION);
let screenshot_bytes = match fetch_screenshot_bytes(&state, &frontend_params, auth_header).await
{
Ok(bytes) => {
info!(bytes = bytes.len(), "Fetched screenshot for export");
Some(bytes)
}
Err(err) => {
warn!("Screenshot failed for export: {err}");
None
}
};
// Build feature name → description map from the precomputed features response
let feature_descriptions: FxHashMap<String, String> = state

View file

@ -1,4 +1,5 @@
use std::sync::Arc;
use std::sync::{Arc, LazyLock};
use std::time::Duration;
use axum::body::Body;
use axum::extract::Request;
@ -8,6 +9,17 @@ use tracing::warn;
use crate::state::AppState;
/// HTTP client reserved for proxied requests.
///
/// Redirects are never followed, so 3xx responses reach the browser untouched
/// (needed for OAuth flows). Built lazily on first use; panics if the
/// client cannot be constructed.
static PROXY_CLIENT: LazyLock<reqwest::Client> = LazyLock::new(|| {
    let redirect_policy = reqwest::redirect::Policy::none();
    let request_timeout = Duration::from_secs(30);
    let connect_timeout = Duration::from_secs(5);

    reqwest::Client::builder()
        .redirect(redirect_policy)
        .timeout(request_timeout)
        .connect_timeout(connect_timeout)
        .build()
        .expect("Failed to build proxy HTTP client")
});
pub async fn proxy_to_pocketbase(state: Arc<AppState>, req: Request) -> impl IntoResponse {
let pb_url = state.pocketbase_url.trim_end_matches('/');
@ -21,7 +33,7 @@ pub async fn proxy_to_pocketbase(state: Arc<AppState>, req: Request) -> impl Int
let url = format!("{pb_url}{target_path}{query}");
let method = req.method().clone();
let mut builder = state.http_client.request(method, &url);
let mut builder = PROXY_CLIENT.request(method, &url);
// Forward only safe headers (allowlist)
const ALLOWED_HEADERS: &[&str] = &[
@ -37,6 +49,21 @@ pub async fn proxy_to_pocketbase(state: Arc<AppState>, req: Request) -> impl Int
}
}
// Forward client IP so PocketBase rate-limits per-user, not per-server.
// Prefer existing X-Forwarded-For (from reverse proxy), fall back to X-Real-IP.
if let Some(xff) = req.headers().get("x-forwarded-for") {
builder = builder.header("X-Forwarded-For", xff.clone());
// First IP in the chain is the original client
if let Ok(s) = xff.to_str() {
if let Some(client_ip) = s.split(',').next().map(str::trim) {
builder = builder.header("X-Real-IP", client_ip);
}
}
} else if let Some(real_ip) = req.headers().get("x-real-ip") {
builder = builder.header("X-Forwarded-For", real_ip.clone());
builder = builder.header("X-Real-IP", real_ip.clone());
}
// Forward body
let body_bytes = match axum::body::to_bytes(req.into_body(), 10 * 1024 * 1024).await {
Ok(bytes) => bytes,

View file

@ -1,55 +1,65 @@
use std::sync::Arc;
use axum::http::header::HeaderValue;
use axum::http::{header, HeaderMap, StatusCode, Uri};
use axum::response::IntoResponse;
use tracing::{info, warn};
use crate::state::AppState;
/// Fetch a PNG screenshot from the screenshot service.
/// Used by both the `/api/screenshot` proxy and the xlsx export.
///
/// Sends a GET to `{screenshot_url}/screenshot?{query_string}`, attaching
/// `auth_header` as the `Authorization` header when one is supplied.
///
/// # Errors
///
/// Returns a human-readable message when the service cannot be reached, when
/// it answers with a non-success status (the message includes the status and
/// response body), or when the response body cannot be read.
pub async fn fetch_screenshot_bytes(
    state: &AppState,
    query_string: &str,
    auth_header: Option<&HeaderValue>,
) -> Result<Vec<u8>, String> {
    let url = format!("{}/screenshot?{}", state.screenshot_url, query_string);
    info!("Fetching screenshot from: {}", url);

    let base_request = state.http_client.get(&url);
    let request = match auth_header {
        Some(auth) => base_request.header(header::AUTHORIZATION, auth),
        None => base_request,
    };

    let response = request
        .send()
        .await
        .map_err(|err| format!("Failed to reach screenshot service: {err}"))?;

    // Capture the status first: reading the body below consumes the response.
    let status = response.status();
    if !status.is_success() {
        let body = response.text().await.unwrap_or_default();
        return Err(format!(
            "Screenshot service returned {status}: {body}"
        ));
    }

    let payload = response
        .bytes()
        .await
        .map_err(|err| format!("Failed to read screenshot response: {err}"))?;
    Ok(payload.to_vec())
}
/// Proxy a screenshot request to the screenshot service.
///
/// Forwards the incoming URI's query string and, when present, the
/// `Authorization` header. On success responds `200 OK` with
/// `Content-Type: image/png` and `Cache-Control: public, max-age=86400`;
/// on any failure logs a warning and responds `502 Bad Gateway`.
///
/// NOTE(review): this span appears to interleave the removed and added lines
/// of a diff hunk (two `let qs` bindings, two `match` heads, two `Err` bodies);
/// confirm against the real file before relying on the exact statement order.
pub async fn get_screenshot(
state: Arc<AppState>,
headers: HeaderMap,
uri: Uri,
) -> impl IntoResponse {
let screenshot_base = &state.screenshot_url;
let qs = uri.query().unwrap_or_default();
let auth = headers.get(header::AUTHORIZATION);
let qs = uri
.query()
.map(|q| format!("?{q}"))
.unwrap_or_default();
let url = format!("{screenshot_base}/screenshot{qs}");
info!("Proxying screenshot request to: {}", url);
let mut req = state.http_client.get(&url);
if let Some(auth) = headers.get(header::AUTHORIZATION) {
req = req.header(header::AUTHORIZATION, auth);
}
match req.send().await {
Ok(resp) if resp.status().is_success() => match resp.bytes().await {
Ok(bytes) => (
StatusCode::OK,
[
(header::CONTENT_TYPE, "image/png"),
(header::CACHE_CONTROL, "public, max-age=86400"),
],
bytes,
)
.into_response(),
Err(err) => {
warn!("Failed to read screenshot response: {}", err);
(StatusCode::BAD_GATEWAY, "Failed to read screenshot").into_response()
}
},
Ok(resp) => {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
warn!("Screenshot service returned status {}: {}", status, body);
(StatusCode::BAD_GATEWAY, "Screenshot service error").into_response()
}
match fetch_screenshot_bytes(&state, qs, auth).await {
Ok(bytes) => (
StatusCode::OK,
[
(header::CONTENT_TYPE, "image/png"),
(header::CACHE_CONTROL, "public, max-age=86400"),
],
bytes,
)
.into_response(),
Err(err) => {
warn!("Failed to reach screenshot service: {}", err);
(StatusCode::BAD_GATEWAY, "Screenshot service unavailable").into_response()
warn!("{err}");
(StatusCode::BAD_GATEWAY, "Screenshot service error").into_response()
}
}
}