changes
This commit is contained in:
parent
524580eb25
commit
ffe080adef
82 changed files with 2652 additions and 2956 deletions
|
|
@ -14,13 +14,15 @@ pub const MAX_PROPERTIES_LIMIT: usize = 500;
|
|||
pub const MAX_PRICE_HISTORY_POINTS: usize = 5000;
|
||||
pub const POSTCODE_SEARCH_OFFSET: f64 = 0.02;
|
||||
|
||||
/// System prompt for area-summary generation: frames the model as an estate agent and
/// asks for at most one factual sentence of plain prose (no bullets, headers, or markdown).
/// NOTE(review): the text reads "a single concise sentences" — a grammar slip; correcting it
/// changes the runtime prompt, so it is left untouched here.
pub const AREA_SUMMARY_SYSTEM_PROMPT: &str = "You are an experienced estate agent with an expertise in area analysis. Help the user find his/her dream area or perfect postcode to settle in. The user is looking to buy a property based on the filters they provide. Given area statistics, write at most a single concise sentences summarising the key characteristics of the area. Be factual and highlight notable values. Do not use bullet points or headers — just flowing prose. Do not use markdown formatting. Highlight unusual facts that stand out from the average, but do not exaggerate. If there are no notable characteristics, say so. Always write at most a single sentence! Reason about the relation of different statistics to each other.";
|
||||
pub const AREA_SUMMARY_MAX_TOKENS: usize = 300;
|
||||
pub const AREA_SUMMARY_TEMPERATURE: f32 = 0.3;
|
||||
|
||||
pub const AI_FILTERS_MAX_TOKENS: usize = 2000;
|
||||
pub const AI_FILTERS_TEMPERATURE: f32 = 0.0;
|
||||
|
||||
/// Inner London free zone bounds (south, west, north, east) — roughly zones 1–2.
|
||||
/// Users without a license can only query data within these bounds.
|
||||
pub const FREE_ZONE_BOUNDS: (f64, f64, f64, f64) = (51.48, -0.18, 51.54, -0.02);
|
||||
|
||||
/// Homepage demo center (lat, lng). Unlicensed hexagon requests are allowed
|
||||
/// when the center of the requested bounds is within DEMO_CENTER_TOLERANCE of this point.
|
||||
/// Must match DEMO_VIEW_START in ScrollStory.tsx.
|
||||
pub const DEMO_CENTER: (f64, f64) = (52.2, -1.9);
|
||||
pub const DEMO_CENTER_TOLERANCE: f64 = 1.0;
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ use std::path::Path;
|
|||
use rustc_hash::FxHashMap;
|
||||
|
||||
use crate::consts::{H3_PRECOMPUTE_MAX, HISTOGRAM_BINS};
|
||||
use crate::features::{self, Bounds, IGNORED_COLUMNS};
|
||||
use crate::features::{self, Bounds};
|
||||
|
||||
fn is_numeric_dtype(dtype: &DataType) -> bool {
|
||||
matches!(
|
||||
|
|
@ -122,6 +122,13 @@ pub struct PropertyData {
|
|||
/// Per-row renovation events. Keyed by (permuted) row index.
|
||||
/// Only rows with events are present in the map.
|
||||
renovation_history: FxHashMap<u32, Vec<RenovationEvent>>,
|
||||
/// Per-row listing features (key feature bullet points from online listings).
|
||||
/// Only rows with features are present in the map.
|
||||
listing_features: FxHashMap<u32, Vec<String>>,
|
||||
/// Per-row optional string columns from online listings.
|
||||
listing_url: Vec<Option<String>>,
|
||||
property_sub_type: Vec<Option<String>>,
|
||||
price_qualifier: Vec<Option<String>>,
|
||||
}
|
||||
|
||||
impl PropertyData {
|
||||
|
|
@ -155,6 +162,29 @@ impl PropertyData {
|
|||
.map(|v| v.as_slice())
|
||||
.unwrap_or(&[])
|
||||
}
|
||||
|
||||
/// Get listing features for a given row (empty slice if none).
|
||||
pub fn listing_features(&self, row: usize) -> &[String] {
|
||||
self.listing_features
|
||||
.get(&(row as u32))
|
||||
.map(|v| v.as_slice())
|
||||
.unwrap_or(&[])
|
||||
}
|
||||
|
||||
/// Get listing URL for a given row.
|
||||
pub fn listing_url(&self, row: usize) -> Option<&str> {
|
||||
self.listing_url[row].as_deref()
|
||||
}
|
||||
|
||||
/// Get property sub-type for a given row.
|
||||
pub fn property_sub_type(&self, row: usize) -> Option<&str> {
|
||||
self.property_sub_type[row].as_deref()
|
||||
}
|
||||
|
||||
/// Get price qualifier for a given row.
|
||||
pub fn price_qualifier(&self, row: usize) -> Option<&str> {
|
||||
self.price_qualifier[row].as_deref()
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute a percentile from a uniformly-binned histogram.
|
||||
|
|
@ -375,73 +405,226 @@ pub fn precompute_h3(lat: &[f32], lon: &[f32]) -> anyhow::Result<Vec<u64>> {
|
|||
}
|
||||
|
||||
impl PropertyData {
|
||||
pub fn load(parquet_path: &Path) -> anyhow::Result<Self> {
|
||||
tracing::info!("Loading parquet from {:?}", parquet_path);
|
||||
pub fn load(
|
||||
properties_path: &Path,
|
||||
postcode_features_path: &Path,
|
||||
listings_buy_path: &Path,
|
||||
listings_rent_path: &Path,
|
||||
) -> anyhow::Result<Self> {
|
||||
// Load postcode.parquet
|
||||
tracing::info!("Loading postcode features from {:?}", postcode_features_path);
|
||||
let postcode_df = LazyFrame::scan_parquet(postcode_features_path, Default::default())
|
||||
.context("Failed to scan postcode parquet")?
|
||||
.collect()
|
||||
.context("Failed to read postcode parquet")?;
|
||||
tracing::info!(rows = postcode_df.height(), "Postcode features loaded");
|
||||
|
||||
let mut lf = LazyFrame::scan_parquet(parquet_path, Default::default())
|
||||
.context("Failed to scan parquet")?;
|
||||
let schema = lf.collect_schema().context("Failed to read schema")?;
|
||||
// Load properties.parquet and join with postcode data for lat/lon + area features
|
||||
tracing::info!("Loading properties from {:?}", properties_path);
|
||||
let properties_lf = LazyFrame::scan_parquet(properties_path, Default::default())
|
||||
.context("Failed to scan properties parquet")?
|
||||
.with_columns([lit("Historical sale").alias("Listing status")]);
|
||||
let properties_joined = properties_lf
|
||||
.join(
|
||||
postcode_df.clone().lazy(),
|
||||
[col("Postcode")],
|
||||
[col("Postcode")],
|
||||
JoinArgs::new(JoinType::Left),
|
||||
)
|
||||
.collect()
|
||||
.context("Failed to join properties with postcodes")?;
|
||||
let prop_count = properties_joined.height();
|
||||
tracing::info!(rows = prop_count, "Properties joined with postcodes");
|
||||
|
||||
// Load online listings (buy + rent) — these have their own lat/lon.
|
||||
// Normalize column names from finder output to server-expected names.
|
||||
// strict=false: columns already using the new name are silently skipped.
|
||||
let load_listings = |path: &Path, label: &str| -> anyhow::Result<DataFrame> {
|
||||
tracing::info!("Loading {} listings from {:?}", label, path);
|
||||
let mut lf = LazyFrame::scan_parquet(path, Default::default())
|
||||
.with_context(|| format!("Failed to scan {label} listings parquet"))?;
|
||||
let schema = lf
|
||||
.collect_schema()
|
||||
.with_context(|| format!("Failed to read {label} listings schema"))?;
|
||||
|
||||
// Rename raw finder columns → server-expected names (no-op if already renamed)
|
||||
let lf = lf.rename(
|
||||
[
|
||||
"postcode",
|
||||
"address",
|
||||
"latitude",
|
||||
"longitude",
|
||||
"bedrooms",
|
||||
"bathrooms",
|
||||
"total_rooms",
|
||||
"tenure",
|
||||
"property_type",
|
||||
"property_sub_type",
|
||||
"price_qualifier",
|
||||
"floorspace_sqm",
|
||||
"url",
|
||||
"features",
|
||||
],
|
||||
[
|
||||
"Postcode",
|
||||
"Address per Property Register",
|
||||
"lat",
|
||||
"lon",
|
||||
"Bedrooms",
|
||||
"Bathrooms",
|
||||
"Number of bedrooms & living rooms",
|
||||
"Leashold/Freehold",
|
||||
"Property type",
|
||||
"Property sub-type",
|
||||
"Price qualifier",
|
||||
"Total floor area (sqm)",
|
||||
"Listing URL",
|
||||
"Listing features",
|
||||
],
|
||||
false,
|
||||
);
|
||||
|
||||
// Derive missing columns for raw finder output that doesn't have them
|
||||
let listing_status = if label == "buy" {
|
||||
"For sale"
|
||||
} else {
|
||||
"For rent"
|
||||
};
|
||||
let lf = if schema.get("Listing status").is_none() {
|
||||
lf.with_column(lit(listing_status).alias("Listing status"))
|
||||
} else {
|
||||
lf
|
||||
};
|
||||
let lf = if schema.get("Asking price").is_none() && schema.get("price").is_some() {
|
||||
if label == "buy" {
|
||||
lf.with_column(col("price").alias("Asking price"))
|
||||
} else {
|
||||
// Normalize rent to monthly: weekly×52/12, yearly÷12
|
||||
lf.with_column(
|
||||
when(col("price_frequency").eq(lit("weekly")))
|
||||
.then(col("price").cast(DataType::Float64) * lit(52.0 / 12.0))
|
||||
.when(col("price_frequency").eq(lit("yearly")))
|
||||
.then(col("price").cast(DataType::Float64) / lit(12.0))
|
||||
.otherwise(col("price").cast(DataType::Float64))
|
||||
.cast(DataType::Int64)
|
||||
.alias("Asking rent (monthly)"),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
lf
|
||||
};
|
||||
|
||||
// Join with postcodes for area features (listings have their own lat/lon)
|
||||
let pc_no_coords = postcode_df.clone().lazy().drop(["lat", "lon"]);
|
||||
let joined = lf
|
||||
.join(
|
||||
pc_no_coords,
|
||||
[col("Postcode")],
|
||||
[col("Postcode")],
|
||||
JoinArgs::new(JoinType::Left),
|
||||
)
|
||||
.collect()
|
||||
.with_context(|| format!("Failed to join {label} listings with postcodes"))?;
|
||||
tracing::info!(rows = joined.height(), "{} listings joined", label);
|
||||
Ok(joined)
|
||||
};
|
||||
let listings_buy = load_listings(listings_buy_path, "buy")?;
|
||||
let listings_rent = load_listings(listings_rent_path, "rent")?;
|
||||
|
||||
// Concatenate all rows into a single DataFrame
|
||||
tracing::info!("Concatenating all data sources");
|
||||
let buy_count = listings_buy.height();
|
||||
let rent_count = listings_rent.height();
|
||||
let mut combined = concat(
|
||||
[
|
||||
properties_joined.lazy(),
|
||||
listings_buy.lazy(),
|
||||
listings_rent.lazy(),
|
||||
],
|
||||
UnionArgs {
|
||||
parallel: false,
|
||||
rechunk: true,
|
||||
to_supertypes: true,
|
||||
diagonal: true,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.context("Failed to concat data sources")?
|
||||
.collect()
|
||||
.context("Failed to collect combined data")?;
|
||||
|
||||
let total_rows = combined.height();
|
||||
tracing::info!(
|
||||
properties = prop_count,
|
||||
buy_listings = buy_count,
|
||||
rent_listings = rent_count,
|
||||
total = total_rows,
|
||||
"All data sources combined"
|
||||
);
|
||||
|
||||
// Get configured feature/enum names in config order
|
||||
let numeric_names = features::all_numeric_feature_names();
|
||||
let enum_names = features::all_enum_feature_names();
|
||||
|
||||
// Validate: every configured numeric feature must exist in parquet as numeric
|
||||
// Fill in NaN/empty placeholder columns for features that don't exist in all
|
||||
// sources (e.g. Listing date only comes from listings, Estimated current price
|
||||
// only from properties). Without this, diagonal concat leaves them absent.
|
||||
{
|
||||
let schema = combined.schema();
|
||||
let mut fill_exprs: Vec<Expr> = Vec::new();
|
||||
for &name in &numeric_names {
|
||||
if schema.get(name).is_none() {
|
||||
tracing::info!(feature = %name, "Adding NaN placeholder for missing numeric feature");
|
||||
fill_exprs.push(lit(f32::NAN).alias(name));
|
||||
}
|
||||
}
|
||||
for &name in &enum_names {
|
||||
if schema.get(name).is_none() {
|
||||
tracing::info!(feature = %name, "Adding empty placeholder for missing enum feature");
|
||||
fill_exprs.push(lit("").alias(name));
|
||||
}
|
||||
}
|
||||
if !fill_exprs.is_empty() {
|
||||
combined = combined
|
||||
.lazy()
|
||||
.with_columns(fill_exprs)
|
||||
.collect()
|
||||
.context("Failed to add placeholder columns for missing features")?;
|
||||
}
|
||||
}
|
||||
|
||||
let schema = combined.schema();
|
||||
|
||||
// Validate: every configured feature exists in combined schema
|
||||
for name in &numeric_names {
|
||||
match schema.get(name) {
|
||||
Some(dtype) if is_numeric_dtype(dtype) => {}
|
||||
Some(dtype) => bail!(
|
||||
"Configured numeric feature '{}' has non-numeric type {:?} in parquet",
|
||||
"Configured numeric feature '{}' has non-numeric type {:?}",
|
||||
name,
|
||||
dtype
|
||||
),
|
||||
None => bail!(
|
||||
"Configured numeric feature '{}' not found in parquet schema",
|
||||
"Configured numeric feature '{}' not found in combined schema",
|
||||
name
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
// Validate: every configured enum feature must exist in parquet as string
|
||||
for name in &enum_names {
|
||||
match schema.get(name) {
|
||||
Some(dtype) if matches!(dtype, DataType::String) || dtype.is_categorical() => {}
|
||||
Some(dtype) => bail!(
|
||||
"Configured enum feature '{}' has unexpected type {:?} in parquet",
|
||||
"Configured enum feature '{}' has unexpected type {:?}",
|
||||
name,
|
||||
dtype
|
||||
),
|
||||
None => bail!(
|
||||
"Configured enum feature '{}' not found in parquet schema",
|
||||
"Configured enum feature '{}' not found in combined schema",
|
||||
name
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
// Validate: every parquet column must be accounted for
|
||||
let all_known: std::collections::HashSet<&str> = numeric_names
|
||||
.iter()
|
||||
.chain(enum_names.iter())
|
||||
.copied()
|
||||
.chain(IGNORED_COLUMNS.iter().copied())
|
||||
.collect();
|
||||
|
||||
for (col_name, dtype) in schema.iter() {
|
||||
let name = col_name.as_str();
|
||||
if all_known.contains(name) {
|
||||
continue;
|
||||
}
|
||||
// Skip non-simple types (List, Struct, etc.)
|
||||
if matches!(dtype, DataType::List(_) | DataType::Struct(_)) {
|
||||
tracing::debug!(column = %name, dtype = ?dtype, "Skipping complex-type column");
|
||||
continue;
|
||||
}
|
||||
bail!(
|
||||
"Unknown column '{}' (type {:?}) in parquet — add it to features.rs config or IGNORED_COLUMNS",
|
||||
name, dtype
|
||||
);
|
||||
}
|
||||
|
||||
// Combine numeric and enum feature names (numeric first, then enum)
|
||||
let feature_names: Vec<String> = numeric_names
|
||||
.iter()
|
||||
|
|
@ -457,7 +640,7 @@ impl PropertyData {
|
|||
"Feature columns from config"
|
||||
);
|
||||
|
||||
// Build select expressions
|
||||
// Build select expressions for the combined DataFrame
|
||||
let mut select_exprs: Vec<polars::prelude::Expr> = vec![];
|
||||
select_exprs.push(col("lat").cast(DataType::Float32));
|
||||
select_exprs.push(col("lon").cast(DataType::Float32));
|
||||
|
|
@ -465,7 +648,6 @@ impl PropertyData {
|
|||
// Select numeric features as Float32 (datetime columns → fractional year)
|
||||
for &name in &numeric_names {
|
||||
if is_datetime_dtype(schema.get(name).unwrap()) {
|
||||
// Convert datetime to fractional year: year + (month - 1) / 12
|
||||
select_exprs.push(
|
||||
(col(name).dt().year().cast(DataType::Float32)
|
||||
+ (col(name).dt().month().cast(DataType::Float32) - lit(1.0f32))
|
||||
|
|
@ -477,42 +659,47 @@ impl PropertyData {
|
|||
}
|
||||
}
|
||||
|
||||
// String columns for address/postcode
|
||||
// String columns for address/postcode and online listing metadata
|
||||
for &string_col_name in &[
|
||||
"Address per Property Register",
|
||||
"Address per EPC",
|
||||
"Postcode",
|
||||
"Listing URL",
|
||||
"Property sub-type",
|
||||
"Price qualifier",
|
||||
] {
|
||||
if schema.get(string_col_name).is_some() {
|
||||
select_exprs.push(col(string_col_name).cast(DataType::String));
|
||||
}
|
||||
}
|
||||
|
||||
// Enum features as String (will be encoded to f32 indices later)
|
||||
// Enum features as String
|
||||
for &name in &enum_names {
|
||||
select_exprs.push(col(name).cast(DataType::String));
|
||||
}
|
||||
|
||||
// Optional boolean column for construction date approximation
|
||||
// Optional columns
|
||||
let has_approx_col = schema.get("Is construction date approximate").is_some();
|
||||
if has_approx_col {
|
||||
select_exprs.push(col("Is construction date approximate").cast(DataType::Float32));
|
||||
}
|
||||
|
||||
// Optional renovation history (List<Struct{year, event}>)
|
||||
let has_renovation_history = schema.get("renovation_history").is_some();
|
||||
if has_renovation_history {
|
||||
select_exprs.push(col("renovation_history"));
|
||||
}
|
||||
let has_listing_features = schema.get("Listing features").is_some();
|
||||
if has_listing_features {
|
||||
select_exprs.push(col("Listing features"));
|
||||
}
|
||||
|
||||
let df = LazyFrame::scan_parquet(parquet_path, Default::default())
|
||||
.context("Failed to scan parquet")?
|
||||
let df = combined
|
||||
.lazy()
|
||||
.select(select_exprs)
|
||||
.collect()
|
||||
.context("Failed to read parquet")?;
|
||||
.context("Failed to select columns from combined data")?;
|
||||
|
||||
let row_count = df.height();
|
||||
tracing::info!(rows = row_count, "Parquet loaded");
|
||||
tracing::info!(rows = row_count, "Combined data selected");
|
||||
|
||||
let lat_series = df
|
||||
.column("lat")
|
||||
|
|
@ -586,6 +773,35 @@ impl PropertyData {
|
|||
let address_raw = extract_string_col(&df, "Address per Property Register")?;
|
||||
let postcode_raw = extract_string_col(&df, "Postcode")?;
|
||||
|
||||
// Extract optional string columns for online listing metadata
|
||||
// Reads an optional string column into one `Option<String>` per row.
// A column that is absent from `df` yields `row_count` `None`s; a present column
// must be a string column. Values are trimmed, and null or blank cells become `None`.
let extract_optional_string_col =
    |df: &DataFrame, name: &str| -> anyhow::Result<Vec<Option<String>>> {
        // Absent column: every row is simply "no value".
        let column = match df.column(name) {
            Ok(column) => column,
            Err(_) => return Ok(vec![None; row_count]),
        };

        let string_column = column
            .str()
            .with_context(|| format!("Column '{name}' is not a string column"))?;

        let cleaned: Vec<Option<String>> = string_column
            .into_iter()
            .map(|cell| {
                cell.map(str::trim)
                    .filter(|text| !text.is_empty())
                    .map(str::to_owned)
            })
            .collect();

        Ok(cleaned)
    };
|
||||
|
||||
let listing_url_raw = extract_optional_string_col(&df, "Listing URL")?;
|
||||
let property_sub_type_raw = extract_optional_string_col(&df, "Property sub-type")?;
|
||||
let price_qualifier_raw = extract_optional_string_col(&df, "Price qualifier")?;
|
||||
|
||||
tracing::info!("Building enum features");
|
||||
// enum_col_major: Vec<(values_list, encoded_as_f32)>
|
||||
let mut enum_col_major: Vec<(Vec<String>, Vec<f32>)> = Vec::new();
|
||||
|
|
@ -689,7 +905,7 @@ impl PropertyData {
|
|||
let mut history: FxHashMap<u32, Vec<RenovationEvent>> = FxHashMap::default();
|
||||
for old_row in 0..row_count {
|
||||
if let Some(inner) = list_ca.get_as_series(old_row) {
|
||||
if inner.len() == 0 {
|
||||
if inner.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let structs = inner
|
||||
|
|
@ -727,6 +943,44 @@ impl PropertyData {
|
|||
FxHashMap::default()
|
||||
};
|
||||
|
||||
// Extract listing features: List<String>
|
||||
let mut listing_features_raw: FxHashMap<u32, Vec<String>> = if has_listing_features {
|
||||
tracing::info!("Extracting listing features");
|
||||
let feat_col = df
|
||||
.column("Listing features")
|
||||
.context("Missing Listing features column")?;
|
||||
let list_ca = feat_col
|
||||
.list()
|
||||
.context("Listing features is not a list column")?;
|
||||
|
||||
let mut features_map: FxHashMap<u32, Vec<String>> = FxHashMap::default();
|
||||
for old_row in 0..row_count {
|
||||
if let Some(inner) = list_ca.get_as_series(old_row) {
|
||||
if inner.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let str_ca = inner
|
||||
.str()
|
||||
.context("Listing features inner is not a string series")?;
|
||||
let items: Vec<String> = str_ca
|
||||
.into_iter()
|
||||
.filter_map(|v| v.map(|s| s.to_string()))
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect();
|
||||
if !items.is_empty() {
|
||||
features_map.insert(old_row as u32, items);
|
||||
}
|
||||
}
|
||||
}
|
||||
tracing::info!(
|
||||
properties_with_features = features_map.len(),
|
||||
"Listing features extracted"
|
||||
);
|
||||
features_map
|
||||
} else {
|
||||
FxHashMap::default()
|
||||
};
|
||||
|
||||
// Sort all rows by spatial locality so that grid queries access
|
||||
// contiguous memory (sequential reads instead of random DRAM accesses).
|
||||
tracing::info!("Sorting rows by spatial locality");
|
||||
|
|
@ -796,6 +1050,32 @@ impl PropertyData {
|
|||
map
|
||||
};
|
||||
|
||||
// Re-key listing_features by permuted row index
|
||||
let listing_features: FxHashMap<u32, Vec<String>> = {
|
||||
let mut map =
|
||||
FxHashMap::with_capacity_and_hasher(listing_features_raw.len(), Default::default());
|
||||
for (new_row, &old_row) in perm.iter().enumerate() {
|
||||
if let Some(items) = listing_features_raw.remove(&old_row) {
|
||||
map.insert(new_row as u32, items);
|
||||
}
|
||||
}
|
||||
map
|
||||
};
|
||||
|
||||
// Permute optional string columns
|
||||
let listing_url: Vec<Option<String>> = perm
|
||||
.iter()
|
||||
.map(|&old_row| listing_url_raw[old_row as usize].clone())
|
||||
.collect();
|
||||
let property_sub_type: Vec<Option<String>> = perm
|
||||
.iter()
|
||||
.map(|&old_row| property_sub_type_raw[old_row as usize].clone())
|
||||
.collect();
|
||||
let price_qualifier: Vec<Option<String>> = perm
|
||||
.iter()
|
||||
.map(|&old_row| price_qualifier_raw[old_row as usize].clone())
|
||||
.collect();
|
||||
|
||||
// Build enum_values map: feature_index -> list of string values
|
||||
let mut enum_values: rustc_hash::FxHashMap<usize, Vec<String>> =
|
||||
rustc_hash::FxHashMap::default();
|
||||
|
|
@ -857,6 +1137,10 @@ impl PropertyData {
|
|||
enum_values,
|
||||
approx_build_date_bits,
|
||||
renovation_history,
|
||||
listing_features,
|
||||
listing_url,
|
||||
property_sub_type,
|
||||
price_qualifier,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,21 +58,6 @@ pub struct EnumFeatureGroup {
|
|||
pub features: &'static [EnumFeatureConfig],
|
||||
}
|
||||
|
||||
/// Columns in parquet that are not filterable
|
||||
pub const IGNORED_COLUMNS: &[&str] = &[
|
||||
"lat",
|
||||
"lon",
|
||||
"Address per Property Register",
|
||||
"Address per EPC",
|
||||
"Postcode",
|
||||
"historical_prices",
|
||||
"Is construction date approximate",
|
||||
"Current energy rating",
|
||||
"Potential energy rating",
|
||||
"Property sub-type",
|
||||
"Listing URL",
|
||||
"Price qualifier",
|
||||
];
|
||||
|
||||
pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
||||
FeatureGroup {
|
||||
|
|
@ -964,6 +949,20 @@ pub static ENUM_FEATURE_GROUPS: &[EnumFeatureGroup] = &[
|
|||
detail: "From HM Land Registry Price Paid data. The broad property type classification: Detached, Semi-Detached, Terraced, or Flat/Maisonette.",
|
||||
source: "price-paid",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Current energy rating",
|
||||
order: Some(&["A", "B", "C", "D", "E", "F", "G"]),
|
||||
description: "Current EPC energy efficiency rating (A = best, G = worst)",
|
||||
detail: "The current energy efficiency rating from the Energy Performance Certificate. Ranges from A (most efficient) to G (least efficient). Based on the property's energy use per square metre of floor area.",
|
||||
source: "epc",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Potential energy rating",
|
||||
order: Some(&["A", "B", "C", "D", "E", "F", "G"]),
|
||||
description: "Potential EPC rating if all recommended improvements were made",
|
||||
detail: "The potential energy efficiency rating from the Energy Performance Certificate if all cost-effective improvements recommended in the EPC report were carried out. Ranges from A (most efficient) to G (least efficient).",
|
||||
source: "epc",
|
||||
},
|
||||
],
|
||||
},
|
||||
EnumFeatureGroup {
|
||||
|
|
|
|||
|
|
@ -28,6 +28,8 @@ use tower_http::cors::{AllowHeaders, AllowMethods, CorsLayer};
|
|||
use tower_http::services::{ServeDir, ServeFile};
|
||||
use tower_http::trace::TraceLayer;
|
||||
use tracing::info;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
use tracing_subscriber::util::SubscriberInitExt;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use state::AppState;
|
||||
|
|
@ -35,9 +37,21 @@ use state::AppState;
|
|||
#[derive(Parser)]
|
||||
#[command(name = "perfect-postcode", about = "Perfect Postcode property map server")]
|
||||
struct Cli {
|
||||
/// Path to the wide property parquet file
|
||||
/// Path to properties.parquet (one row per historical property)
|
||||
#[arg(long)]
|
||||
data: PathBuf,
|
||||
properties: PathBuf,
|
||||
|
||||
/// Path to postcode.parquet (one row per postcode with area-level data)
|
||||
#[arg(long)]
|
||||
postcode_features: PathBuf,
|
||||
|
||||
/// Path to online_listings_buy.parquet
|
||||
#[arg(long)]
|
||||
listings_buy: PathBuf,
|
||||
|
||||
/// Path to online_listings_rent.parquet
|
||||
#[arg(long)]
|
||||
listings_rent: PathBuf,
|
||||
|
||||
/// Path to the POI parquet file
|
||||
#[arg(long)]
|
||||
|
|
@ -79,11 +93,11 @@ struct Cli {
|
|||
#[arg(long, env = "POCKETBASE_ADMIN_PASSWORD")]
|
||||
pocketbase_admin_password: String,
|
||||
|
||||
/// Ollama server URL for AI area summaries (e.g. http://ollama:11434)
|
||||
/// Ollama server URL (e.g. http://ollama:11434)
|
||||
#[arg(long, env = "OLLAMA_URL")]
|
||||
ollama_url: String,
|
||||
|
||||
/// Ollama model name for area summaries
|
||||
/// Ollama model name
|
||||
#[arg(long, env = "OLLAMA_MODEL")]
|
||||
ollama_model: String,
|
||||
|
||||
|
|
@ -115,22 +129,24 @@ struct Cli {
|
|||
#[arg(long, env = "GOOGLE_OAUTH_CLIENT_SECRET")]
|
||||
google_oauth_client_secret: String,
|
||||
|
||||
/// Apple OAuth client ID for PocketBase SSO
|
||||
#[arg(long, env = "APPLE_OAUTH_CLIENT_ID")]
|
||||
apple_oauth_client_id: String,
|
||||
|
||||
/// Apple OAuth client secret for PocketBase SSO
|
||||
#[arg(long, env = "APPLE_OAUTH_CLIENT_SECRET")]
|
||||
apple_oauth_client_secret: String,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(
|
||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
|
||||
let file_appender = tracing_appender::rolling::daily("logs", "server.log");
|
||||
let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
|
||||
|
||||
let env_filter =
|
||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
|
||||
|
||||
tracing_subscriber::registry()
|
||||
.with(env_filter)
|
||||
.with(tracing_subscriber::fmt::layer().with_ansi(true))
|
||||
.with(
|
||||
tracing_subscriber::fmt::layer()
|
||||
.with_ansi(false)
|
||||
.with_writer(non_blocking),
|
||||
)
|
||||
.with_ansi(true)
|
||||
.init();
|
||||
|
||||
// Initialize Prometheus metrics
|
||||
|
|
@ -139,16 +155,30 @@ async fn main() -> anyhow::Result<()> {
|
|||
|
||||
let cli = Cli::parse();
|
||||
|
||||
let parquet_path = &cli.data;
|
||||
if !parquet_path.exists() {
|
||||
bail!(
|
||||
"Property parquet file not found: {}",
|
||||
parquet_path.display()
|
||||
);
|
||||
for (label, path) in [
|
||||
("Properties", &cli.properties),
|
||||
("Postcode features", &cli.postcode_features),
|
||||
("Listings buy", &cli.listings_buy),
|
||||
("Listings rent", &cli.listings_rent),
|
||||
] {
|
||||
if !path.exists() {
|
||||
bail!("{} parquet file not found: {}", label, path.display());
|
||||
}
|
||||
}
|
||||
|
||||
info!("Loading property data from {}", parquet_path.display());
|
||||
let property_data = data::PropertyData::load(parquet_path)?;
|
||||
info!(
|
||||
"Loading property data from {}, {}, {}, {}",
|
||||
cli.properties.display(),
|
||||
cli.postcode_features.display(),
|
||||
cli.listings_buy.display(),
|
||||
cli.listings_rent.display(),
|
||||
);
|
||||
let property_data = data::PropertyData::load(
|
||||
&cli.properties,
|
||||
&cli.postcode_features,
|
||||
&cli.listings_buy,
|
||||
&cli.listings_rent,
|
||||
)?;
|
||||
info!(
|
||||
rows = property_data.lat.len(),
|
||||
features = property_data.num_features,
|
||||
|
|
@ -297,8 +327,6 @@ async fn main() -> anyhow::Result<()> {
|
|||
&cli.public_url,
|
||||
&cli.google_oauth_client_id,
|
||||
&cli.google_oauth_client_secret,
|
||||
&cli.apple_oauth_client_id,
|
||||
&cli.apple_oauth_client_secret,
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
|
@ -382,7 +410,6 @@ async fn main() -> anyhow::Result<()> {
|
|||
let state_crawler = state.clone();
|
||||
let state_pb = state.clone();
|
||||
let state_postcode_stats = state.clone();
|
||||
let state_area_summary = state.clone();
|
||||
let state_places = state.clone();
|
||||
let state_shorten = state.clone();
|
||||
let state_short_url = state.clone();
|
||||
|
|
@ -447,7 +474,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
)
|
||||
.route(
|
||||
"/api/screenshot",
|
||||
get(move |query| routes::get_screenshot(state_screenshot.clone(), query)),
|
||||
get(move |headers, query| routes::get_screenshot(state_screenshot.clone(), headers, query)),
|
||||
)
|
||||
.route(
|
||||
"/api/export",
|
||||
|
|
@ -455,11 +482,6 @@ async fn main() -> anyhow::Result<()> {
|
|||
.layer(ConcurrencyLimitLayer::new(3)),
|
||||
)
|
||||
.route("/api/me", get(routes::get_me))
|
||||
.route(
|
||||
"/api/area-summary",
|
||||
post(move |body| routes::post_area_summary(state_area_summary.clone(), body))
|
||||
.layer(ConcurrencyLimitLayer::new(5)),
|
||||
)
|
||||
.route(
|
||||
"/api/shorten",
|
||||
post(move |body| routes::post_shorten(state_shorten.clone(), body)),
|
||||
|
|
|
|||
|
|
@ -18,10 +18,21 @@ struct CollectionItem {
|
|||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct CreateCollection {
|
||||
name: String,
|
||||
r#type: String,
|
||||
fields: Vec<Field>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
list_rule: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
view_rule: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
create_rule: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
update_rule: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
delete_rule: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
@ -251,6 +262,37 @@ async fn ensure_user_fields(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Ensure the `saved_searches` collection has API rules allowing users to manage their own records.
|
||||
async fn ensure_saved_searches_rules(
|
||||
client: &Client,
|
||||
base_url: &str,
|
||||
token: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
let url = format!("{base_url}/api/collections/saved_searches");
|
||||
let user_only = "user = @request.auth.id";
|
||||
let resp = client
|
||||
.patch(&url)
|
||||
.header("Authorization", format!("Bearer {token}"))
|
||||
.json(&serde_json::json!({
|
||||
"listRule": user_only,
|
||||
"viewRule": user_only,
|
||||
"createRule": user_only,
|
||||
"updateRule": user_only,
|
||||
"deleteRule": user_only,
|
||||
}))
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Failed to update saved_searches API rules ({status}): {text}");
|
||||
}
|
||||
|
||||
info!("PocketBase collection 'saved_searches' API rules updated");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Ensure the `saved_searches` and `short_urls` collections exist in PocketBase,
|
||||
/// and that the `users` collection has `is_admin` and `subscription` fields.
|
||||
/// Authenticates as superuser, checks existing collections, and creates any that are missing.
|
||||
|
|
@ -269,6 +311,7 @@ pub async fn ensure_collections(
|
|||
|
||||
if !existing.iter().any(|n| n == "saved_searches") {
|
||||
let users_id = find_users_collection_id(client, base_url, &token).await?;
|
||||
let user_only = Some("user = @request.auth.id".to_string());
|
||||
create_collection(
|
||||
client,
|
||||
base_url,
|
||||
|
|
@ -282,11 +325,16 @@ pub async fn ensure_collections(
|
|||
Field::text("params", true),
|
||||
Field::file("screenshot", vec!["image/png", "image/jpeg", "image/webp"]),
|
||||
],
|
||||
list_rule: user_only.clone(),
|
||||
view_rule: user_only.clone(),
|
||||
create_rule: user_only.clone(),
|
||||
update_rule: user_only.clone(),
|
||||
delete_rule: user_only,
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
} else {
|
||||
info!("PocketBase collection 'saved_searches' already exists");
|
||||
ensure_saved_searches_rules(client, base_url, &token).await?;
|
||||
}
|
||||
|
||||
if !existing.iter().any(|n| n == "invites") {
|
||||
|
|
@ -304,6 +352,11 @@ pub async fn ensure_collections(
|
|||
Field::text("used_by_id", false),
|
||||
Field::text("used_at", false),
|
||||
],
|
||||
list_rule: None,
|
||||
view_rule: None,
|
||||
create_rule: None,
|
||||
update_rule: None,
|
||||
delete_rule: None,
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
|
@ -323,6 +376,11 @@ pub async fn ensure_collections(
|
|||
Field::text("code", true),
|
||||
Field::text("params", true),
|
||||
],
|
||||
list_rule: None,
|
||||
view_rule: None,
|
||||
create_rule: None,
|
||||
update_rule: None,
|
||||
delete_rule: None,
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
|
@ -333,7 +391,7 @@ pub async fn ensure_collections(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Configure Google and Apple OAuth2 providers in PocketBase settings.
|
||||
/// Configure Google OAuth2 provider in PocketBase settings.
|
||||
/// Also sets `meta.appUrl` so OAuth callbacks route to `{public_url}/pb`.
|
||||
pub async fn ensure_oauth_providers(
|
||||
client: &Client,
|
||||
|
|
@ -343,8 +401,6 @@ pub async fn ensure_oauth_providers(
|
|||
public_url: &str,
|
||||
google_client_id: &str,
|
||||
google_client_secret: &str,
|
||||
apple_client_id: &str,
|
||||
apple_client_secret: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
let base_url = base_url.trim_end_matches('/');
|
||||
let token = auth_superuser(client, base_url, admin_email, admin_password).await?;
|
||||
|
|
@ -392,12 +448,6 @@ pub async fn ensure_oauth_providers(
|
|||
provider["enabled"] = serde_json::json!(true);
|
||||
info!("Configured Google OAuth provider");
|
||||
}
|
||||
"apple" => {
|
||||
provider["clientId"] = serde_json::json!(apple_client_id);
|
||||
provider["clientSecret"] = serde_json::json!(apple_client_secret);
|
||||
provider["enabled"] = serde_json::json!(true);
|
||||
info!("Configured Apple OAuth provider");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
mod ai_filters;
|
||||
mod area_summary;
|
||||
mod checkout;
|
||||
mod export;
|
||||
mod features;
|
||||
|
|
@ -26,7 +25,6 @@ pub(crate) mod travel_time;
|
|||
mod travel_modes;
|
||||
|
||||
pub use ai_filters::{build_ollama_schema, build_system_prompt, post_ai_filters};
|
||||
pub use area_summary::post_area_summary;
|
||||
pub use checkout::post_checkout;
|
||||
pub use export::get_export;
|
||||
pub use features::{build_features_response, get_features, FeatureInfo, FeaturesResponse};
|
||||
|
|
|
|||
|
|
@ -1,118 +0,0 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::Json;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::info;
|
||||
|
||||
use crate::consts::{
|
||||
AREA_SUMMARY_MAX_TOKENS, AREA_SUMMARY_SYSTEM_PROMPT, AREA_SUMMARY_TEMPERATURE,
|
||||
};
|
||||
use crate::state::AppState;
|
||||
use crate::utils::{extract_openai_content, ollama_chat, strip_think_blocks};
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct NumericStat {
|
||||
name: String,
|
||||
mean: f64,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct EnumStat {
|
||||
name: String,
|
||||
counts: std::collections::HashMap<String, u64>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct AreaSummaryRequest {
|
||||
count: usize,
|
||||
location: String,
|
||||
is_postcode: bool,
|
||||
#[serde(default)]
|
||||
filters: Vec<String>,
|
||||
#[serde(default)]
|
||||
numeric_stats: Vec<NumericStat>,
|
||||
#[serde(default)]
|
||||
enum_stats: Vec<EnumStat>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct AreaSummaryResponse {
|
||||
summary: String,
|
||||
}
|
||||
|
||||
fn build_prompt(req: &AreaSummaryRequest) -> String {
|
||||
let mut parts = Vec::new();
|
||||
|
||||
let area_type = if req.is_postcode { "postcode" } else { "area" };
|
||||
parts.push(format!(
|
||||
"Summarise this {} of England which contains {} properties matching my requirements.\n",
|
||||
area_type, req.count
|
||||
));
|
||||
|
||||
if !req.filters.is_empty() {
|
||||
parts.push(format!("Active filters: {}.\n", req.filters.join(", ")));
|
||||
}
|
||||
|
||||
if !req.numeric_stats.is_empty() {
|
||||
let stats: Vec<String> = req
|
||||
.numeric_stats
|
||||
.iter()
|
||||
.map(|stat| format!("{}: {:.1}", stat.name, stat.mean))
|
||||
.collect();
|
||||
parts.push(format!(
|
||||
"Average values of the {}: {}.",
|
||||
if req.is_postcode { "postcode" } else { "area" },
|
||||
stats.join(", ")
|
||||
));
|
||||
}
|
||||
|
||||
for es in &req.enum_stats {
|
||||
let total: u64 = es.counts.values().sum();
|
||||
if total == 0 {
|
||||
continue;
|
||||
}
|
||||
let mut sorted: Vec<_> = es.counts.iter().collect();
|
||||
sorted.sort_by(|lhs, rhs| rhs.1.cmp(lhs.1));
|
||||
let top: Vec<String> = sorted
|
||||
.iter()
|
||||
.take(3)
|
||||
.map(|(val, count)| {
|
||||
let pct = **count as f64 / total as f64 * 100.0;
|
||||
format!("{} ({:.0}%)", val, pct)
|
||||
})
|
||||
.collect();
|
||||
parts.push(format!("{}: {}.", es.name, top.join(", ")));
|
||||
}
|
||||
|
||||
let result = parts.join(" ");
|
||||
info!(prompt = %result, "Built prompt for area summary");
|
||||
result
|
||||
}
|
||||
|
||||
pub async fn post_area_summary(
|
||||
state: Arc<AppState>,
|
||||
Json(req): Json<AreaSummaryRequest>,
|
||||
) -> Result<Json<AreaSummaryResponse>, (StatusCode, String)> {
|
||||
let prompt = build_prompt(&req);
|
||||
info!(location = %req.location, count = req.count, "POST /api/area-summary");
|
||||
|
||||
let url = format!("{}/v1/chat/completions", state.ollama_url);
|
||||
let body = serde_json::json!({
|
||||
"model": state.ollama_model,
|
||||
"messages": [
|
||||
{ "role": "system", "content": AREA_SUMMARY_SYSTEM_PROMPT },
|
||||
{ "role": "user", "content": prompt }
|
||||
],
|
||||
"stream": false,
|
||||
"temperature": AREA_SUMMARY_TEMPERATURE,
|
||||
"max_tokens": AREA_SUMMARY_MAX_TOKENS,
|
||||
});
|
||||
|
||||
let json = ollama_chat(&state.http_client, &url, &body).await?;
|
||||
let content = extract_openai_content(&json)?;
|
||||
|
||||
let summary = strip_think_blocks(content).trim().to_string();
|
||||
|
||||
Ok(Json(AreaSummaryResponse { summary }))
|
||||
}
|
||||
|
|
@ -11,7 +11,7 @@ use tracing::info;
|
|||
|
||||
use crate::aggregation::Aggregator;
|
||||
use crate::auth::OptionalUser;
|
||||
use crate::consts::MAX_CELLS_PER_REQUEST;
|
||||
use crate::consts::{DEMO_CENTER, DEMO_CENTER_TOLERANCE, MAX_CELLS_PER_REQUEST};
|
||||
use crate::data::travel_time::TravelData;
|
||||
use crate::licensing::check_license_bounds;
|
||||
use crate::parsing::{
|
||||
|
|
@ -190,9 +190,14 @@ pub async fn get_hexagons(
|
|||
let (south, west, north, east) =
|
||||
require_bounds(params.bounds).map_err(IntoResponse::into_response)?;
|
||||
|
||||
// Skip license check at low resolutions (≤5) — data is too aggregated to be
|
||||
// commercially useful, and the homepage demo needs country-wide access.
|
||||
if resolution > 5 {
|
||||
// Allow the homepage demo: check if the center of the requested bounds
|
||||
// is near the demo view center (52.2, -1.9).
|
||||
let center_lat = (south + north) / 2.0;
|
||||
let center_lng = (west + east) / 2.0;
|
||||
let is_demo_view = (center_lat - DEMO_CENTER.0).abs() <= DEMO_CENTER_TOLERANCE
|
||||
&& (center_lng - DEMO_CENTER.1).abs() <= DEMO_CENTER_TOLERANCE;
|
||||
|
||||
if !is_demo_view {
|
||||
check_license_bounds(&user.0, (south, west, north, east))
|
||||
.map_err(|(_, resp)| resp)?;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,6 +38,10 @@ pub struct Property {
|
|||
pub duration: Option<String>,
|
||||
pub current_energy_rating: Option<String>,
|
||||
pub potential_energy_rating: Option<String>,
|
||||
pub listing_status: Option<String>,
|
||||
pub listing_url: Option<String>,
|
||||
pub property_sub_type: Option<String>,
|
||||
pub price_qualifier: Option<String>,
|
||||
|
||||
// Numeric fields
|
||||
pub lat: f32,
|
||||
|
|
@ -48,6 +52,9 @@ pub struct Property {
|
|||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub renovation_history: Vec<RenovationEvent>,
|
||||
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub listing_features: Vec<String>,
|
||||
|
||||
#[serde(flatten)]
|
||||
pub features: FxHashMap<String, f32>,
|
||||
}
|
||||
|
|
@ -231,6 +238,18 @@ pub async fn get_hexagon_properties(
|
|||
lat: state.data.lat[row],
|
||||
lon: state.data.lon[row],
|
||||
renovation_history: state.data.renovation_history(row).to_vec(),
|
||||
listing_features: state.data.listing_features(row).to_vec(),
|
||||
listing_status: lookup_enum_value(
|
||||
feature_name_to_index,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_values,
|
||||
row,
|
||||
"Listing status",
|
||||
),
|
||||
listing_url: state.data.listing_url(row).map(String::from),
|
||||
property_sub_type: state.data.property_sub_type(row).map(String::from),
|
||||
price_qualifier: state.data.price_qualifier(row).map(String::from),
|
||||
features,
|
||||
}
|
||||
})
|
||||
|
|
|
|||
|
|
@ -1,12 +1,16 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use axum::http::{header, StatusCode, Uri};
|
||||
use axum::http::{header, HeaderMap, StatusCode, Uri};
|
||||
use axum::response::IntoResponse;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::state::AppState;
|
||||
|
||||
pub async fn get_screenshot(state: Arc<AppState>, uri: Uri) -> impl IntoResponse {
|
||||
pub async fn get_screenshot(
|
||||
state: Arc<AppState>,
|
||||
headers: HeaderMap,
|
||||
uri: Uri,
|
||||
) -> impl IntoResponse {
|
||||
let screenshot_base = &state.screenshot_url;
|
||||
|
||||
let qs = uri
|
||||
|
|
@ -16,7 +20,12 @@ pub async fn get_screenshot(state: Arc<AppState>, uri: Uri) -> impl IntoResponse
|
|||
let url = format!("{screenshot_base}/screenshot{qs}");
|
||||
info!("Proxying screenshot request to: {}", url);
|
||||
|
||||
match state.http_client.get(&url).send().await {
|
||||
let mut req = state.http_client.get(&url);
|
||||
if let Some(auth) = headers.get(header::AUTHORIZATION) {
|
||||
req = req.header(header::AUTHORIZATION, auth);
|
||||
}
|
||||
|
||||
match req.send().await {
|
||||
Ok(resp) if resp.status().is_success() => match resp.bytes().await {
|
||||
Ok(bytes) => (
|
||||
StatusCode::OK,
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ use rand::Rng;
|
|||
use serde::{Deserialize, Serialize};
|
||||
use tracing::warn;
|
||||
|
||||
use crate::pocketbase::auth_superuser;
|
||||
use crate::state::AppState;
|
||||
|
||||
const CODE_LEN: usize = 8;
|
||||
|
|
@ -39,6 +40,22 @@ struct PbRecord {
|
|||
|
||||
pub async fn post_shorten(state: Arc<AppState>, Json(req): Json<ShortenRequest>) -> Response {
|
||||
let pb_url = state.pocketbase_url.trim_end_matches('/');
|
||||
|
||||
let token = match auth_superuser(
|
||||
&state.http_client,
|
||||
pb_url,
|
||||
&state.pocketbase_admin_email,
|
||||
&state.pocketbase_admin_password,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(t) => t,
|
||||
Err(err) => {
|
||||
warn!("PocketBase superuser auth failed: {err}");
|
||||
return StatusCode::BAD_GATEWAY.into_response();
|
||||
}
|
||||
};
|
||||
|
||||
let code = generate_code();
|
||||
|
||||
let record = PbRecord {
|
||||
|
|
@ -51,6 +68,7 @@ pub async fn post_shorten(state: Arc<AppState>, Json(req): Json<ShortenRequest>)
|
|||
.post(format!(
|
||||
"{pb_url}/api/collections/short_urls/records"
|
||||
))
|
||||
.header("Authorization", format!("Bearer {token}"))
|
||||
.json(&record)
|
||||
.send()
|
||||
.await;
|
||||
|
|
@ -79,13 +97,33 @@ pub async fn post_shorten(state: Arc<AppState>, Json(req): Json<ShortenRequest>)
|
|||
pub async fn get_short_url(state: Arc<AppState>, Path(code): Path<String>) -> Response {
|
||||
let pb_url = state.pocketbase_url.trim_end_matches('/');
|
||||
|
||||
let token = match auth_superuser(
|
||||
&state.http_client,
|
||||
pb_url,
|
||||
&state.pocketbase_admin_email,
|
||||
&state.pocketbase_admin_password,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(t) => t,
|
||||
Err(err) => {
|
||||
warn!("PocketBase superuser auth failed: {err}");
|
||||
return StatusCode::BAD_GATEWAY.into_response();
|
||||
}
|
||||
};
|
||||
|
||||
let filter = format!("code=\"{code}\"");
|
||||
let url = format!(
|
||||
"{pb_url}/api/collections/short_urls/records?filter={}&perPage=1",
|
||||
urlencoding::encode(&filter)
|
||||
);
|
||||
|
||||
let res = state.http_client.get(&url).send().await;
|
||||
let res = state
|
||||
.http_client
|
||||
.get(&url)
|
||||
.header("Authorization", format!("Bearer {token}"))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
match res {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
|
|
|
|||
|
|
@ -6,4 +6,4 @@ mod llm;
|
|||
pub use grid_index::GridIndex;
|
||||
pub use hash::{generate_priorities, splitmix64_hash};
|
||||
pub use interned_column::InternedColumn;
|
||||
pub use llm::{extract_ollama_content, extract_openai_content, ollama_chat, strip_think_blocks};
|
||||
pub use llm::{extract_ollama_content, ollama_chat, strip_think_blocks};
|
||||
|
|
|
|||
|
|
@ -40,22 +40,6 @@ pub async fn ollama_chat(
|
|||
})
|
||||
}
|
||||
|
||||
/// Extract content from OpenAI-compatible response (`choices[0].message.content`)
|
||||
pub fn extract_openai_content(json: &Value) -> Result<&str, LlmError> {
|
||||
json.get("choices")
|
||||
.and_then(|ch| ch.get(0))
|
||||
.and_then(|ch| ch.get("message"))
|
||||
.and_then(|msg| msg.get("content"))
|
||||
.and_then(|ct| ct.as_str())
|
||||
.ok_or_else(|| {
|
||||
warn!("Malformed OpenAI response: missing choices[0].message.content");
|
||||
(
|
||||
StatusCode::BAD_GATEWAY,
|
||||
"Malformed LLM response: missing choices[0].message.content".into(),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Extract content from Ollama native response (`message.content`)
|
||||
pub fn extract_ollama_content(json: &Value) -> Result<&str, LlmError> {
|
||||
json.get("message")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue