All good
This commit is contained in:
parent
6ea544a0f6
commit
6cc7288126
45 changed files with 929 additions and 1043 deletions
|
|
@ -6,6 +6,8 @@ use polars::prelude::*;
|
|||
use serde::Serialize;
|
||||
use tracing::info;
|
||||
|
||||
use crate::consts::{NAN_U16, QUANT_SCALE};
|
||||
use crate::data::{PropertyData, QuantRef};
|
||||
use crate::utils::{normalize_postcode, GridIndex, InternedColumn};
|
||||
|
||||
const GRID_CELL_SIZE: f32 = 0.01;
|
||||
|
|
@ -52,15 +54,22 @@ pub struct ActualListingData {
|
|||
pub listing_status: InternedColumn,
|
||||
pub listing_date_iso: Vec<Option<String>>,
|
||||
pub features: Vec<Vec<String>>,
|
||||
/// Row-major feature matrix aligned with PropertyData::feature_names.
|
||||
///
|
||||
/// Rows start from a best-effort address/postcode join to the historical property
|
||||
/// dataset, then live listing fields such as asking price and property type are
|
||||
/// overlaid where available. This lets the listings endpoint use the same filter
|
||||
/// execution path as the property endpoints.
|
||||
pub filter_feature_data: Vec<u16>,
|
||||
pub grid: GridIndex,
|
||||
}
|
||||
|
||||
impl ActualListingData {
|
||||
pub fn load(parquet_path: &Path) -> Result<Self> {
|
||||
super::run_polars_io(|| Self::load_inner(parquet_path))
|
||||
pub fn load(parquet_path: &Path, property_data: &PropertyData) -> Result<Self> {
|
||||
super::run_polars_io(|| Self::load_inner(parquet_path, Some(property_data)))
|
||||
}
|
||||
|
||||
fn load_inner(parquet_path: &Path) -> Result<Self> {
|
||||
fn load_inner(parquet_path: &Path, property_data: Option<&PropertyData>) -> Result<Self> {
|
||||
info!("Loading actual listings from {:?}", parquet_path);
|
||||
let pl_path = PlRefPath::try_from_path(parquet_path)
|
||||
.context("Failed to normalize actual listings parquet path")?;
|
||||
|
|
@ -99,6 +108,18 @@ impl ActualListingData {
|
|||
let price_qualifier = InternedColumn::build(&opt_to_string(&price_qualifier_raw));
|
||||
let listing_status = InternedColumn::build(&opt_to_string(&listing_status_raw));
|
||||
|
||||
let filter_feature_data = build_filter_feature_data(
|
||||
property_data,
|
||||
&postcode,
|
||||
&address,
|
||||
&property_type_raw,
|
||||
&leasehold_freehold_raw,
|
||||
&rooms_total,
|
||||
&floor_area_sqm,
|
||||
&asking_price,
|
||||
&asking_price_per_sqm,
|
||||
);
|
||||
|
||||
let grid = GridIndex::build(&lat, &lon, GRID_CELL_SIZE);
|
||||
|
||||
info!(rows = row_count, "Actual listings loaded");
|
||||
|
|
@ -122,6 +143,7 @@ impl ActualListingData {
|
|||
listing_status,
|
||||
listing_date_iso,
|
||||
features,
|
||||
filter_feature_data,
|
||||
grid,
|
||||
})
|
||||
}
|
||||
|
|
@ -150,6 +172,201 @@ impl ActualListingData {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn build_filter_feature_data(
|
||||
property_data: Option<&PropertyData>,
|
||||
postcode: &[String],
|
||||
address: &[Option<String>],
|
||||
property_type: &[Option<String>],
|
||||
leasehold_freehold: &[Option<String>],
|
||||
rooms_total: &[Option<i32>],
|
||||
floor_area_sqm: &[Option<f32>],
|
||||
asking_price: &[Option<i64>],
|
||||
asking_price_per_sqm: &[Option<f32>],
|
||||
) -> Vec<u16> {
|
||||
let Some(property_data) = property_data else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
let num_features = property_data.num_features;
|
||||
let mut feature_data = vec![NAN_U16; postcode.len() * num_features];
|
||||
let mut joined_rows = 0usize;
|
||||
|
||||
for (row, postcode_value) in postcode.iter().enumerate() {
|
||||
let Some(address_value) = address[row]
|
||||
.as_deref()
|
||||
.map(str::trim)
|
||||
.filter(|v| !v.is_empty())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let query = format!("{address_value} {postcode_value}");
|
||||
let Some(&property_row) = property_data.search_addresses(&query, 1).first() else {
|
||||
continue;
|
||||
};
|
||||
if property_data.postcode(property_row) != postcode_value {
|
||||
continue;
|
||||
}
|
||||
|
||||
let dst = row * num_features;
|
||||
let src = property_row * num_features;
|
||||
feature_data[dst..dst + num_features]
|
||||
.copy_from_slice(&property_data.feature_data[src..src + num_features]);
|
||||
joined_rows += 1;
|
||||
}
|
||||
|
||||
let quant = property_data.quant_ref();
|
||||
overlay_numeric_feature(
|
||||
&mut feature_data,
|
||||
property_data,
|
||||
&quant,
|
||||
"Total floor area (sqm)",
|
||||
floor_area_sqm.iter().copied(),
|
||||
false,
|
||||
);
|
||||
overlay_numeric_feature(
|
||||
&mut feature_data,
|
||||
property_data,
|
||||
&quant,
|
||||
"Number of bedrooms & living rooms",
|
||||
rooms_total.iter().map(|value| value.map(|v| v as f32)),
|
||||
false,
|
||||
);
|
||||
overlay_numeric_feature(
|
||||
&mut feature_data,
|
||||
property_data,
|
||||
&quant,
|
||||
"Estimated current price",
|
||||
asking_price.iter().map(|value| value.map(|v| v as f32)),
|
||||
true,
|
||||
);
|
||||
overlay_numeric_feature(
|
||||
&mut feature_data,
|
||||
property_data,
|
||||
&quant,
|
||||
"Last known price",
|
||||
asking_price.iter().map(|value| value.map(|v| v as f32)),
|
||||
true,
|
||||
);
|
||||
overlay_numeric_feature(
|
||||
&mut feature_data,
|
||||
property_data,
|
||||
&quant,
|
||||
"Est. price per sqm",
|
||||
asking_price_per_sqm.iter().copied(),
|
||||
true,
|
||||
);
|
||||
overlay_numeric_feature(
|
||||
&mut feature_data,
|
||||
property_data,
|
||||
&quant,
|
||||
"Price per sqm",
|
||||
asking_price_per_sqm.iter().copied(),
|
||||
true,
|
||||
);
|
||||
overlay_enum_feature(
|
||||
&mut feature_data,
|
||||
property_data,
|
||||
"Property type",
|
||||
property_type.iter().map(Option::as_deref),
|
||||
false,
|
||||
);
|
||||
overlay_enum_feature(
|
||||
&mut feature_data,
|
||||
property_data,
|
||||
"Leasehold/Freehold",
|
||||
leasehold_freehold.iter().map(Option::as_deref),
|
||||
false,
|
||||
);
|
||||
|
||||
info!(
|
||||
rows = postcode.len(),
|
||||
joined_rows, "Actual listings joined to property feature matrix"
|
||||
);
|
||||
|
||||
feature_data
|
||||
}
|
||||
|
||||
fn feature_index(property_data: &PropertyData, name: &str) -> Option<usize> {
|
||||
property_data
|
||||
.feature_names
|
||||
.iter()
|
||||
.position(|candidate| candidate == name)
|
||||
}
|
||||
|
||||
fn overlay_numeric_feature<I>(
|
||||
feature_data: &mut [u16],
|
||||
property_data: &PropertyData,
|
||||
quant: &QuantRef<'_>,
|
||||
name: &str,
|
||||
values: I,
|
||||
clear_missing: bool,
|
||||
) where
|
||||
I: IntoIterator<Item = Option<f32>>,
|
||||
{
|
||||
let Some(feat_idx) = feature_index(property_data, name) else {
|
||||
return;
|
||||
};
|
||||
if feat_idx >= property_data.num_numeric {
|
||||
return;
|
||||
}
|
||||
|
||||
let num_features = property_data.num_features;
|
||||
for (row, value) in values.into_iter().enumerate() {
|
||||
let dst = row * num_features + feat_idx;
|
||||
match value {
|
||||
Some(value) => feature_data[dst] = encode_numeric_value(quant, feat_idx, value),
|
||||
None if clear_missing => feature_data[dst] = NAN_U16,
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn overlay_enum_feature<'a, I>(
|
||||
feature_data: &mut [u16],
|
||||
property_data: &PropertyData,
|
||||
name: &str,
|
||||
values: I,
|
||||
clear_missing: bool,
|
||||
) where
|
||||
I: IntoIterator<Item = Option<&'a str>>,
|
||||
{
|
||||
let Some(feat_idx) = feature_index(property_data, name) else {
|
||||
return;
|
||||
};
|
||||
let Some(enum_values) = property_data.enum_values.get(&feat_idx) else {
|
||||
return;
|
||||
};
|
||||
|
||||
let num_features = property_data.num_features;
|
||||
for (row, value) in values.into_iter().enumerate() {
|
||||
let dst = row * num_features + feat_idx;
|
||||
let encoded = value
|
||||
.map(str::trim)
|
||||
.filter(|text| !text.is_empty())
|
||||
.and_then(|text| enum_values.iter().position(|candidate| candidate == text))
|
||||
.map(|position| position as u16);
|
||||
match encoded {
|
||||
Some(value) => feature_data[dst] = value,
|
||||
None if clear_missing => feature_data[dst] = NAN_U16,
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_numeric_value(quant: &QuantRef<'_>, feat_idx: usize, value: f32) -> u16 {
|
||||
if !value.is_finite() {
|
||||
return NAN_U16;
|
||||
}
|
||||
let range = quant.quant_range[feat_idx];
|
||||
if range <= 0.0 {
|
||||
return 0;
|
||||
}
|
||||
let normalized = (value - quant.quant_min[feat_idx]) / range;
|
||||
(normalized * QUANT_SCALE).round().clamp(0.0, QUANT_SCALE) as u16
|
||||
}
|
||||
|
||||
fn opt_to_string(values: &[Option<String>]) -> Vec<String> {
|
||||
values
|
||||
.iter()
|
||||
|
|
@ -311,7 +528,7 @@ mod tests {
|
|||
return;
|
||||
};
|
||||
|
||||
let data = ActualListingData::load(&path).expect("listings load");
|
||||
let data = ActualListingData::load_inner(&path, None).expect("listings load");
|
||||
assert!(!data.lat.is_empty());
|
||||
assert_eq!(data.lat.len(), data.lon.len());
|
||||
assert_eq!(data.lat.len(), data.postcode.len());
|
||||
|
|
|
|||
|
|
@ -30,16 +30,6 @@ const GROCERY_DASHBOARD_CATEGORIES: &[&str] = &[
|
|||
"Budgens",
|
||||
"Centra",
|
||||
"Co-op",
|
||||
"Central England Co-operative",
|
||||
"Chelmsford Star Co-operative Society",
|
||||
"East of England Co-operative",
|
||||
"Heart of England Co-operative",
|
||||
"Lincolnshire Co-operative",
|
||||
"Midcounties Co-operative",
|
||||
"Scottish Midland Co-operative",
|
||||
"Tamworth Co-operative Society",
|
||||
"The Radstock Co-operative Society",
|
||||
"The Southern Co-operative",
|
||||
"COOK",
|
||||
"Costco",
|
||||
"Dunnes Stores",
|
||||
|
|
@ -104,10 +94,35 @@ fn add_category_filter_index(
|
|||
}
|
||||
}
|
||||
|
||||
/// Maps the name of any co-operative society listed below onto the single `"Co-op"`
/// category; every other category is returned unchanged.
///
/// Matching is exact: the input is neither trimmed nor case-folded.
fn canonical_poi_category(category: &str) -> &str {
    const CO_OP_ALIASES: &[&str] = &[
        "Allendale Co-operative Society",
        "Central England Co-operative",
        "Channel Islands Co-operative Society",
        "Chelmsford Star Co-operative Society",
        "Clydebank Co-operative",
        "Coniston Co-operative Society",
        "Co-op Food",
        "East of England Co-operative",
        "Heart of England Co-operative",
        "Langdale Co-operative Society",
        "Lincolnshire Co-operative",
        "Midcounties Co-operative",
        "Scottish Midland Co-operative",
        "Tamworth Co-operative Society",
        "The Co-operative Food",
        "The Co-operative Food PFS",
        "The Co-operative Group",
        "The Radstock Co-operative Society",
        "The Southern Co-operative",
    ];

    if CO_OP_ALIASES.iter().any(|alias| *alias == category) {
        "Co-op"
    } else {
        category
    }
}
|
||||
|
||||
pub fn resolve_poi_category_filter(category_values: &[String], categories: &str) -> FxHashSet<u16> {
|
||||
let mut selected = FxHashSet::default();
|
||||
for part in categories.split(',') {
|
||||
let category = part.trim();
|
||||
let category = canonical_poi_category(part.trim());
|
||||
if category.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -200,12 +215,18 @@ impl POIData {
|
|||
|
||||
let id_raw: Vec<String> = extract_str_col(&df, "id")?;
|
||||
let name = extract_str_col(&df, "name")?;
|
||||
let category_raw = extract_str_col(&df, "category")?;
|
||||
let category_raw: Vec<String> = extract_str_col(&df, "category")?
|
||||
.into_iter()
|
||||
.map(|category| canonical_poi_category(&category).to_string())
|
||||
.collect();
|
||||
let group_raw = extract_str_col(&df, "group")?;
|
||||
let lat = extract_f32_col(&df, "lat")?;
|
||||
let lng = extract_f32_col(&df, "lng")?;
|
||||
let emoji_raw = extract_str_col(&df, "emoji")?;
|
||||
let icon_category_raw = extract_str_col(&df, "icon_category")?;
|
||||
let icon_category_raw: Vec<String> = extract_str_col(&df, "icon_category")?
|
||||
.into_iter()
|
||||
.map(|category| canonical_poi_category(&category).to_string())
|
||||
.collect();
|
||||
|
||||
// Pack POI IDs into a contiguous buffer
|
||||
let total_id_bytes: usize = id_raw.iter().map(|s| s.len()).sum();
|
||||
|
|
@ -351,4 +372,19 @@ mod tests {
|
|||
|
||||
assert!(selected.is_empty());
|
||||
}
|
||||
|
||||
#[test]
fn coop_category_aliases_resolve_to_single_category() {
    // The alias mapping itself: society names collapse, unrelated names pass through.
    assert_eq!(canonical_poi_category("Lincolnshire Co-operative"), "Co-op");
    assert_eq!(canonical_poi_category("Tesco"), "Tesco");

    // Two different aliases in one filter string must select only the "Co-op" entry,
    // which sits at position 0 of the category values.
    let values = vec![String::from("Co-op"), String::from("Tesco")];
    let requested = "Central England Co-operative,The Southern Co-operative";
    let selected = resolve_poi_category_filter(&values, requested);

    assert_eq!(selected.len(), 1);
    assert!(selected.contains(&0));
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1014,22 +1014,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
];
|
||||
|
||||
/// Feature names that describe an individual property (price, size, type, etc.) rather
|
||||
/// than the surrounding area. Use this to skip filters that should not exclude live
|
||||
/// listings on the map even though they hide aggregated property rows.
|
||||
pub fn property_level_feature_names() -> Vec<&'static str> {
|
||||
const PROPERTY_GROUPS: &[&str] = &["Properties", "Property prices"];
|
||||
FEATURE_GROUPS
|
||||
.iter()
|
||||
.filter(|group| PROPERTY_GROUPS.contains(&group.name))
|
||||
.flat_map(|group| group.features.iter())
|
||||
.map(|feature| match feature {
|
||||
Feature::Numeric(c) => c.name,
|
||||
Feature::Enum(c) => c.name,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Flat ordered list of all numeric feature names (follows group order).
|
||||
pub fn all_numeric_feature_names() -> Vec<&'static str> {
|
||||
FEATURE_GROUPS
|
||||
|
|
|
|||
|
|
@ -541,7 +541,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
bail!("Actual listings parquet not found: {}", path.display());
|
||||
}
|
||||
info!("Loading actual listings from {}", path.display());
|
||||
let listings = data::ActualListingData::load(path)?;
|
||||
let listings = data::ActualListingData::load(path, &property_data)?;
|
||||
trim_allocator("actual listings load");
|
||||
info!(rows = listings.lat.len(), "Actual listings loaded");
|
||||
Some(Arc::new(listings))
|
||||
|
|
|
|||
|
|
@ -1,16 +1,20 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::{Query, State};
|
||||
use axum::response::Json;
|
||||
use axum::response::{IntoResponse, Json, Response};
|
||||
use axum::Extension;
|
||||
use rustc_hash::FxHashSet;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::info;
|
||||
|
||||
use crate::api_error::ApiError;
|
||||
use crate::auth::OptionalUser;
|
||||
use crate::consts::NAN_U16;
|
||||
use crate::data::ActualListing;
|
||||
use crate::features::property_level_feature_names;
|
||||
use crate::licensing::{check_license_bounds, resolve_share_code};
|
||||
use crate::parsing::{
|
||||
parse_filters_with_poi, require_bounds, row_passes_filters, row_passes_poi_filters,
|
||||
ParsedEnumFilter, ParsedFilter,
|
||||
};
|
||||
use crate::state::{AppState, SharedState};
|
||||
|
||||
|
|
@ -25,6 +29,8 @@ pub struct ActualListingsParams {
|
|||
travel: Option<String>,
|
||||
/// Number of results to skip. Defaults to 0.
|
||||
offset: Option<usize>,
|
||||
/// Share-link code; grants bbox-scoped access for unlicensed users.
|
||||
share: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
@ -35,10 +41,24 @@ pub struct ActualListingsResponse {
|
|||
pub truncated: bool,
|
||||
}
|
||||
|
||||
/// Feature names whose filters are evaluated per listing, against the listing's own
/// row of the filter feature matrix, instead of per postcode.
const LISTING_LEVEL_FILTER_FEATURES: &[&str] = &[
    "Property type",
    "Leasehold/Freehold",
    "Total floor area (sqm)",
    "Number of bedrooms & living rooms",
    "Estimated current price",
    "Last known price",
    "Est. price per sqm",
    "Price per sqm",
];

/// Listing-level numeric features for which a listing with an unknown value still
/// passes the filter instead of being excluded.
const KEEP_UNKNOWN_LISTING_FILTER_FEATURES: &[&str] = &["Total floor area (sqm)"];
|
||||
|
||||
pub async fn get_actual_listings(
|
||||
State(shared): State<Arc<SharedState>>,
|
||||
Extension(user): Extension<OptionalUser>,
|
||||
Query(params): Query<ActualListingsParams>,
|
||||
) -> Result<Json<ActualListingsResponse>, ApiError> {
|
||||
) -> Result<Json<ActualListingsResponse>, Response> {
|
||||
let state = shared.load_state();
|
||||
let offset = params.offset.unwrap_or(0);
|
||||
let Some(actual_listings) = state.actual_listings.clone() else {
|
||||
|
|
@ -49,11 +69,15 @@ pub async fn get_actual_listings(
|
|||
truncated: false,
|
||||
}));
|
||||
};
|
||||
let (south, west, north, east) = require_bounds(params.bounds).map_err(ApiError::from)?;
|
||||
let (south, west, north, east) =
|
||||
require_bounds(params.bounds).map_err(IntoResponse::into_response)?;
|
||||
|
||||
let share_bounds = resolve_share_code(&state, params.share.as_deref()).await;
|
||||
check_license_bounds(&user.0, (south, west, north, east), share_bounds)?;
|
||||
|
||||
let quant = state.data.quant_ref();
|
||||
let poi_quant = state.data.poi_metrics.quant_ref();
|
||||
let (mut parsed_filters, mut parsed_enum_filters, parsed_poi_filters) = parse_filters_with_poi(
|
||||
let (parsed_filters, parsed_enum_filters, parsed_poi_filters) = parse_filters_with_poi(
|
||||
params.filters.as_deref(),
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
|
|
@ -61,40 +85,38 @@ pub async fn get_actual_listings(
|
|||
&state.data.poi_metrics.name_to_index,
|
||||
&poi_quant,
|
||||
)
|
||||
.map_err(ApiError::BadRequest)?;
|
||||
.map_err(|err| ApiError::BadRequest(err).into_response())?;
|
||||
|
||||
// Drop property-level filters (price, sqm, build year, beds, type, etc.) so they
|
||||
// don't hide live listings — those are individual-property concerns the user can
|
||||
// judge from the pin itself. We only keep area/postcode-level filters here.
|
||||
let property_level_idxs: FxHashSet<usize> = property_level_feature_names()
|
||||
.into_iter()
|
||||
.filter_map(|name| state.feature_name_to_index.get(name).copied())
|
||||
.collect();
|
||||
parsed_filters.retain(|f| !property_level_idxs.contains(&f.feat_idx));
|
||||
parsed_enum_filters.retain(|f| !property_level_idxs.contains(&f.feat_idx));
|
||||
let travel_entries = parse_optional_travel(params.travel.as_deref())
|
||||
.map_err(|err| ApiError::BadRequest(err).into_response())?;
|
||||
|
||||
let travel_entries =
|
||||
parse_optional_travel(params.travel.as_deref()).map_err(ApiError::BadRequest)?;
|
||||
let listing_level_feature_idxs = listing_level_filter_feature_idxs(&state);
|
||||
let keep_unknown_listing_filter_idxs = keep_unknown_listing_filter_feature_idxs(&state);
|
||||
let (listing_filters, postcode_filters) =
|
||||
split_numeric_filters(parsed_filters, &listing_level_feature_idxs);
|
||||
let (listing_enum_filters, postcode_enum_filters) =
|
||||
split_enum_filters(parsed_enum_filters, &listing_level_feature_idxs);
|
||||
|
||||
let has_area_filters = !parsed_filters.is_empty()
|
||||
|| !parsed_enum_filters.is_empty()
|
||||
let has_postcode_filters = !postcode_filters.is_empty()
|
||||
|| !postcode_enum_filters.is_empty()
|
||||
|| !parsed_poi_filters.is_empty()
|
||||
|| !travel_entries.is_empty();
|
||||
let has_listing_filters = !listing_filters.is_empty() || !listing_enum_filters.is_empty();
|
||||
|
||||
let state_clone = state.clone();
|
||||
let response =
|
||||
tokio::task::spawn_blocking(move || -> Result<ActualListingsResponse, String> {
|
||||
let t0 = std::time::Instant::now();
|
||||
|
||||
let passing_postcodes = if has_area_filters {
|
||||
let passing_postcodes = if has_postcode_filters {
|
||||
Some(compute_passing_postcodes(
|
||||
&state_clone,
|
||||
south,
|
||||
west,
|
||||
north,
|
||||
east,
|
||||
&parsed_filters,
|
||||
&parsed_enum_filters,
|
||||
&postcode_filters,
|
||||
&postcode_enum_filters,
|
||||
&parsed_poi_filters,
|
||||
&travel_entries,
|
||||
)?)
|
||||
|
|
@ -116,6 +138,18 @@ pub async fn get_actual_listings(
|
|||
return None;
|
||||
}
|
||||
}
|
||||
if has_listing_filters
|
||||
&& !row_passes_listing_filters(
|
||||
row,
|
||||
&listing_filters,
|
||||
&listing_enum_filters,
|
||||
&actual_listings.filter_feature_data,
|
||||
state_clone.data.num_features,
|
||||
&keep_unknown_listing_filter_idxs,
|
||||
)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
Some(row)
|
||||
})
|
||||
.collect();
|
||||
|
|
@ -142,7 +176,8 @@ pub async fn get_actual_listings(
|
|||
total = total_matching,
|
||||
total_in_bounds,
|
||||
offset,
|
||||
filtered = passing_postcodes.is_some(),
|
||||
postcode_filtered = passing_postcodes.is_some(),
|
||||
listing_filtered = has_listing_filters,
|
||||
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
|
||||
"GET /api/actual-listings"
|
||||
);
|
||||
|
|
@ -155,12 +190,82 @@ pub async fn get_actual_listings(
|
|||
})
|
||||
})
|
||||
.await
|
||||
.map_err(|error| ApiError::Internal(error.to_string()))?
|
||||
.map_err(ApiError::Internal)?;
|
||||
.map_err(|error| ApiError::Internal(error.to_string()).into_response())?
|
||||
.map_err(|err| ApiError::Internal(err).into_response())?;
|
||||
|
||||
Ok(Json(response))
|
||||
}
|
||||
|
||||
fn listing_level_filter_feature_idxs(state: &AppState) -> FxHashSet<usize> {
|
||||
feature_idxs(state, LISTING_LEVEL_FILTER_FEATURES)
|
||||
}
|
||||
|
||||
fn keep_unknown_listing_filter_feature_idxs(state: &AppState) -> FxHashSet<usize> {
|
||||
feature_idxs(state, KEEP_UNKNOWN_LISTING_FILTER_FEATURES)
|
||||
}
|
||||
|
||||
fn feature_idxs(state: &AppState, names: &[&str]) -> FxHashSet<usize> {
|
||||
names
|
||||
.iter()
|
||||
.filter_map(|name| state.feature_name_to_index.get(*name).copied())
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn split_numeric_filters(
|
||||
filters: Vec<ParsedFilter>,
|
||||
listing_level_feature_idxs: &FxHashSet<usize>,
|
||||
) -> (Vec<ParsedFilter>, Vec<ParsedFilter>) {
|
||||
let mut listing_filters = Vec::new();
|
||||
let mut postcode_filters = Vec::new();
|
||||
for filter in filters {
|
||||
if listing_level_feature_idxs.contains(&filter.feat_idx) {
|
||||
listing_filters.push(filter);
|
||||
} else {
|
||||
postcode_filters.push(filter);
|
||||
}
|
||||
}
|
||||
(listing_filters, postcode_filters)
|
||||
}
|
||||
|
||||
fn split_enum_filters(
|
||||
filters: Vec<ParsedEnumFilter>,
|
||||
listing_level_feature_idxs: &FxHashSet<usize>,
|
||||
) -> (Vec<ParsedEnumFilter>, Vec<ParsedEnumFilter>) {
|
||||
let mut listing_filters = Vec::new();
|
||||
let mut postcode_filters = Vec::new();
|
||||
for filter in filters {
|
||||
if listing_level_feature_idxs.contains(&filter.feat_idx) {
|
||||
listing_filters.push(filter);
|
||||
} else {
|
||||
postcode_filters.push(filter);
|
||||
}
|
||||
}
|
||||
(listing_filters, postcode_filters)
|
||||
}
|
||||
|
||||
fn row_passes_listing_filters(
|
||||
row: usize,
|
||||
filters: &[ParsedFilter],
|
||||
enum_filters: &[ParsedEnumFilter],
|
||||
feature_data: &[u16],
|
||||
num_features: usize,
|
||||
keep_unknown_filter_idxs: &FxHashSet<usize>,
|
||||
) -> bool {
|
||||
let base = row * num_features;
|
||||
|
||||
filters.iter().all(|filter| {
|
||||
let raw = feature_data[base + filter.feat_idx];
|
||||
if raw == NAN_U16 {
|
||||
keep_unknown_filter_idxs.contains(&filter.feat_idx)
|
||||
} else {
|
||||
raw >= filter.min_u16 && raw <= filter.max_u16
|
||||
}
|
||||
}) && enum_filters.iter().all(|filter| {
|
||||
let raw = feature_data[base + filter.feat_idx];
|
||||
raw != NAN_U16 && filter.allowed.contains(&raw)
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn compute_passing_postcodes(
|
||||
state: &AppState,
|
||||
|
|
@ -224,3 +329,111 @@ fn compute_passing_postcodes(
|
|||
|
||||
Ok(passing)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Numeric filter on `feat_idx` accepting raw codes in `min_u16..=max_u16`.
    fn numeric_range(feat_idx: usize, min_u16: u16, max_u16: u16) -> ParsedFilter {
        ParsedFilter {
            feat_idx,
            min_u16,
            max_u16,
        }
    }

    /// Numeric filter on `feat_idx` with the fixed range `0..=100`.
    fn numeric_filter(feat_idx: usize) -> ParsedFilter {
        numeric_range(feat_idx, 0, 100)
    }

    /// Enum filter on `feat_idx` that allows only code `0`.
    fn enum_filter(feat_idx: usize) -> ParsedEnumFilter {
        ParsedEnumFilter {
            feat_idx,
            allowed: [0u16].into_iter().collect(),
        }
    }

    fn numeric_idxs(filters: &[ParsedFilter]) -> Vec<usize> {
        filters.iter().map(|filter| filter.feat_idx).collect()
    }

    fn enum_idxs(filters: &[ParsedEnumFilter]) -> Vec<usize> {
        filters.iter().map(|filter| filter.feat_idx).collect()
    }

    #[test]
    fn splits_actual_listing_filters_by_listing_native_features() {
        let listing_level_feature_idxs: FxHashSet<usize> = [1usize, 3].into_iter().collect();

        let (listing_filters, postcode_filters) = split_numeric_filters(
            vec![numeric_filter(0), numeric_filter(1), numeric_filter(3)],
            &listing_level_feature_idxs,
        );
        assert_eq!(numeric_idxs(&listing_filters), vec![1, 3]);
        assert_eq!(numeric_idxs(&postcode_filters), vec![0]);

        let (listing_enum_filters, postcode_enum_filters) = split_enum_filters(
            vec![enum_filter(2), enum_filter(3)],
            &listing_level_feature_idxs,
        );
        assert_eq!(enum_idxs(&listing_enum_filters), vec![3]);
        assert_eq!(enum_idxs(&postcode_enum_filters), vec![2]);
    }

    #[test]
    fn listing_floor_area_filter_keeps_unknown_values() {
        let keep_unknown_filter_idxs: FxHashSet<usize> = [0usize].into_iter().collect();

        // Single-row, single-column matrix holding `raw`, filtered on 10..=20.
        let passes = |raw: u16| {
            row_passes_listing_filters(
                0,
                &[numeric_range(0, 10, 20)],
                &[],
                &[raw],
                1,
                &keep_unknown_filter_idxs,
            )
        };

        assert!(passes(NAN_U16)); // unknown value is kept
        assert!(!passes(9)); // known value below the range is rejected
        assert!(passes(15)); // known value inside the range is kept
    }
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue