alright
This commit is contained in:
parent
c645b0f1d4
commit
39ef5c6646
79 changed files with 5660 additions and 2199 deletions
|
|
@ -61,6 +61,9 @@ pub struct ActualListingData {
|
|||
/// overlaid where available. This lets the listings endpoint use the same filter
|
||||
/// execution path as the property endpoints.
|
||||
pub filter_feature_data: Vec<u16>,
|
||||
/// Row-major dynamic postcode POI metrics aligned with
|
||||
/// PropertyData::poi_metrics.feature_names.
|
||||
pub poi_filter_feature_data: Vec<u16>,
|
||||
pub grid: GridIndex,
|
||||
}
|
||||
|
||||
|
|
@ -109,16 +112,16 @@ impl ActualListingData {
|
|||
let listing_status = InternedColumn::build(&opt_to_string(&listing_status_raw));
|
||||
|
||||
let filter_feature_data = build_filter_feature_data(
|
||||
&df,
|
||||
property_data,
|
||||
&postcode,
|
||||
&address,
|
||||
&property_type_raw,
|
||||
&leasehold_freehold_raw,
|
||||
&rooms_total,
|
||||
&floor_area_sqm,
|
||||
&asking_price,
|
||||
&asking_price_per_sqm,
|
||||
);
|
||||
)?;
|
||||
let poi_filter_feature_data = build_poi_filter_feature_data(&df, property_data)?;
|
||||
|
||||
let grid = GridIndex::build(&lat, &lon, GRID_CELL_SIZE);
|
||||
|
||||
|
|
@ -144,6 +147,7 @@ impl ActualListingData {
|
|||
listing_date_iso,
|
||||
features,
|
||||
filter_feature_data,
|
||||
poi_filter_feature_data,
|
||||
grid,
|
||||
})
|
||||
}
|
||||
|
|
@ -174,49 +178,37 @@ impl ActualListingData {
|
|||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn build_filter_feature_data(
|
||||
df: &DataFrame,
|
||||
property_data: Option<&PropertyData>,
|
||||
postcode: &[String],
|
||||
address: &[Option<String>],
|
||||
property_type: &[Option<String>],
|
||||
leasehold_freehold: &[Option<String>],
|
||||
rooms_total: &[Option<i32>],
|
||||
floor_area_sqm: &[Option<f32>],
|
||||
asking_price: &[Option<i64>],
|
||||
asking_price_per_sqm: &[Option<f32>],
|
||||
) -> Vec<u16> {
|
||||
) -> Result<Vec<u16>> {
|
||||
let Some(property_data) = property_data else {
|
||||
return Vec::new();
|
||||
return Ok(Vec::new());
|
||||
};
|
||||
|
||||
let num_features = property_data.num_features;
|
||||
let mut feature_data = vec![NAN_U16; postcode.len() * num_features];
|
||||
let mut joined_rows = 0usize;
|
||||
let row_count = df.height();
|
||||
let mut feature_data = vec![NAN_U16; row_count * num_features];
|
||||
let quant = property_data.quant_ref();
|
||||
let mut encoded_columns = 0usize;
|
||||
|
||||
for (row, postcode_value) in postcode.iter().enumerate() {
|
||||
let Some(address_value) = address[row]
|
||||
.as_deref()
|
||||
.map(str::trim)
|
||||
.filter(|v| !v.is_empty())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let query = format!("{address_value} {postcode_value}");
|
||||
let Some(&property_row) = property_data.search_addresses(&query, 1).first() else {
|
||||
continue;
|
||||
};
|
||||
if property_data.postcode(property_row) != postcode_value {
|
||||
continue;
|
||||
for (feat_idx, name) in property_data.feature_names.iter().enumerate() {
|
||||
if feat_idx < property_data.num_numeric {
|
||||
if let Some(values) = extract_optional_feature_f32(df, name)? {
|
||||
encode_numeric_feature(&mut feature_data, property_data, &quant, feat_idx, values);
|
||||
encoded_columns += 1;
|
||||
}
|
||||
} else if let Some(values) = extract_optional_feature_str(df, name)? {
|
||||
encode_enum_feature(&mut feature_data, property_data, feat_idx, values);
|
||||
encoded_columns += 1;
|
||||
}
|
||||
|
||||
let dst = row * num_features;
|
||||
let src = property_row * num_features;
|
||||
feature_data[dst..dst + num_features]
|
||||
.copy_from_slice(&property_data.feature_data[src..src + num_features]);
|
||||
joined_rows += 1;
|
||||
}
|
||||
|
||||
let quant = property_data.quant_ref();
|
||||
overlay_numeric_feature(
|
||||
&mut feature_data,
|
||||
property_data,
|
||||
|
|
@ -281,11 +273,50 @@ fn build_filter_feature_data(
|
|||
);
|
||||
|
||||
info!(
|
||||
rows = postcode.len(),
|
||||
joined_rows, "Actual listings joined to property feature matrix"
|
||||
rows = row_count,
|
||||
encoded_columns, "Actual listings feature matrix read from enriched parquet"
|
||||
);
|
||||
|
||||
feature_data
|
||||
Ok(feature_data)
|
||||
}
|
||||
|
||||
fn build_poi_filter_feature_data(
|
||||
df: &DataFrame,
|
||||
property_data: Option<&PropertyData>,
|
||||
) -> Result<Vec<u16>> {
|
||||
let Some(property_data) = property_data else {
|
||||
return Ok(Vec::new());
|
||||
};
|
||||
let poi_metrics = &property_data.poi_metrics;
|
||||
let num_features = poi_metrics.num_features();
|
||||
if num_features == 0 {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let row_count = df.height();
|
||||
let mut feature_data = vec![NAN_U16; row_count * num_features];
|
||||
let quant = poi_metrics.quant_ref();
|
||||
let mut encoded_columns = 0usize;
|
||||
|
||||
for (metric_idx, name) in poi_metrics.feature_names.iter().enumerate() {
|
||||
let Some(values) = extract_optional_feature_f32(df, name)? else {
|
||||
continue;
|
||||
};
|
||||
for (row, value) in values.into_iter().enumerate() {
|
||||
let dst = row * num_features + metric_idx;
|
||||
feature_data[dst] = value
|
||||
.map(|value| encode_numeric_value(&quant, metric_idx, value))
|
||||
.unwrap_or(NAN_U16);
|
||||
}
|
||||
encoded_columns += 1;
|
||||
}
|
||||
|
||||
info!(
|
||||
rows = row_count,
|
||||
encoded_columns, "Actual listings POI metrics read from enriched parquet"
|
||||
);
|
||||
|
||||
Ok(feature_data)
|
||||
}
|
||||
|
||||
fn feature_index(property_data: &PropertyData, name: &str) -> Option<usize> {
|
||||
|
|
@ -323,6 +354,53 @@ fn overlay_numeric_feature<I>(
|
|||
}
|
||||
}
|
||||
|
||||
fn encode_numeric_feature<I>(
|
||||
feature_data: &mut [u16],
|
||||
property_data: &PropertyData,
|
||||
quant: &QuantRef<'_>,
|
||||
feat_idx: usize,
|
||||
values: I,
|
||||
) where
|
||||
I: IntoIterator<Item = Option<f32>>,
|
||||
{
|
||||
let num_features = property_data.num_features;
|
||||
for (row, value) in values.into_iter().enumerate() {
|
||||
let dst = row * num_features + feat_idx;
|
||||
feature_data[dst] = value
|
||||
.map(|value| encode_numeric_value(quant, feat_idx, value))
|
||||
.unwrap_or(NAN_U16);
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_optional_feature_f32(df: &DataFrame, name: &str) -> Result<Option<Vec<Option<f32>>>> {
|
||||
let Ok(column) = df.column(name) else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
if matches!(column.dtype(), DataType::Datetime(_, _) | DataType::Date) {
|
||||
let projected = df
|
||||
.clone()
|
||||
.lazy()
|
||||
.select([(col(name).dt().year().cast(DataType::Float32)
|
||||
+ (col(name).dt().month().cast(DataType::Float32) - lit(1.0f32)) / lit(12.0f32))
|
||||
.alias("__feature")])
|
||||
.collect()
|
||||
.with_context(|| format!("Failed to convert datetime feature '{name}'"))?;
|
||||
return Ok(Some(extract_opt_f32(&projected, "__feature")?));
|
||||
}
|
||||
|
||||
let cast = column
|
||||
.cast(&DataType::Float32)
|
||||
.with_context(|| format!("Failed to cast feature '{name}' to Float32"))?;
|
||||
let values = cast
|
||||
.f32()
|
||||
.with_context(|| format!("Feature '{name}' is not Float32"))?
|
||||
.into_iter()
|
||||
.map(|value| value.filter(|v| v.is_finite()))
|
||||
.collect();
|
||||
Ok(Some(values))
|
||||
}
|
||||
|
||||
fn overlay_enum_feature<'a, I>(
|
||||
feature_data: &mut [u16],
|
||||
property_data: &PropertyData,
|
||||
|
|
@ -355,6 +433,46 @@ fn overlay_enum_feature<'a, I>(
|
|||
}
|
||||
}
|
||||
|
||||
fn encode_enum_feature(
|
||||
feature_data: &mut [u16],
|
||||
property_data: &PropertyData,
|
||||
feat_idx: usize,
|
||||
values: Vec<Option<String>>,
|
||||
) {
|
||||
let Some(enum_values) = property_data.enum_values.get(&feat_idx) else {
|
||||
return;
|
||||
};
|
||||
let num_features = property_data.num_features;
|
||||
for (row, value) in values.into_iter().enumerate() {
|
||||
let dst = row * num_features + feat_idx;
|
||||
feature_data[dst] = value
|
||||
.as_deref()
|
||||
.map(str::trim)
|
||||
.filter(|text| !text.is_empty())
|
||||
.and_then(|text| enum_values.iter().position(|candidate| candidate == text))
|
||||
.map(|position| position as u16)
|
||||
.unwrap_or(NAN_U16);
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_optional_feature_str(df: &DataFrame, name: &str) -> Result<Option<Vec<Option<String>>>> {
|
||||
let Ok(column) = df.column(name) else {
|
||||
return Ok(None);
|
||||
};
|
||||
let cast = column
|
||||
.cast(&DataType::String)
|
||||
.with_context(|| format!("Failed to cast feature '{name}' to String"))?;
|
||||
let strings = cast
|
||||
.str()
|
||||
.with_context(|| format!("Feature '{name}' is not a string column"))?;
|
||||
Ok(Some(
|
||||
strings
|
||||
.into_iter()
|
||||
.map(|value| value.and_then(|text| (!text.trim().is_empty()).then(|| text.to_string())))
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
|
||||
fn encode_numeric_value(quant: &QuantRef<'_>, feat_idx: usize, value: f32) -> u16 {
|
||||
if !value.is_finite() {
|
||||
return NAN_U16;
|
||||
|
|
@ -517,8 +635,13 @@ mod tests {
|
|||
use std::path::PathBuf;
|
||||
|
||||
fn sample_path() -> Option<PathBuf> {
|
||||
let path = PathBuf::from("../finder/data/online_listings_buy.parquet");
|
||||
path.exists().then_some(path)
|
||||
[
|
||||
"../finder/data/online_listings_buy_enriched.parquet",
|
||||
"../finder/data/online_listings_buy.parquet",
|
||||
]
|
||||
.into_iter()
|
||||
.map(PathBuf::from)
|
||||
.find(|path| path.exists())
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -63,7 +63,20 @@ const DASHBOARD_POI_GROUPS: &[(&str, &[&str])] = &[
|
|||
("Groceries", GROCERY_DASHBOARD_CATEGORIES),
|
||||
("Food & Drink", &["Café", "Restaurant", "Pub", "Fast Food"]),
|
||||
("Green Space", &["Park", "Playground"]),
|
||||
("Education", &["School"]),
|
||||
(
|
||||
"Education",
|
||||
&[
|
||||
"Nursery school",
|
||||
"Primary school",
|
||||
"Secondary school",
|
||||
"All-through school",
|
||||
"Sixth form",
|
||||
"Further education college",
|
||||
"University",
|
||||
"Special school",
|
||||
"School",
|
||||
],
|
||||
),
|
||||
(
|
||||
"Health",
|
||||
&["GP Surgery", "Pharmacy", "Dentist", "Hospital & Clinic"],
|
||||
|
|
@ -119,6 +132,21 @@ fn canonical_poi_category(category: &str) -> &str {
|
|||
}
|
||||
}
|
||||
|
||||
/// Categories the pipeline emits for the GIAS-derived school POIs. A bare
|
||||
/// `poi=School` URL (predating the per-phase split) is expanded to all of these
|
||||
/// so bookmarked links keep showing schools.
|
||||
const SCHOOL_CATEGORY_ALIASES: &[&str] = &[
|
||||
"Nursery school",
|
||||
"Primary school",
|
||||
"Secondary school",
|
||||
"All-through school",
|
||||
"Sixth form",
|
||||
"Further education college",
|
||||
"University",
|
||||
"Special school",
|
||||
"School",
|
||||
];
|
||||
|
||||
pub fn resolve_poi_category_filter(category_values: &[String], categories: &str) -> FxHashSet<u16> {
|
||||
let mut selected = FxHashSet::default();
|
||||
for part in categories.split(',') {
|
||||
|
|
@ -126,6 +154,12 @@ pub fn resolve_poi_category_filter(category_values: &[String], categories: &str)
|
|||
if category.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if category == "School" {
|
||||
for alias in SCHOOL_CATEGORY_ALIASES {
|
||||
add_category_filter_index(category_values, alias, &mut selected);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
add_category_filter_index(category_values, category, &mut selected);
|
||||
}
|
||||
selected
|
||||
|
|
@ -174,6 +208,8 @@ pub struct SchoolMetadata {
|
|||
pub telephone: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub head_name: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub ofsted_rating: Option<String>,
|
||||
}
|
||||
|
||||
pub struct POIData {
|
||||
|
|
@ -350,6 +386,8 @@ fn build_school_meta(
|
|||
let website = extract_optional_str_col(df, "school_website")?.unwrap_or_default();
|
||||
let telephone = extract_optional_str_col(df, "school_telephone")?.unwrap_or_default();
|
||||
let head_name = extract_optional_str_col(df, "school_head_name")?.unwrap_or_default();
|
||||
let ofsted_rating =
|
||||
extract_optional_str_col(df, "school_ofsted_rating")?.unwrap_or_default();
|
||||
|
||||
let fetch_str = |col: &Vec<Option<String>>, row: usize| -> Option<String> {
|
||||
col.get(row).cloned().flatten()
|
||||
|
|
@ -390,6 +428,7 @@ fn build_school_meta(
|
|||
website: fetch_str(&website, row),
|
||||
telephone: fetch_str(&telephone, row),
|
||||
head_name: fetch_str(&head_name, row),
|
||||
ofsted_rating: fetch_str(&ofsted_rating, row),
|
||||
});
|
||||
}
|
||||
Ok((idx, meta))
|
||||
|
|
@ -578,6 +617,26 @@ mod tests {
|
|||
assert!(selected.is_empty());
|
||||
}
|
||||
|
||||
#[test]
fn legacy_school_filter_expands_to_all_school_categories() {
    // A bare `poi=School` from an old bookmarked URL must still select every
    // school category that is loaded, and nothing else.
    let values: Vec<String> = ["Primary school", "Secondary school", "University", "Tesco"]
        .iter()
        .map(|label| label.to_string())
        .collect();

    let selected = resolve_poi_category_filter(&values, "School");

    for school_idx in 0..3 {
        assert!(selected.contains(&school_idx));
    }
    assert!(!selected.contains(&3));
    assert_eq!(selected.len(), 3);
}
|
||||
|
||||
#[test]
|
||||
fn coop_category_aliases_resolve_to_single_category() {
|
||||
let values = vec!["Co-op".to_string(), "Tesco".to_string()];
|
||||
|
|
|
|||
|
|
@ -891,6 +891,15 @@ impl PropertyData {
|
|||
(&self.postcode_interner, &self.postcode_keys)
|
||||
}
|
||||
|
||||
/// Property rows for a given postcode string, or empty if unknown.
|
||||
pub fn rows_for_postcode(&self, postcode: &str) -> &[u32] {
|
||||
self.postcode_interner
|
||||
.get(postcode)
|
||||
.and_then(|key| self.postcode_row_index.get(&key))
|
||||
.map(Vec::as_slice)
|
||||
.unwrap_or(&[])
|
||||
}
|
||||
|
||||
fn row_address_search_tokens(&self, row: usize) -> &[lasso::Spur] {
|
||||
let offset = self.address_search_token_offsets[row] as usize;
|
||||
let length = self.address_search_token_lengths[row] as usize;
|
||||
|
|
|
|||
|
|
@ -426,21 +426,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
raw: false,
|
||||
absolute: false,
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Education, Skills and Training Score",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 100.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Education and skills deprivation percentile (higher = less deprived)",
|
||||
detail: "From the English Indices of Deprivation, converted to a national percentile where 0% is most deprived and 100% is least deprived. Covers school attainment, entry to higher education, adult qualifications, and English language proficiency.",
|
||||
source: "iod",
|
||||
prefix: "",
|
||||
suffix: "%",
|
||||
raw: true,
|
||||
absolute: true,
|
||||
}),
|
||||
],
|
||||
},
|
||||
FeatureGroup {
|
||||
|
|
@ -476,6 +461,21 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
raw: true,
|
||||
absolute: true,
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Education, Skills and Training Score",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 100.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Education and skills deprivation percentile (higher = less deprived)",
|
||||
detail: "From the English Indices of Deprivation, converted to a national percentile where 0% is most deprived and 100% is least deprived. Covers school attainment, entry to higher education, adult qualifications, and English language proficiency.",
|
||||
source: "iod",
|
||||
prefix: "",
|
||||
suffix: "%",
|
||||
raw: true,
|
||||
absolute: true,
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Health Deprivation and Disability Score",
|
||||
bounds: Bounds::Fixed {
|
||||
|
|
|
|||
|
|
@ -13,12 +13,11 @@ use crate::consts::NAN_U16;
|
|||
use crate::data::ActualListing;
|
||||
use crate::licensing::{check_license_bounds, resolve_share_code};
|
||||
use crate::parsing::{
|
||||
parse_filters_with_poi, require_bounds, row_passes_filters, row_passes_poi_filters,
|
||||
ParsedEnumFilter, ParsedFilter,
|
||||
parse_filters_with_poi, require_bounds, ParsedEnumFilter, ParsedFilter, ParsedPoiFilter,
|
||||
};
|
||||
use crate::state::{AppState, SharedState};
|
||||
|
||||
use super::travel_time::{parse_optional_travel, row_passes_travel_filters, TravelEntry};
|
||||
use super::travel_time::{load_travel_data, parse_optional_travel, row_passes_travel_filters};
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct ActualListingsParams {
|
||||
|
|
@ -41,17 +40,6 @@ pub struct ActualListingsResponse {
|
|||
pub truncated: bool,
|
||||
}
|
||||
|
||||
const LISTING_LEVEL_FILTER_FEATURES: &[&str] = &[
|
||||
"Property type",
|
||||
"Leasehold/Freehold",
|
||||
"Total floor area (sqm)",
|
||||
"Number of bedrooms & living rooms",
|
||||
"Estimated current price",
|
||||
"Last known price",
|
||||
"Est. price per sqm",
|
||||
"Price per sqm",
|
||||
];
|
||||
|
||||
const KEEP_UNKNOWN_LISTING_FILTER_FEATURES: &[&str] = &["Total floor area (sqm)"];
|
||||
|
||||
pub async fn get_actual_listings(
|
||||
|
|
@ -90,38 +78,23 @@ pub async fn get_actual_listings(
|
|||
let travel_entries = parse_optional_travel(params.travel.as_deref())
|
||||
.map_err(|err| ApiError::BadRequest(err).into_response())?;
|
||||
|
||||
let listing_level_feature_idxs = listing_level_filter_feature_idxs(&state);
|
||||
let keep_unknown_listing_filter_idxs = keep_unknown_listing_filter_feature_idxs(&state);
|
||||
let (listing_filters, postcode_filters) =
|
||||
split_numeric_filters(parsed_filters, &listing_level_feature_idxs);
|
||||
let (listing_enum_filters, postcode_enum_filters) =
|
||||
split_enum_filters(parsed_enum_filters, &listing_level_feature_idxs);
|
||||
let listing_filters = parsed_filters;
|
||||
let listing_enum_filters = parsed_enum_filters;
|
||||
|
||||
let has_postcode_filters = !postcode_filters.is_empty()
|
||||
|| !postcode_enum_filters.is_empty()
|
||||
|| !parsed_poi_filters.is_empty()
|
||||
|| !travel_entries.is_empty();
|
||||
let has_listing_filters = !listing_filters.is_empty() || !listing_enum_filters.is_empty();
|
||||
|
||||
let state_clone = state.clone();
|
||||
let response =
|
||||
tokio::task::spawn_blocking(move || -> Result<ActualListingsResponse, String> {
|
||||
let t0 = std::time::Instant::now();
|
||||
|
||||
let passing_postcodes = if has_postcode_filters {
|
||||
Some(compute_passing_postcodes(
|
||||
&state_clone,
|
||||
south,
|
||||
west,
|
||||
north,
|
||||
east,
|
||||
&postcode_filters,
|
||||
&postcode_enum_filters,
|
||||
&parsed_poi_filters,
|
||||
&travel_entries,
|
||||
)?)
|
||||
let has_poi_filters = !parsed_poi_filters.is_empty();
|
||||
let has_travel_filters = !travel_entries.is_empty();
|
||||
let poi_num_features = state_clone.data.poi_metrics.num_features();
|
||||
let travel_data = if has_travel_filters {
|
||||
load_travel_data(&state_clone.travel_time_store, &travel_entries)?
|
||||
} else {
|
||||
None
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
let row_indices = actual_listings.grid.query(south, west, north, east);
|
||||
|
|
@ -133,11 +106,6 @@ pub async fn get_actual_listings(
|
|||
.iter()
|
||||
.filter_map(|&row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if let Some(allowed) = passing_postcodes.as_ref() {
|
||||
if !allowed.contains(actual_listings.postcode[row].as_str()) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
if has_listing_filters
|
||||
&& !row_passes_listing_filters(
|
||||
row,
|
||||
|
|
@ -150,6 +118,25 @@ pub async fn get_actual_listings(
|
|||
{
|
||||
return None;
|
||||
}
|
||||
if has_poi_filters
|
||||
&& !row_passes_listing_poi_filters(
|
||||
row,
|
||||
&parsed_poi_filters,
|
||||
&actual_listings.poi_filter_feature_data,
|
||||
poi_num_features,
|
||||
)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
if has_travel_filters
|
||||
&& !row_passes_travel_filters(
|
||||
actual_listings.postcode[row].as_str(),
|
||||
&travel_entries,
|
||||
&travel_data,
|
||||
)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
Some(row)
|
||||
})
|
||||
.collect();
|
||||
|
|
@ -176,8 +163,9 @@ pub async fn get_actual_listings(
|
|||
total = total_matching,
|
||||
total_in_bounds,
|
||||
offset,
|
||||
postcode_filtered = passing_postcodes.is_some(),
|
||||
listing_filtered = has_listing_filters,
|
||||
poi_filtered = has_poi_filters,
|
||||
travel_filtered = has_travel_filters,
|
||||
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
|
||||
"GET /api/actual-listings"
|
||||
);
|
||||
|
|
@ -196,10 +184,6 @@ pub async fn get_actual_listings(
|
|||
Ok(Json(response))
|
||||
}
|
||||
|
||||
fn listing_level_filter_feature_idxs(state: &AppState) -> FxHashSet<usize> {
|
||||
feature_idxs(state, LISTING_LEVEL_FILTER_FEATURES)
|
||||
}
|
||||
|
||||
fn keep_unknown_listing_filter_feature_idxs(state: &AppState) -> FxHashSet<usize> {
|
||||
feature_idxs(state, KEEP_UNKNOWN_LISTING_FILTER_FEATURES)
|
||||
}
|
||||
|
|
@ -211,38 +195,6 @@ fn feature_idxs(state: &AppState, names: &[&str]) -> FxHashSet<usize> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn split_numeric_filters(
|
||||
filters: Vec<ParsedFilter>,
|
||||
listing_level_feature_idxs: &FxHashSet<usize>,
|
||||
) -> (Vec<ParsedFilter>, Vec<ParsedFilter>) {
|
||||
let mut listing_filters = Vec::new();
|
||||
let mut postcode_filters = Vec::new();
|
||||
for filter in filters {
|
||||
if listing_level_feature_idxs.contains(&filter.feat_idx) {
|
||||
listing_filters.push(filter);
|
||||
} else {
|
||||
postcode_filters.push(filter);
|
||||
}
|
||||
}
|
||||
(listing_filters, postcode_filters)
|
||||
}
|
||||
|
||||
fn split_enum_filters(
|
||||
filters: Vec<ParsedEnumFilter>,
|
||||
listing_level_feature_idxs: &FxHashSet<usize>,
|
||||
) -> (Vec<ParsedEnumFilter>, Vec<ParsedEnumFilter>) {
|
||||
let mut listing_filters = Vec::new();
|
||||
let mut postcode_filters = Vec::new();
|
||||
for filter in filters {
|
||||
if listing_level_feature_idxs.contains(&filter.feat_idx) {
|
||||
listing_filters.push(filter);
|
||||
} else {
|
||||
postcode_filters.push(filter);
|
||||
}
|
||||
}
|
||||
(listing_filters, postcode_filters)
|
||||
}
|
||||
|
||||
fn row_passes_listing_filters(
|
||||
row: usize,
|
||||
filters: &[ParsedFilter],
|
||||
|
|
@ -266,132 +218,33 @@ fn row_passes_listing_filters(
|
|||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn compute_passing_postcodes(
|
||||
state: &AppState,
|
||||
south: f64,
|
||||
west: f64,
|
||||
north: f64,
|
||||
east: f64,
|
||||
parsed_filters: &[crate::parsing::ParsedFilter],
|
||||
parsed_enum_filters: &[crate::parsing::ParsedEnumFilter],
|
||||
parsed_poi_filters: &[crate::parsing::ParsedPoiFilter],
|
||||
travel_entries: &[TravelEntry],
|
||||
) -> Result<FxHashSet<String>, String> {
|
||||
let num_features = state.data.num_features;
|
||||
let feature_data = &state.data.feature_data;
|
||||
let poi_metrics = &state.data.poi_metrics;
|
||||
let has_poi_filters = !parsed_poi_filters.is_empty();
|
||||
fn row_passes_listing_poi_filters(
|
||||
row: usize,
|
||||
filters: &[ParsedPoiFilter],
|
||||
feature_data: &[u16],
|
||||
num_features: usize,
|
||||
) -> bool {
|
||||
if filters.is_empty() {
|
||||
return true;
|
||||
}
|
||||
if num_features == 0 || feature_data.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let travel_data = if travel_entries.is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
let store = &state.travel_time_store;
|
||||
travel_entries
|
||||
.iter()
|
||||
.map(|entry| {
|
||||
store
|
||||
.get(&entry.mode, &entry.slug)
|
||||
.map_err(|err| format!("Failed to load travel data: {}", err))
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?
|
||||
};
|
||||
let has_travel = !travel_entries.is_empty();
|
||||
|
||||
let mut passing: FxHashSet<String> = FxHashSet::default();
|
||||
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
parsed_filters,
|
||||
parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
) {
|
||||
return;
|
||||
}
|
||||
if has_poi_filters && !row_passes_poi_filters(row, parsed_poi_filters, poi_metrics) {
|
||||
return;
|
||||
}
|
||||
let postcode = state.data.postcode(row);
|
||||
if has_travel && !row_passes_travel_filters(postcode, travel_entries, &travel_data) {
|
||||
return;
|
||||
}
|
||||
// Property postcodes share the same canonical "OUT IN" format used by
|
||||
// ActualListingData::load (normalize_postcode), so we can match by string.
|
||||
if !passing.contains(postcode) {
|
||||
passing.insert(postcode.to_string());
|
||||
}
|
||||
});
|
||||
|
||||
Ok(passing)
|
||||
let base = row * num_features;
|
||||
filters.iter().all(|filter| {
|
||||
let raw = feature_data
|
||||
.get(base + filter.metric_idx)
|
||||
.copied()
|
||||
.unwrap_or(NAN_U16);
|
||||
raw != NAN_U16 && raw >= filter.min_u16 && raw <= filter.max_u16
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn numeric_filter(feat_idx: usize) -> ParsedFilter {
|
||||
ParsedFilter {
|
||||
feat_idx,
|
||||
min_u16: 0,
|
||||
max_u16: 100,
|
||||
}
|
||||
}
|
||||
|
||||
fn enum_filter(feat_idx: usize) -> ParsedEnumFilter {
|
||||
ParsedEnumFilter {
|
||||
feat_idx,
|
||||
allowed: [0u16].into_iter().collect(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn splits_actual_listing_filters_by_listing_native_features() {
|
||||
let listing_level_feature_idxs: FxHashSet<usize> = [1usize, 3].into_iter().collect();
|
||||
|
||||
let (listing_filters, postcode_filters) = split_numeric_filters(
|
||||
vec![numeric_filter(0), numeric_filter(1), numeric_filter(3)],
|
||||
&listing_level_feature_idxs,
|
||||
);
|
||||
assert_eq!(
|
||||
listing_filters
|
||||
.iter()
|
||||
.map(|filter| filter.feat_idx)
|
||||
.collect::<Vec<_>>(),
|
||||
vec![1, 3]
|
||||
);
|
||||
assert_eq!(
|
||||
postcode_filters
|
||||
.iter()
|
||||
.map(|filter| filter.feat_idx)
|
||||
.collect::<Vec<_>>(),
|
||||
vec![0]
|
||||
);
|
||||
|
||||
let (listing_enum_filters, postcode_enum_filters) = split_enum_filters(
|
||||
vec![enum_filter(2), enum_filter(3)],
|
||||
&listing_level_feature_idxs,
|
||||
);
|
||||
assert_eq!(
|
||||
listing_enum_filters
|
||||
.iter()
|
||||
.map(|filter| filter.feat_idx)
|
||||
.collect::<Vec<_>>(),
|
||||
vec![3]
|
||||
);
|
||||
assert_eq!(
|
||||
postcode_enum_filters
|
||||
.iter()
|
||||
.map(|filter| filter.feat_idx)
|
||||
.collect::<Vec<_>>(),
|
||||
vec![2]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn listing_floor_area_filter_keeps_unknown_values() {
|
||||
let floor_area_filter = ParsedFilter {
|
||||
|
|
@ -436,4 +289,30 @@ mod tests {
|
|||
&keep_unknown_filter_idxs
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
fn listing_poi_filter_uses_listing_metric_matrix() {
    // The same [10, 20] range on metric 1 is checked against two single-row
    // matrices: one with a value in range, one where the metric is missing.
    let range_filter = || ParsedPoiFilter {
        metric_idx: 1,
        min_u16: 10,
        max_u16: 20,
    };
    let populated_row = [NAN_U16, 15];
    let missing_row = [NAN_U16, NAN_U16];

    assert!(row_passes_listing_poi_filters(
        0,
        &[range_filter()],
        &populated_row,
        2
    ));
    assert!(!row_passes_listing_poi_filters(
        0,
        &[range_filter()],
        &missing_row,
        2
    ));
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,14 +18,15 @@ use crate::data::{PostcodePoiMetrics, QuantRef};
|
|||
use crate::features;
|
||||
use crate::licensing::{check_license_bounds, resolve_share_code};
|
||||
use crate::parsing::{
|
||||
parse_field_indices_with_poi, parse_filters_with_poi, require_bounds, row_passes_filters,
|
||||
row_passes_poi_filters,
|
||||
parse_bounds, parse_field_indices_with_poi, parse_filters_with_poi, row_passes_filters,
|
||||
row_passes_poi_filters, ParsedEnumFilter, ParsedFilter, ParsedPoiFilter,
|
||||
};
|
||||
use crate::routes::travel_time::{
|
||||
load_travel_data, parse_optional_travel, row_passes_travel_filters,
|
||||
};
|
||||
use crate::routes::{fetch_screenshot_bytes, FeatureInfo};
|
||||
use crate::state::SharedState;
|
||||
use crate::utils::normalize_postcode;
|
||||
|
||||
const MAX_EXPORT_POSTCODES: usize = 250;
|
||||
const EXPORT_SCREENSHOT_TIMEOUT_SECS: u64 = 12;
|
||||
|
|
@ -46,6 +47,9 @@ pub struct ExportParams {
|
|||
travel: Option<String>,
|
||||
fields: Option<String>,
|
||||
share: Option<String>,
|
||||
/// Comma-separated list of postcodes for list-mode export. When supplied,
|
||||
/// the bounds / filters / travel parameters are ignored.
|
||||
postcodes: Option<String>,
|
||||
}
|
||||
|
||||
/// Per-postcode accumulator for export aggregation (mean for numeric, mode for enum).
|
||||
|
|
@ -193,6 +197,94 @@ fn collect_overlay_state_params(query: Option<&str>) -> Vec<String> {
|
|||
collect_repeated_state_params(query, "overlay")
|
||||
}
|
||||
|
||||
/// A parsed, deduplicated, validated list of postcodes to export.
|
||||
struct ParsedPostcodeList {
|
||||
/// Resolved (postcode index, normalized postcode) pairs, preserving input order.
|
||||
entries: Vec<(usize, String)>,
|
||||
/// Postcodes the user supplied that were not found in the dataset.
|
||||
unknown: Vec<String>,
|
||||
}
|
||||
|
||||
fn parse_postcode_list(
|
||||
raw: &str,
|
||||
state: &crate::state::AppState,
|
||||
) -> Result<ParsedPostcodeList, axum::response::Response> {
|
||||
let mut entries: Vec<(usize, String)> = Vec::new();
|
||||
let mut unknown: Vec<String> = Vec::new();
|
||||
let mut seen: FxHashSet<usize> = FxHashSet::default();
|
||||
|
||||
for raw_pc in raw.split([',', '\n', ';']) {
|
||||
let trimmed = raw_pc.trim();
|
||||
if trimmed.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let normalized = normalize_postcode(trimmed);
|
||||
if normalized.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if entries.len() >= MAX_EXPORT_POSTCODES {
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
format!(
|
||||
"Too many postcodes; at most {} are supported per export",
|
||||
MAX_EXPORT_POSTCODES
|
||||
),
|
||||
)
|
||||
.into_response());
|
||||
}
|
||||
match state.postcode_data.postcode_to_idx.get(&normalized) {
|
||||
Some(&pc_idx) if seen.insert(pc_idx) => {
|
||||
entries.push((pc_idx, normalized));
|
||||
}
|
||||
Some(_) => {} // duplicate — skip silently
|
||||
None => unknown.push(normalized),
|
||||
}
|
||||
}
|
||||
|
||||
if entries.is_empty() {
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
"No valid postcodes supplied".to_string(),
|
||||
)
|
||||
.into_response());
|
||||
}
|
||||
|
||||
Ok(ParsedPostcodeList { entries, unknown })
|
||||
}
|
||||
|
||||
/// Tight bounding box around a set of postcode centroids (used for license checks).
///
/// `centroids` holds `(lat, lon)` pairs and `indices` selects which of them
/// to include. Indices outside `centroids` are ignored, as are centroids with
/// a non-finite latitude or longitude, so a single bad coordinate cannot leak
/// an infinite or half-populated edge into the result.
///
/// Returns `(south, west, north, east)`, or `(0.0, 0.0, 0.0, 0.0)` when no
/// usable centroid was selected.
fn bounds_for_postcode_indices(
    indices: &[usize],
    centroids: &[(f32, f32)],
) -> (f64, f64, f64, f64) {
    let mut bounds: Option<(f64, f64, f64, f64)> = None;
    for &idx in indices {
        let Some(&(lat, lon)) = centroids.get(idx) else {
            continue;
        };
        if !lat.is_finite() || !lon.is_finite() {
            continue;
        }
        let (lat, lon) = (f64::from(lat), f64::from(lon));
        bounds = Some(match bounds {
            None => (lat, lon, lat, lon),
            Some((south, west, north, east)) => (
                south.min(lat),
                west.min(lon),
                north.max(lat),
                east.max(lon),
            ),
        });
    }
    bounds.unwrap_or((0.0, 0.0, 0.0, 0.0))
}
|
||||
|
||||
pub async fn get_export(
|
||||
State(shared): State<Arc<SharedState>>,
|
||||
headers: HeaderMap,
|
||||
|
|
@ -201,16 +293,42 @@ pub async fn get_export(
|
|||
Query(params): Query<ExportParams>,
|
||||
) -> Result<impl IntoResponse, axum::response::Response> {
|
||||
let state = shared.load_state();
|
||||
let (south, west, north, east) =
|
||||
require_bounds(params.bounds).map_err(IntoResponse::into_response)?;
|
||||
|
||||
let area_deg2 = (north - south).max(0.0) * (east - west).max(0.0);
|
||||
if area_deg2 > MAX_EXPORT_BBOX_AREA_DEG2 {
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
"Export area is too large; zoom in further before exporting",
|
||||
)
|
||||
.into_response());
|
||||
// Two modes: bounds-based (default) and explicit postcode list.
|
||||
let postcode_list = match params.postcodes.as_deref() {
|
||||
Some(raw) if !raw.trim().is_empty() => Some(parse_postcode_list(raw, &state)?),
|
||||
_ => None,
|
||||
};
|
||||
let is_postcode_mode = postcode_list.is_some();
|
||||
if let Some(list) = postcode_list.as_ref() {
|
||||
if !list.unknown.is_empty() {
|
||||
warn!(unknown = ?list.unknown, "Export: unknown postcodes ignored");
|
||||
}
|
||||
}
|
||||
|
||||
let (south, west, north, east) = if let Some(list) = postcode_list.as_ref() {
|
||||
let idxs: Vec<usize> = list.entries.iter().map(|(i, _)| *i).collect();
|
||||
bounds_for_postcode_indices(&idxs, &state.postcode_data.centroids)
|
||||
} else {
|
||||
let raw = params.bounds.clone().ok_or_else(|| {
|
||||
(
|
||||
StatusCode::BAD_REQUEST,
|
||||
"bounds or postcodes parameter is required",
|
||||
)
|
||||
.into_response()
|
||||
})?;
|
||||
parse_bounds(&raw).map_err(IntoResponse::into_response)?
|
||||
};
|
||||
|
||||
if !is_postcode_mode {
|
||||
let area_deg2 = (north - south).max(0.0) * (east - west).max(0.0);
|
||||
if area_deg2 > MAX_EXPORT_BBOX_AREA_DEG2 {
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
"Export area is too large; zoom in further before exporting",
|
||||
)
|
||||
.into_response());
|
||||
}
|
||||
}
|
||||
|
||||
let share_bounds = resolve_share_code(&state, params.share.as_deref()).await;
|
||||
|
|
@ -218,24 +336,44 @@ pub async fn get_export(
|
|||
|
||||
let quant = state.data.quant_ref();
|
||||
let poi_quant = state.data.poi_metrics.quant_ref();
|
||||
let (parsed_filters, parsed_enum_filters, parsed_poi_filters) = parse_filters_with_poi(
|
||||
params.filters.as_deref(),
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
&quant,
|
||||
&state.data.poi_metrics.name_to_index,
|
||||
&poi_quant,
|
||||
)
|
||||
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
|
||||
let (parsed_filters, parsed_enum_filters, parsed_poi_filters): (
|
||||
Vec<ParsedFilter>,
|
||||
Vec<ParsedEnumFilter>,
|
||||
Vec<ParsedPoiFilter>,
|
||||
) = if is_postcode_mode {
|
||||
(Vec::new(), Vec::new(), Vec::new())
|
||||
} else {
|
||||
parse_filters_with_poi(
|
||||
params.filters.as_deref(),
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
&quant,
|
||||
&state.data.poi_metrics.name_to_index,
|
||||
&poi_quant,
|
||||
)
|
||||
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?
|
||||
};
|
||||
let has_poi_filters = !parsed_poi_filters.is_empty();
|
||||
let filters_str = params.filters;
|
||||
let travel_entries = parse_optional_travel(params.travel.as_deref())
|
||||
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
|
||||
let filters_str = if is_postcode_mode { None } else { params.filters };
|
||||
let travel_entries = if is_postcode_mode {
|
||||
Vec::new()
|
||||
} else {
|
||||
parse_optional_travel(params.travel.as_deref())
|
||||
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?
|
||||
};
|
||||
let has_travel_filters = travel_entries
|
||||
.iter()
|
||||
.any(|entry| entry.filter_min.is_some() && entry.filter_max.is_some());
|
||||
let travel_state_params = collect_travel_state_params(uri.query());
|
||||
let overlay_state_params = collect_overlay_state_params(uri.query());
|
||||
let travel_state_params = if is_postcode_mode {
|
||||
Vec::new()
|
||||
} else {
|
||||
collect_travel_state_params(uri.query())
|
||||
};
|
||||
let overlay_state_params = if is_postcode_mode {
|
||||
Vec::new()
|
||||
} else {
|
||||
collect_overlay_state_params(uri.query())
|
||||
};
|
||||
let fields_str = params.fields;
|
||||
let share_code = params.share;
|
||||
|
||||
|
|
@ -260,29 +398,34 @@ pub async fn get_export(
|
|||
share_code.as_deref(),
|
||||
);
|
||||
|
||||
// Fetch screenshot (async, before spawn_blocking)
|
||||
let auth_header = headers.get(header::AUTHORIZATION);
|
||||
let screenshot_fetch = fetch_screenshot_bytes(&state, &frontend_params, auth_header);
|
||||
let screenshot_bytes = match tokio::time::timeout(
|
||||
Duration::from_secs(EXPORT_SCREENSHOT_TIMEOUT_SECS),
|
||||
screenshot_fetch,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(bytes)) => {
|
||||
info!(bytes = bytes.len(), "Fetched screenshot for export");
|
||||
Some(bytes)
|
||||
}
|
||||
Ok(Err(err)) => {
|
||||
warn!("Screenshot failed for export: {err}");
|
||||
None
|
||||
}
|
||||
Err(_) => {
|
||||
warn!(
|
||||
timeout_secs = EXPORT_SCREENSHOT_TIMEOUT_SECS,
|
||||
"Screenshot timed out for export"
|
||||
);
|
||||
None
|
||||
// Screenshot only makes sense for the spatial / filter mode. In list mode the
|
||||
// map view is unrelated to the selected postcodes, so we skip it.
|
||||
let screenshot_bytes = if is_postcode_mode {
|
||||
None
|
||||
} else {
|
||||
let auth_header = headers.get(header::AUTHORIZATION);
|
||||
let screenshot_fetch = fetch_screenshot_bytes(&state, &frontend_params, auth_header);
|
||||
match tokio::time::timeout(
|
||||
Duration::from_secs(EXPORT_SCREENSHOT_TIMEOUT_SECS),
|
||||
screenshot_fetch,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(bytes)) => {
|
||||
info!(bytes = bytes.len(), "Fetched screenshot for export");
|
||||
Some(bytes)
|
||||
}
|
||||
Ok(Err(err)) => {
|
||||
warn!("Screenshot failed for export: {err}");
|
||||
None
|
||||
}
|
||||
Err(_) => {
|
||||
warn!(
|
||||
timeout_secs = EXPORT_SCREENSHOT_TIMEOUT_SECS,
|
||||
"Screenshot timed out for export"
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -302,6 +445,9 @@ pub async fn get_export(
|
|||
})
|
||||
.collect();
|
||||
|
||||
let postcode_list_entries: Option<Vec<(usize, String)>> =
|
||||
postcode_list.map(|list| list.entries);
|
||||
|
||||
let bytes = tokio::task::spawn_blocking(move || -> Result<Vec<u8>, String> {
|
||||
let t0 = std::time::Instant::now();
|
||||
let num_features = state.data.num_features;
|
||||
|
|
@ -319,75 +465,102 @@ pub async fn get_export(
|
|||
// Build set of enum feature indices for quick lookup
|
||||
let enum_indices: FxHashMap<usize, ()> = enum_values.keys().map(|&idx| (idx, ())).collect();
|
||||
|
||||
// Aggregate directly by postcode so large requests don't retain every
|
||||
// matching property row before sampling the exported postcodes.
|
||||
let mut postcode_aggs: FxHashMap<usize, PostcodeExportAgg> = FxHashMap::default();
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
) {
|
||||
return;
|
||||
}
|
||||
if has_poi_filters && !row_passes_poi_filters(row, &parsed_poi_filters, poi_metrics)
|
||||
{
|
||||
return;
|
||||
}
|
||||
let postcode = pc_interner.resolve(&pc_keys[row]);
|
||||
if has_travel_filters
|
||||
&& !row_passes_travel_filters(postcode, &travel_entries, &travel_data)
|
||||
{
|
||||
return;
|
||||
}
|
||||
if let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) {
|
||||
postcode_aggs
|
||||
.entry(pc_idx)
|
||||
.or_insert_with(|| PostcodeExportAgg::new(total_export_features))
|
||||
.add_row(
|
||||
let (postcode_aggs, was_sampled): (Vec<(usize, PostcodeExportAgg)>, bool) =
|
||||
if let Some(entries) = postcode_list_entries.as_ref() {
|
||||
// List mode: iterate property rows for each requested postcode and
|
||||
// produce results in the order the user supplied them.
|
||||
let mut out: Vec<(usize, PostcodeExportAgg)> = Vec::with_capacity(entries.len());
|
||||
for (pc_idx, _normalized) in entries {
|
||||
let mut agg = PostcodeExportAgg::new(total_export_features);
|
||||
for &row_idx in state.data.rows_for_postcode(
|
||||
&postcode_data.postcodes[*pc_idx],
|
||||
) {
|
||||
agg.add_row(
|
||||
feature_data,
|
||||
row,
|
||||
row_idx as usize,
|
||||
num_features,
|
||||
&enum_indices,
|
||||
&quant,
|
||||
poi_metrics,
|
||||
);
|
||||
}
|
||||
if agg.count > 0 {
|
||||
out.push((*pc_idx, agg));
|
||||
}
|
||||
}
|
||||
});
|
||||
(out, false)
|
||||
} else {
|
||||
// Bounds mode: aggregate directly by postcode so large requests
|
||||
// don't retain every matching property row before sampling.
|
||||
let mut by_pc: FxHashMap<usize, PostcodeExportAgg> = FxHashMap::default();
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
) {
|
||||
return;
|
||||
}
|
||||
if has_poi_filters
|
||||
&& !row_passes_poi_filters(row, &parsed_poi_filters, poi_metrics)
|
||||
{
|
||||
return;
|
||||
}
|
||||
let postcode = pc_interner.resolve(&pc_keys[row]);
|
||||
if has_travel_filters
|
||||
&& !row_passes_travel_filters(postcode, &travel_entries, &travel_data)
|
||||
{
|
||||
return;
|
||||
}
|
||||
if let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) {
|
||||
by_pc.entry(pc_idx)
|
||||
.or_insert_with(|| PostcodeExportAgg::new(total_export_features))
|
||||
.add_row(
|
||||
feature_data,
|
||||
row,
|
||||
num_features,
|
||||
&enum_indices,
|
||||
&quant,
|
||||
poi_metrics,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
let mut postcode_aggs: Vec<(usize, PostcodeExportAgg)> = postcode_aggs
|
||||
.into_iter()
|
||||
.filter(|(_, agg)| agg.count > 0)
|
||||
.collect();
|
||||
let mut aggs: Vec<(usize, PostcodeExportAgg)> = by_pc
|
||||
.into_iter()
|
||||
.filter(|(_, agg)| agg.count > 0)
|
||||
.collect();
|
||||
|
||||
// Sort by property count descending
|
||||
postcode_aggs.sort_unstable_by_key(|agg| std::cmp::Reverse(agg.1.count));
|
||||
// Sort by property count descending
|
||||
aggs.sort_unstable_by_key(|agg| std::cmp::Reverse(agg.1.count));
|
||||
|
||||
// Sample if too many postcodes
|
||||
let was_sampled = postcode_aggs.len() > MAX_EXPORT_POSTCODES;
|
||||
if was_sampled {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
south.to_bits().hash(&mut hasher);
|
||||
west.to_bits().hash(&mut hasher);
|
||||
north.to_bits().hash(&mut hasher);
|
||||
east.to_bits().hash(&mut hasher);
|
||||
let seed = hasher.finish();
|
||||
let was_sampled = aggs.len() > MAX_EXPORT_POSTCODES;
|
||||
if was_sampled {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
south.to_bits().hash(&mut hasher);
|
||||
west.to_bits().hash(&mut hasher);
|
||||
north.to_bits().hash(&mut hasher);
|
||||
east.to_bits().hash(&mut hasher);
|
||||
let seed = hasher.finish();
|
||||
|
||||
let len = postcode_aggs.len();
|
||||
for pick in 0..MAX_EXPORT_POSTCODES {
|
||||
let swap_idx = pick
|
||||
+ ((seed.wrapping_mul(pick as u64 + 1).wrapping_add(pick as u64)) as usize
|
||||
% (len - pick));
|
||||
postcode_aggs.swap(pick, swap_idx);
|
||||
}
|
||||
postcode_aggs.truncate(MAX_EXPORT_POSTCODES);
|
||||
postcode_aggs.sort_unstable_by_key(|agg| std::cmp::Reverse(agg.1.count));
|
||||
}
|
||||
let len = aggs.len();
|
||||
for pick in 0..MAX_EXPORT_POSTCODES {
|
||||
let swap_idx = pick
|
||||
+ ((seed.wrapping_mul(pick as u64 + 1).wrapping_add(pick as u64))
|
||||
as usize
|
||||
% (len - pick));
|
||||
aggs.swap(pick, swap_idx);
|
||||
}
|
||||
aggs.truncate(MAX_EXPORT_POSTCODES);
|
||||
aggs.sort_unstable_by_key(|agg| std::cmp::Reverse(agg.1.count));
|
||||
}
|
||||
(aggs, was_sampled)
|
||||
};
|
||||
|
||||
// Determine column order: filter features first, then remaining
|
||||
let filter_feature_names = extract_filter_feature_names(filters_str.as_deref());
|
||||
|
|
@ -545,12 +718,18 @@ pub async fn get_export(
|
|||
frontend_params
|
||||
);
|
||||
|
||||
// Sheet 1: "Selected" (filter features only) with link + screenshot
|
||||
// Sheet 2: "All Data" (all features)
|
||||
let sheet_configs: [(&str, &[usize], bool); 2] = [
|
||||
("Selected", &filter_feature_indices, true),
|
||||
("All Data", &all_feature_indices, false),
|
||||
];
|
||||
// Bounds mode: two sheets — "Selected" (filter features with link + screenshot)
|
||||
// and "All Data" (all features).
|
||||
// List mode: single sheet "Postcodes" with all data, no link or screenshot
|
||||
// (the supplied list isn't tied to a map view).
|
||||
let sheet_configs: Vec<(&str, &[usize], bool)> = if postcode_list_entries.is_some() {
|
||||
vec![("Postcodes", &all_feature_indices, false)]
|
||||
} else {
|
||||
vec![
|
||||
("Selected", &filter_feature_indices, true),
|
||||
("All Data", &all_feature_indices, false),
|
||||
]
|
||||
};
|
||||
|
||||
for (sheet_name, feat_indices, include_header) in &sheet_configs {
|
||||
let sheet = workbook.add_worksheet();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue