all good
This commit is contained in:
parent
47d89f6fad
commit
017902b8e6
82 changed files with 331466 additions and 54841 deletions
|
|
@ -21,6 +21,16 @@ pub struct PlaceData {
|
|||
pub travel_destination: Vec<bool>,
|
||||
}
|
||||
|
||||
pub(super) struct CityCandidate<'a> {
|
||||
pub(super) name: &'a str,
|
||||
pub(super) lat: f32,
|
||||
pub(super) lon: f32,
|
||||
}
|
||||
|
||||
/// Maximum squared distance (degrees²) at which a city may still label a place.
///
/// Roughly a 100 km threshold: 1° ≈ 111 km, so 0.9° ≈ 100 km, and 0.9² = 0.81.
const PARENT_CITY_MAX_DIST_SQ: f32 = 0.81;

/// Radius, in degrees, inside which a place may be labelled "London":
/// 30 km converted with the same ≈111 km-per-degree approximation.
const LONDON_DISPLAY_MAX_DEGREES: f32 = 30.0 / 111.0;

/// Squared form of [`LONDON_DISPLAY_MAX_DEGREES`], directly comparable with
/// squared distances.
const LONDON_DISPLAY_MAX_DIST_SQ: f32 = LONDON_DISPLAY_MAX_DEGREES * LONDON_DISPLAY_MAX_DEGREES;
|
||||
|
||||
fn type_rank(place_type: &str) -> u8 {
|
||||
match place_type {
|
||||
"city" => 0,
|
||||
|
|
@ -37,6 +47,53 @@ pub fn is_travel_destination_type(place_type: &str) -> bool {
|
|||
matches!(place_type, "city" | "station" | "university")
|
||||
}
|
||||
|
||||
fn distance_sq(lat: f32, lon: f32, city: &CityCandidate<'_>) -> f32 {
|
||||
let cos_lat = lat.to_radians().cos();
|
||||
let dlat = city.lat - lat;
|
||||
let dlon = (city.lon - lon) * cos_lat;
|
||||
dlat * dlat + dlon * dlon
|
||||
}
|
||||
|
||||
/// Returns `true` when `name` is one of the city entries treated as London:
/// "London", "Westminster" or "City of London".
///
/// The comparison is exact (case- and whitespace-sensitive).
fn is_london_city_name(name: &str) -> bool {
    matches!(name, "London" | "Westminster" | "City of London")
}
|
||||
|
||||
pub(super) fn nearest_display_city<'a>(
|
||||
lat: f32,
|
||||
lon: f32,
|
||||
cities: &'a [CityCandidate<'a>],
|
||||
) -> Option<&'a str> {
|
||||
let mut best_dist_sq = f32::MAX;
|
||||
let mut best_city: Option<&CityCandidate<'_>> = None;
|
||||
let mut london_dist_sq: Option<f32> = None;
|
||||
|
||||
for city in cities {
|
||||
let dist_sq = distance_sq(lat, lon, city);
|
||||
if city.name == "London" {
|
||||
london_dist_sq = Some(dist_sq);
|
||||
}
|
||||
if dist_sq < best_dist_sq {
|
||||
best_dist_sq = dist_sq;
|
||||
best_city = Some(city);
|
||||
}
|
||||
}
|
||||
|
||||
let best_city = best_city?;
|
||||
if best_dist_sq >= PARENT_CITY_MAX_DIST_SQ {
|
||||
return None;
|
||||
}
|
||||
|
||||
if is_london_city_name(best_city.name) {
|
||||
if london_dist_sq.is_some_and(|dist_sq| dist_sq < LONDON_DISPLAY_MAX_DIST_SQ) {
|
||||
Some("London")
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
Some(best_city.name)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn normalize_search_text(text: &str) -> String {
|
||||
let mut result = String::with_capacity(text.len());
|
||||
let mut last_was_space = true;
|
||||
|
|
@ -182,6 +239,25 @@ fn extract_bool_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<bool>> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn extract_optional_str_col(
|
||||
df: &DataFrame,
|
||||
name: &str,
|
||||
) -> anyhow::Result<Option<Vec<Option<String>>>> {
|
||||
let column = match df.column(name) {
|
||||
Ok(column) => column,
|
||||
Err(_) => return Ok(None),
|
||||
};
|
||||
let string_column = column
|
||||
.str()
|
||||
.with_context(|| format!("Column '{name}' is not a string column"))?;
|
||||
Ok(Some(
|
||||
string_column
|
||||
.into_iter()
|
||||
.map(|value| value.map(ToString::to_string))
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
|
||||
impl PlaceData {
|
||||
pub fn load(parquet_path: &Path) -> anyhow::Result<Self> {
|
||||
super::run_polars_io(|| Self::load_inner(parquet_path))
|
||||
|
|
@ -227,6 +303,7 @@ impl PlaceData {
|
|||
.map(|place_type| is_travel_destination_type(place_type))
|
||||
.collect()
|
||||
};
|
||||
let display_city_override = extract_optional_str_col(&df, "display_city")?;
|
||||
|
||||
// Precompute nearest city for each non-city place
|
||||
let city_indices: Vec<usize> = type_rank_vec
|
||||
|
|
@ -234,37 +311,45 @@ impl PlaceData {
|
|||
.enumerate()
|
||||
.filter_map(|(idx, &rank)| if rank == 0 { Some(idx) } else { None })
|
||||
.collect();
|
||||
let city_candidates: Vec<CityCandidate<'_>> = city_indices
|
||||
.iter()
|
||||
.map(|&idx| CityCandidate {
|
||||
name: &name[idx],
|
||||
lat: lat[idx],
|
||||
lon: lon[idx],
|
||||
})
|
||||
.collect();
|
||||
|
||||
let city: Vec<Option<String>> = (0..row_count)
|
||||
let fallback_city: Vec<Option<String>> = (0..row_count)
|
||||
.map(|idx| {
|
||||
if type_rank_vec[idx] == 0 {
|
||||
return None; // Cities don't need a city label
|
||||
}
|
||||
let plat = lat[idx];
|
||||
let plon = lon[idx];
|
||||
let cos_lat = (plat.to_radians()).cos();
|
||||
|
||||
let mut best_dist_sq = f32::MAX;
|
||||
let mut best_city: Option<&str> = None;
|
||||
for &ci in &city_indices {
|
||||
let dlat = lat[ci] - plat;
|
||||
let dlon = (lon[ci] - plon) * cos_lat;
|
||||
let dist_sq = dlat * dlat + dlon * dlon;
|
||||
if dist_sq < best_dist_sq {
|
||||
best_dist_sq = dist_sq;
|
||||
best_city = Some(&name[ci]);
|
||||
}
|
||||
}
|
||||
|
||||
// ~100km threshold: 1° ≈ 111km, so 0.9° ≈ 100km → 0.81 squared
|
||||
if best_dist_sq < 0.81 {
|
||||
best_city.map(|s| s.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
nearest_display_city(lat[idx], lon[idx], &city_candidates).map(str::to_string)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let city: Vec<Option<String>> = if let Some(display_city_override) = display_city_override {
|
||||
fallback_city
|
||||
.into_iter()
|
||||
.zip(display_city_override)
|
||||
.enumerate()
|
||||
.map(|(idx, (fallback, override_city))| {
|
||||
if type_rank_vec[idx] == 0 {
|
||||
return None;
|
||||
}
|
||||
override_city
|
||||
.and_then(|value| {
|
||||
let trimmed = value.trim();
|
||||
(!trimmed.is_empty()).then(|| trimmed.to_string())
|
||||
})
|
||||
.or(fallback)
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
fallback_city
|
||||
};
|
||||
|
||||
let with_pop = population.iter().filter(|&&pop| pop > 0).count();
|
||||
let with_city = city.iter().filter(|c| c.is_some()).count();
|
||||
info!(
|
||||
|
|
@ -294,6 +379,36 @@ impl PlaceData {
|
|||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn test_city_candidates() -> Vec<CityCandidate<'static>> {
|
||||
vec![
|
||||
CityCandidate {
|
||||
name: "London",
|
||||
lat: 51.5074456,
|
||||
lon: -0.1277653,
|
||||
},
|
||||
CityCandidate {
|
||||
name: "Westminster",
|
||||
lat: 51.4973206,
|
||||
lon: -0.137149,
|
||||
},
|
||||
CityCandidate {
|
||||
name: "City of London",
|
||||
lat: 51.5156177,
|
||||
lon: -0.0919983,
|
||||
},
|
||||
CityCandidate {
|
||||
name: "Cambridge",
|
||||
lat: 52.2055314,
|
||||
lon: 0.1186637,
|
||||
},
|
||||
CityCandidate {
|
||||
name: "Oxford",
|
||||
lat: 51.7520131,
|
||||
lon: -1.2578499,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn type_rank_ordering() {
|
||||
assert!(type_rank("city") < type_rank("town"));
|
||||
|
|
@ -316,4 +431,41 @@ mod tests {
|
|||
assert!(!is_travel_destination_type("town"));
|
||||
assert!(!is_travel_destination_type("suburb"));
|
||||
}
|
||||
|
||||
#[test]
fn nearest_display_city_canonicalizes_greater_london_aliases() {
    let cities = test_city_candidates();

    // The closest fixture entry to this point is the "Westminster" alias, and
    // London itself is within the London display radius.
    assert_eq!(
        nearest_display_city(51.3713049, -0.101957, &cities),
        Some("London")
    );
}
|
||||
|
||||
#[test]
fn nearest_display_city_preserves_non_london_duplicates() {
    let cities = test_city_candidates();

    // A point whose nearest fixture entry is not a London alias keeps that
    // entry's own name.
    assert_eq!(
        nearest_display_city(52.1277704, -0.0813098, &cities),
        Some("Cambridge")
    );
}
|
||||
|
||||
#[test]
fn nearest_display_city_does_not_leak_westminster_label_past_london_guard() {
    let cities = test_city_candidates();

    // The nearest entry is a London alias, but London is outside the London
    // display radius, so no label at all is expected (not "Westminster").
    assert_eq!(nearest_display_city(51.5093, -0.5954, &cities), None);
}
|
||||
|
||||
#[test]
fn nearest_display_city_keeps_normal_non_london_city() {
    let cities = test_city_candidates();

    // An ordinary (non-London) nearest city is returned under its own name.
    assert_eq!(
        nearest_display_city(51.456659, -0.969651, &cities),
        Some("Oxford")
    );
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue