Fix crime & add actual listings
Some checks failed
CI / Check (push) Failing after 4m1s
Build and publish Docker image / build-and-push (push) Failing after 4m10s

This commit is contained in:
Andras Schmelczer 2026-05-17 11:12:25 +01:00
parent 017902b8e6
commit ebe7bbb51d
34 changed files with 2014 additions and 172754 deletions

View file

@ -21,15 +21,21 @@ pub struct PlaceData {
pub travel_destination: Vec<bool>,
}
#[derive(Clone, Copy)]
pub(super) struct CityCandidate<'a> {
pub(super) name: &'a str,
pub(super) lat: f32,
pub(super) lon: f32,
name: &'a str,
lat: f32,
lon: f32,
population: u32,
max_dist_sq: f32,
}
// All thresholds below are compared against `CityCandidate::distance_sq`, which
// sums squared latitude/longitude differences (longitude scaled by cos(lat)).
// Coordinates are passed through `to_radians` elsewhere, so they are degrees
// and these limits are in squared degrees.

/// Default squared cut-off that `CityCandidate::from_place` stores in
/// `max_dist_sq` for every city not named "London". 0.81 is 0.9 squared.
const PARENT_CITY_MAX_DIST_SQ: f32 = 0.81;
/// Radius, in degrees, within which a place may be labelled with "London".
// NOTE(review): presumably 30 km converted at ~111 km per degree — confirm.
const LONDON_DISPLAY_MAX_DEGREES: f32 = 30.0 / 111.0;
/// Squared form of `LONDON_DISPLAY_MAX_DEGREES`; `CityCandidate::from_place`
/// stores it in `max_dist_sq` for the city named "London".
const LONDON_DISPLAY_MAX_DIST_SQ: f32 = LONDON_DISPLAY_MAX_DEGREES * LONDON_DISPLAY_MAX_DEGREES;
/// Radius, in degrees, inside which a much larger city can subsume a smaller one.
// NOTE(review): presumably 5 km converted at ~111 km per degree — confirm.
const SUBSUMED_CITY_MAX_DEGREES: f32 = 5.0 / 111.0;
/// Squared form of `SUBSUMED_CITY_MAX_DEGREES`, used by
/// `CityCandidate::is_subsumed_by` for its distance test.
const SUBSUMED_CITY_MAX_DIST_SQ: f32 = SUBSUMED_CITY_MAX_DEGREES * SUBSUMED_CITY_MAX_DEGREES;
/// A city can only be subsumed by another whose population is at least this
/// many times its own (see `CityCandidate::is_subsumed_by`).
const SUBSUMED_CITY_MIN_POPULATION_RATIO: u32 = 10;
fn type_rank(place_type: &str) -> u8 {
match place_type {
@ -47,15 +53,80 @@ pub fn is_travel_destination_type(place_type: &str) -> bool {
matches!(place_type, "city" | "station" | "university")
}
fn distance_sq(lat: f32, lon: f32, city: &CityCandidate<'_>) -> f32 {
let cos_lat = lat.to_radians().cos();
let dlat = city.lat - lat;
let dlon = (city.lon - lon) * cos_lat;
dlat * dlat + dlon * dlon
impl<'a> CityCandidate<'a> {
fn from_place(name: &'a str, lat: f32, lon: f32, population: u32) -> Self {
let max_dist_sq = if name == "London" {
LONDON_DISPLAY_MAX_DIST_SQ
} else {
PARENT_CITY_MAX_DIST_SQ
};
Self {
name,
lat,
lon,
population,
max_dist_sq,
}
}
fn distance_sq(&self, lat: f32, lon: f32, cos_lat: f32) -> f32 {
let dlat = self.lat - lat;
let dlon = (self.lon - lon) * cos_lat;
dlat * dlat + dlon * dlon
}
fn is_subsumed_by(&self, other: &Self) -> bool {
if self.population == 0 {
return false;
}
let min_parent_population =
u64::from(self.population) * u64::from(SUBSUMED_CITY_MIN_POPULATION_RATIO);
if u64::from(other.population) < min_parent_population {
return false;
}
other.distance_sq(self.lat, self.lon, self.lat.to_radians().cos())
< SUBSUMED_CITY_MAX_DIST_SQ
}
}
fn is_london_city_name(name: &str) -> bool {
matches!(name, "London" | "Westminster" | "City of London")
/// Collects the cities that may be used as display labels for nearby places.
///
/// The slices are parallel columns indexed by row. Every row whose
/// `type_rank` is 0 becomes a [`CityCandidate`]; candidates that are subsumed
/// by any other candidate (see `CityCandidate::is_subsumed_by`) are then
/// dropped. Surviving candidates keep their original row order.
///
/// # Panics
///
/// Panics if `names`, `lat`, `lon` or `population` is shorter than the index
/// of a rank-0 row in `type_rank`.
pub(super) fn display_city_candidates<'a>(
    names: &'a [String],
    type_rank: &[u8],
    population: &[u32],
    lat: &[f32],
    lon: &[f32],
) -> Vec<CityCandidate<'a>> {
    // Stage 1: one candidate per rank-0 row.
    let mut ranked_cities: Vec<CityCandidate<'a>> = Vec::new();
    for (row, &rank) in type_rank.iter().enumerate() {
        if rank != 0 {
            continue;
        }
        ranked_cities.push(CityCandidate::from_place(
            &names[row],
            lat[row],
            lon[row],
            population[row],
        ));
    }

    // Stage 2: keep a candidate only if no *other* candidate subsumes it.
    let kept: Vec<CityCandidate<'a>> = ranked_cities
        .iter()
        .enumerate()
        .filter(|&(pos, candidate)| {
            ranked_cities
                .iter()
                .enumerate()
                .all(|(other_pos, rival)| other_pos == pos || !candidate.is_subsumed_by(rival))
        })
        .map(|(_, candidate)| *candidate)
        .collect();
    kept
}
pub(super) fn nearest_display_city<'a>(
@ -63,35 +134,13 @@ pub(super) fn nearest_display_city<'a>(
lon: f32,
cities: &'a [CityCandidate<'a>],
) -> Option<&'a str> {
let mut best_dist_sq = f32::MAX;
let mut best_city: Option<&CityCandidate<'_>> = None;
let mut london_dist_sq: Option<f32> = None;
let cos_lat = lat.to_radians().cos();
let (best_city, best_dist_sq) = cities
.iter()
.map(|city| (city, city.distance_sq(lat, lon, cos_lat)))
.min_by(|(_, lhs), (_, rhs)| lhs.total_cmp(rhs))?;
for city in cities {
let dist_sq = distance_sq(lat, lon, city);
if city.name == "London" {
london_dist_sq = Some(dist_sq);
}
if dist_sq < best_dist_sq {
best_dist_sq = dist_sq;
best_city = Some(city);
}
}
let best_city = best_city?;
if best_dist_sq >= PARENT_CITY_MAX_DIST_SQ {
return None;
}
if is_london_city_name(best_city.name) {
if london_dist_sq.is_some_and(|dist_sq| dist_sq < LONDON_DISPLAY_MAX_DIST_SQ) {
Some("London")
} else {
None
}
} else {
Some(best_city.name)
}
(best_dist_sq < best_city.max_dist_sq).then_some(best_city.name)
}
pub fn normalize_search_text(text: &str) -> String {
@ -306,19 +355,8 @@ impl PlaceData {
let display_city_override = extract_optional_str_col(&df, "display_city")?;
// Precompute nearest city for each non-city place
let city_indices: Vec<usize> = type_rank_vec
.iter()
.enumerate()
.filter_map(|(idx, &rank)| if rank == 0 { Some(idx) } else { None })
.collect();
let city_candidates: Vec<CityCandidate<'_>> = city_indices
.iter()
.map(|&idx| CityCandidate {
name: &name[idx],
lat: lat[idx],
lon: lon[idx],
})
.collect();
let city_candidates =
display_city_candidates(&name, &type_rank_vec, &population, &lat, &lon);
let fallback_city: Vec<Option<String>> = (0..row_count)
.map(|idx| {
@ -379,36 +417,41 @@ impl PlaceData {
mod tests {
use super::*;
fn test_city_candidates() -> Vec<CityCandidate<'static>> {
vec![
CityCandidate {
name: "London",
lat: 51.5074456,
lon: -0.1277653,
},
CityCandidate {
name: "Westminster",
lat: 51.4973206,
lon: -0.137149,
},
CityCandidate {
name: "City of London",
lat: 51.5156177,
lon: -0.0919983,
},
CityCandidate {
name: "Cambridge",
lat: 52.2055314,
lon: 0.1186637,
},
CityCandidate {
name: "Oxford",
lat: 51.7520131,
lon: -1.2578499,
},
/// Raw fixture rows shared by the city tests.
///
/// Each tuple is `(name, lat, lon, population)`, in the order London,
/// Westminster, City of London, Cambridge, Oxford.
fn test_city_rows() -> [(&'static str, f32, f32, u32); 5] {
    let london = ("London", 51.5074456, -0.1277653, 8_908_083);
    let westminster = ("Westminster", 51.4973206, -0.137149, 211_365);
    let city_of_london = ("City of London", 51.5156177, -0.0919983, 10_847);
    let cambridge = ("Cambridge", 52.2055314, 0.1186637, 145_818);
    let oxford = ("Oxford", 51.7520131, -1.2578499, 165_000);

    [london, westminster, city_of_london, cambridge, oxford]
}
/// Turns every fixture row from `test_city_rows` into a [`CityCandidate`],
/// without applying any subsumption filtering.
fn all_test_city_candidates() -> Vec<CityCandidate<'static>> {
    let mut candidates = Vec::new();
    for (name, lat, lon, population) in test_city_rows() {
        candidates.push(CityCandidate::from_place(name, lat, lon, population));
    }
    candidates
}
/// Fixture candidates after dropping every city that another fixture city
/// subsumes (see `CityCandidate::is_subsumed_by`). Order is preserved.
fn test_city_candidates() -> Vec<CityCandidate<'static>> {
    let everyone = all_test_city_candidates();
    let mut survivors = Vec::new();

    for (pos, candidate) in everyone.iter().enumerate() {
        // A candidate is never compared against itself.
        let absorbed = everyone
            .iter()
            .enumerate()
            .any(|(other_pos, rival)| other_pos != pos && candidate.is_subsumed_by(rival));
        if !absorbed {
            survivors.push(*candidate);
        }
    }

    survivors
}
#[test]
fn type_rank_ordering() {
assert!(type_rank("city") < type_rank("town"));
@ -433,7 +476,30 @@ mod tests {
}
#[test]
fn nearest_display_city_canonicalizes_greater_london_aliases() {
/// Feeds every fixture row to `display_city_candidates` as a rank-0 row and
/// checks that only London, Cambridge and Oxford remain.
fn display_city_candidates_drop_city_nodes_subsumed_by_much_larger_nearby_city() {
    let rows = test_city_rows();

    // Split the row tuples into the parallel columns the function expects.
    let mut names: Vec<String> = Vec::with_capacity(rows.len());
    let mut population: Vec<u32> = Vec::with_capacity(rows.len());
    let mut lat: Vec<f32> = Vec::with_capacity(rows.len());
    let mut lon: Vec<f32> = Vec::with_capacity(rows.len());
    for &(row_name, row_lat, row_lon, row_population) in &rows {
        names.push(row_name.to_string());
        population.push(row_population);
        lat.push(row_lat);
        lon.push(row_lon);
    }
    // Every row is marked with rank 0 so all of them become candidates.
    let type_rank: Vec<u8> = vec![0; rows.len()];

    let cities = display_city_candidates(&names, &type_rank, &population, &lat, &lon);
    let kept_names: Vec<&str> = cities.iter().map(|city| city.name).collect();

    assert_eq!(kept_names, ["London", "Cambridge", "Oxford"]);
}
#[test]
fn nearest_display_city_labels_inner_greater_london_from_london_candidate() {
let cities = test_city_candidates();
assert_eq!(
@ -453,7 +519,7 @@ mod tests {
}
#[test]
fn nearest_display_city_does_not_leak_westminster_label_past_london_guard() {
fn nearest_display_city_does_not_extend_london_past_its_display_radius() {
let cities = test_city_candidates();
assert_eq!(nearest_display_city(51.5093, -0.5954, &cities), None);