Don't fail on new POI categories

This commit is contained in:
Andras Schmelczer 2026-06-11 08:30:48 +01:00
parent 6a33b03fdf
commit 7c0e8634f2

View file

@ -1712,13 +1712,13 @@ def transform(
lf.select("category").unique().collect(engine="streaming").to_series().to_list()
)
# Verify every non-dropped category has a mapping
# Warn about (and ignore) any category lacking a mapping
unmapped = []
for cat in all_categories:
if cat not in DROP_CATEGORIES and cat not in CATEGORY_MAP:
unmapped.append(cat)
if unmapped:
raise ValueError(f"Categories missing from CATEGORY_MAP: {sorted(unmapped)}")
print(f"Ignoring categories missing from CATEGORY_MAP: {sorted(unmapped)}")
# Warn about CATEGORY_MAP keys not in data (may be absent in regional extracts)
mapped_but_absent = []
@ -1731,8 +1731,8 @@ def transform(
f"CATEGORY_MAP categories not in data (skipped): {sorted(mapped_but_absent)}"
)
# Drop unwanted categories
lf = lf.filter(~pl.col("category").is_in(list(DROP_CATEGORIES)))
# Drop unwanted and unmapped categories
lf = lf.filter(~pl.col("category").is_in(list(DROP_CATEGORIES) + unmapped))
# Drop UNNAMED instances of private-dominated tags (gardens, pitches,
# pools) so they don't inflate Park / Sports Centre proximity counts. Done