Lint

2026-05-06 23:13:58 +01:00 · 2026-05-06 23:13:58 +01:00 · 5c3b87f2d5
commit 5c3b87f2d5
parent 94f9c0d594
69 changed files with 1334 additions and 213 deletions
--- a/pipeline/download/geolytix_retail_points.py
+++ b/pipeline/download/geolytix_retail_points.py
@@ -42,9 +42,7 @@ def select_latest_csv_name(names: list[str]) -> str:
        match = CSV_NAME_RE.match(path.name)
        if not match:
            continue
-        candidates.append(
-            (match.group("release"), int(match.group("version")), name)
-        )
+        candidates.append((match.group("release"), int(match.group("version")), name))

    if not candidates:
        raise ValueError("No root-level GEOLYTIX retail points CSV found")
--- a/pipeline/download/map_assets.py
+++ b/pipeline/download/map_assets.py
@@ -9,6 +9,7 @@ from pipeline.transform.transform_poi import NAPTAN_EMOJIS, _CATEGORIES
 GLYPHS_BASE = "https://protomaps.github.io/basemaps-assets/fonts"
 SPRITES_BASE = "https://protomaps.github.io/basemaps-assets/sprites/v4"
 TWEMOJI_BASE = "https://cdn.jsdelivr.net/gh/twitter/twemoji@14.0.2/assets/72x72"
+POI_ICON_BASE = "https://geolytix.github.io/MapIcons"

 # Font stacks used by @protomaps/basemaps with lang='en'
 FONT_STACKS = ["Noto Sans Regular", "Noto Sans Italic", "Noto Sans Medium"]
@@ -16,6 +17,50 @@ FONT_STACKS = ["Noto Sans Regular", "Noto Sans Italic", "Noto Sans Medium"]
 # Fallback emoji not in any category
 _FALLBACK_EMOJIS = ["📍"]

+POI_ICON_PATHS = [
+    "asda/asda_express_24px.svg",
+    "asda/asda_green_basket_24px.svg",
+    "asda/asda_green_trolley_24px.svg",
+    "asda/asda_living_24px.svg",
+    "asda/asda_pfs_24px.svg",
+    "asda/asda_primary.svg",
+    "asda/asda_superstore_green_trolley_24px.svg",
+    "brands/aldi_24px.svg",
+    "brands/amazon_fresh_alt_24px.svg",
+    "brands/booths_24px.svg",
+    "brands/budgens_24px.svg",
+    "brands/centra_24px.svg",
+    "brands/cook.svg",
+    "brands/coop_24px.svg",
+    "brands/costco_24px.svg",
+    "brands/dunnes_stores_24px.svg",
+    "brands/farmfoods_updated_24px.svg",
+    "brands/heron_24px.svg",
+    "brands/iceland_24px.svg",
+    "brands/iceland_food_warehouse_24px.svg",
+    "brands/lidl_24px.svg",
+    "brands/little_waitrose_24px.svg",
+    "brands/makro_24px.svg",
+    "brands/mns_24px.svg",
+    "brands/mns_food_24px.svg",
+    "brands/mns_high_street_24px.svg",
+    "brands/mns_hospital_24px.svg",
+    "brands/mns_moto_24px.svg",
+    "brands/mns_outlet_24px.svg",
+    "brands/morrisons_24px.svg",
+    "brands/morrisons_daily_24px.svg",
+    "brands/sainsburys_24px.svg",
+    "brands/sainsburys_local_24px.svg",
+    "brands/spar_24px.svg",
+    "brands/tesco_24px.svg",
+    "brands/tesco_express_24px.svg",
+    "brands/tesco_extra_24px.svg",
+    "brands/waitrose_24px.svg",
+    "brands/wholefoods_24px.svg",
+    "logos/planet_organic_24px.svg",
+    "public_transport/london_tube.svg",
+]
+

 def collect_twemoji_codes() -> list[str]:
    """Derive twemoji hex codes from transform_poi categories.
@@ -93,6 +138,12 @@ def main():
        url = f"{TWEMOJI_BASE}/{code}.png"
        tasks.append((url, twemoji_dir / f"{code}.png"))

+    # Branded POI icons are served from this local bundle at runtime.
+    poi_icons_dir = out / "poi-icons"
+    for icon_path in POI_ICON_PATHS:
+        url = f"{POI_ICON_BASE}/{icon_path}"
+        tasks.append((url, poi_icons_dir / icon_path))
+
    # Skip already-downloaded files
    remaining = [(url, dest) for url, dest in tasks]

--- a/pipeline/download/median_age.py
+++ b/pipeline/download/median_age.py
@@ -23,24 +23,24 @@ PAGE_SIZE = 25000
 # Five-year age bands in order, with lower bounds for interpolation.
 # The last band (85+) is open-ended — we treat it as 85-89 for median purposes.
 AGE_BANDS = [
-    (0, 5),    # Aged 0 to 4 years
-    (5, 5),    # Aged 5 to 9 years
-    (10, 5),   # Aged 10 to 14 years
-    (15, 5),   # Aged 15 to 19 years
-    (20, 5),   # Aged 20 to 24 years
-    (25, 5),   # Aged 25 to 29 years
-    (30, 5),   # Aged 30 to 34 years
-    (35, 5),   # Aged 35 to 39 years
-    (40, 5),   # Aged 40 to 44 years
-    (45, 5),   # Aged 45 to 49 years
-    (50, 5),   # Aged 50 to 54 years
-    (55, 5),   # Aged 55 to 59 years
-    (60, 5),   # Aged 60 to 64 years
-    (65, 5),   # Aged 65 to 69 years
-    (70, 5),   # Aged 70 to 74 years
-    (75, 5),   # Aged 75 to 79 years
-    (80, 5),   # Aged 80 to 84 years
-    (85, 5),   # Aged 85 years and over
+    (0, 5),  # Aged 0 to 4 years
+    (5, 5),  # Aged 5 to 9 years
+    (10, 5),  # Aged 10 to 14 years
+    (15, 5),  # Aged 15 to 19 years
+    (20, 5),  # Aged 20 to 24 years
+    (25, 5),  # Aged 25 to 29 years
+    (30, 5),  # Aged 30 to 34 years
+    (35, 5),  # Aged 35 to 39 years
+    (40, 5),  # Aged 40 to 44 years
+    (45, 5),  # Aged 45 to 49 years
+    (50, 5),  # Aged 50 to 54 years
+    (55, 5),  # Aged 55 to 59 years
+    (60, 5),  # Aged 60 to 64 years
+    (65, 5),  # Aged 65 to 69 years
+    (70, 5),  # Aged 70 to 74 years
+    (75, 5),  # Aged 75 to 79 years
+    (80, 5),  # Aged 80 to 84 years
+    (85, 5),  # Aged 85 years and over
 ]


@@ -110,14 +110,18 @@ def download_and_convert(output_path: Path) -> None:
    for row in rows:
        counts = [row[col] for col in band_cols]
        median = compute_median_age(counts)
-        medians.append({"lsoa21": row["GEOGRAPHY_CODE"], "median_age": round(median, 1)})
+        medians.append(
+            {"lsoa21": row["GEOGRAPHY_CODE"], "median_age": round(median, 1)}
+        )

    result = pl.DataFrame(medians).with_columns(
        pl.col("median_age").cast(pl.Float32),
    )

    print(f"England LSOAs: {result.height}")
-    print(f"Median age range: {result['median_age'].min()} - {result['median_age'].max()}")
+    print(
+        f"Median age range: {result['median_age'].min()} - {result['median_age'].max()}"
+    )
    print(f"Mean of medians: {result['median_age'].mean():.1f}")

    output_path.parent.mkdir(parents=True, exist_ok=True)
--- a/pipeline/download/rental_prices.py
+++ b/pipeline/download/rental_prices.py
@@ -43,9 +43,7 @@ def convert_to_parquet(xlsx_path: Path, parquet_path: Path) -> None:

    # Filter to English local authorities
    df = df.filter(
-        pl.any_horizontal(
-            pl.col("area_code").str.starts_with(p) for p in LA_PREFIXES
-        )
+        pl.any_horizontal(pl.col("area_code").str.starts_with(p) for p in LA_PREFIXES)
    )

    # Use only the latest month
--- a/pipeline/download/tiles.py
+++ b/pipeline/download/tiles.py
@@ -23,7 +23,9 @@ def find_latest_build() -> str:
    for i in range(MAX_AGE_DAYS):
        d = today - timedelta(days=i)
        url = f"{PROTOMAPS_BASE}/{d:%Y%m%d}.pmtiles"
-        req = urllib.request.Request(url, method="HEAD", headers={"User-Agent": USER_AGENT})
+        req = urllib.request.Request(
+            url, method="HEAD", headers={"User-Agent": USER_AGENT}
+        )
        try:
            urllib.request.urlopen(req)
            print(f"Found build: {d:%Y%m%d}")