All good
Some checks failed
CI / Check (push) Has been cancelled
Build and publish Docker image / build-and-push (push) Has been cancelled

This commit is contained in:
Andras Schmelczer 2026-05-18 21:20:10 +01:00
parent 6ea544a0f6
commit 6cc7288126
45 changed files with 929 additions and 1043 deletions

View file

@ -10,6 +10,8 @@ import pyarrow as pa
import pyarrow.csv as pa_csv
import pyarrow.parquet as pq
from pipeline.local_temp import local_tmp_dir
from ..utils import (
fuzzy_join_on_postcode,
normalize_address_key,
@ -192,7 +194,9 @@ def main():
)
args = parser.parse_args()
with tempfile.TemporaryDirectory(prefix="epc_certificates_") as tmpdir:
with tempfile.TemporaryDirectory(
prefix="epc_certificates_", dir=local_tmp_dir()
) as tmpdir:
_run(args.epc, args.price_paid, args.output, Path(tmpdir))

View file

@ -3,6 +3,8 @@ from pathlib import Path
import numpy as np
import polars as pl
from pipeline.local_temp import local_tmp_dir
from .memory import release_memory
@ -17,7 +19,9 @@ def load_uprns(uprn_path: Path) -> tuple[pl.DataFrame, dict[str, tuple[int, int]
print("Loading UPRN lookup...")
# Sort via streaming sink to avoid polars doubling memory during in-memory sort
with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
with tempfile.NamedTemporaryFile(
suffix=".parquet", delete=False, dir=local_tmp_dir()
) as tmp:
tmp_path = Path(tmp.name)
(
pl.scan_parquet(uprn_path)

View file

@ -79,6 +79,39 @@ def test_transform_grocery_retail_points_keeps_fascia_icon_category():
]
def test_transform_grocery_retail_points_merges_cooperative_societies():
    """Check that three differently named co-operative retailers all map to "Co-op".

    Each row uses a different retailer name and a different fascia value
    (a society-specific fascia, a generic one, and a missing one); the
    assertion expects both ``category`` and ``icon_category`` to be "Co-op"
    for every row.
    """
    retailers = [
        "Central England Co-operative",
        "Lincolnshire Co-operative",
        "The Southern Co-operative",
    ]
    fascias = [
        "Central England Co-operative",
        "The Co-operative Food",
        None,
    ]
    store_names = [
        "Central Co-op Test",
        "Lincolnshire Co-op Test",
        "Southern Co-op Test",
    ]
    societies = pl.DataFrame(
        {
            "id": [101, 102, 103],
            "retailer": retailers,
            "fascia": fascias,
            "store_name": store_names,
            "long_wgs": [-0.141, -0.142, -0.143],
            "lat_wgs": [51.515, 51.516, 51.517],
        }
    )

    # min_chain_locations=1 is passed through unchanged from the original test.
    result = transform_grocery_retail_points(societies, min_chain_locations=1)

    expected = [
        {"category": "Co-op", "icon_category": "Co-op"} for _ in retailers
    ]
    assert result.select("category", "icon_category").to_dicts() == expected
def test_transform_grocery_retail_points_accepts_base_fascias():
raw = pl.DataFrame(
{

View file

@ -623,6 +623,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/outpost",
"shop/pawnbroker",
"shop/photo",
"shop/photo_studio",
"shop/plant_hire",
"shop/printer_ink",
"shop/printing",
@ -843,6 +844,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
[
"healthcare/physiotherapist",
"healthcare/podiatrist",
"healthcare/occupational_therapist",
],
),
(
@ -1171,7 +1173,6 @@ GROCERY_RETAILER_DISPLAY_NAME_OVERRIDES: dict[str, str] = {
"Heron": "Heron Foods",
"Marks and Spencer": "M&S",
"Sainsburys": "Sainsbury's",
"The Co-operative Group": "Co-op",
}
@ -1238,6 +1239,8 @@ def normalize_grocery_retailer(retailer: str | None) -> str:
if retailer is None:
return ""
retailer = retailer.strip()
if retailer in COOP_RETAILERS:
return "Co-op"
return GROCERY_RETAILER_DISPLAY_NAME_OVERRIDES.get(retailer, retailer)