All good
This commit is contained in:
parent
6ea544a0f6
commit
6cc7288126
45 changed files with 929 additions and 1043 deletions
|
|
@ -10,6 +10,8 @@ import pyarrow as pa
|
|||
import pyarrow.csv as pa_csv
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
|
||||
from ..utils import (
|
||||
fuzzy_join_on_postcode,
|
||||
normalize_address_key,
|
||||
|
|
@ -192,7 +194,9 @@ def main():
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix="epc_certificates_") as tmpdir:
|
||||
with tempfile.TemporaryDirectory(
|
||||
prefix="epc_certificates_", dir=local_tmp_dir()
|
||||
) as tmpdir:
|
||||
_run(args.epc, args.price_paid, args.output, Path(tmpdir))
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ from pathlib import Path
|
|||
import numpy as np
|
||||
import polars as pl
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
|
||||
from .memory import release_memory
|
||||
|
||||
|
||||
|
|
@ -17,7 +19,9 @@ def load_uprns(uprn_path: Path) -> tuple[pl.DataFrame, dict[str, tuple[int, int]
|
|||
print("Loading UPRN lookup...")
|
||||
|
||||
# Sort via streaming sink to avoid polars doubling memory during in-memory sort
|
||||
with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
|
||||
with tempfile.NamedTemporaryFile(
|
||||
suffix=".parquet", delete=False, dir=local_tmp_dir()
|
||||
) as tmp:
|
||||
tmp_path = Path(tmp.name)
|
||||
(
|
||||
pl.scan_parquet(uprn_path)
|
||||
|
|
|
|||
|
|
@ -79,6 +79,39 @@ def test_transform_grocery_retail_points_keeps_fascia_icon_category():
|
|||
]
|
||||
|
||||
|
||||
def test_transform_grocery_retail_points_merges_cooperative_societies():
|
||||
raw = pl.DataFrame(
|
||||
{
|
||||
"id": [101, 102, 103],
|
||||
"retailer": [
|
||||
"Central England Co-operative",
|
||||
"Lincolnshire Co-operative",
|
||||
"The Southern Co-operative",
|
||||
],
|
||||
"fascia": [
|
||||
"Central England Co-operative",
|
||||
"The Co-operative Food",
|
||||
None,
|
||||
],
|
||||
"store_name": [
|
||||
"Central Co-op Test",
|
||||
"Lincolnshire Co-op Test",
|
||||
"Southern Co-op Test",
|
||||
],
|
||||
"long_wgs": [-0.141, -0.142, -0.143],
|
||||
"lat_wgs": [51.515, 51.516, 51.517],
|
||||
}
|
||||
)
|
||||
|
||||
pois = transform_grocery_retail_points(raw, min_chain_locations=1)
|
||||
|
||||
assert pois.select("category", "icon_category").to_dicts() == [
|
||||
{"category": "Co-op", "icon_category": "Co-op"},
|
||||
{"category": "Co-op", "icon_category": "Co-op"},
|
||||
{"category": "Co-op", "icon_category": "Co-op"},
|
||||
]
|
||||
|
||||
|
||||
def test_transform_grocery_retail_points_accepts_base_fascias():
|
||||
raw = pl.DataFrame(
|
||||
{
|
||||
|
|
|
|||
|
|
@ -623,6 +623,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/outpost",
|
||||
"shop/pawnbroker",
|
||||
"shop/photo",
|
||||
"shop/photo_studio",
|
||||
"shop/plant_hire",
|
||||
"shop/printer_ink",
|
||||
"shop/printing",
|
||||
|
|
@ -843,6 +844,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
[
|
||||
"healthcare/physiotherapist",
|
||||
"healthcare/podiatrist",
|
||||
"healthcare/occupational_therapist",
|
||||
],
|
||||
),
|
||||
(
|
||||
|
|
@ -1171,7 +1173,6 @@ GROCERY_RETAILER_DISPLAY_NAME_OVERRIDES: dict[str, str] = {
|
|||
"Heron": "Heron Foods",
|
||||
"Marks and Spencer": "M&S",
|
||||
"Sainsburys": "Sainsbury's",
|
||||
"The Co-operative Group": "Co-op",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1238,6 +1239,8 @@ def normalize_grocery_retailer(retailer: str | None) -> str:
|
|||
if retailer is None:
|
||||
return ""
|
||||
retailer = retailer.strip()
|
||||
if retailer in COOP_RETAILERS:
|
||||
return "Co-op"
|
||||
return GROCERY_RETAILER_DISPLAY_NAME_OVERRIDES.get(retailer, retailer)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue