vibes
This commit is contained in:
parent
39ef5c6646
commit
c995f12f8b
78 changed files with 4830 additions and 1619 deletions
|
|
@ -37,7 +37,13 @@ from constants import (
|
|||
ZOOPLA_BASE,
|
||||
)
|
||||
from spatial import PostcodeSpatialIndex
|
||||
from transform import normalize_sub_type, parse_int_value, validate_floor_area
|
||||
from transform import (
|
||||
clean_listing_address,
|
||||
extract_full_postcode,
|
||||
normalize_sub_type,
|
||||
parse_int_value,
|
||||
validate_floor_area,
|
||||
)
|
||||
|
||||
log = logging.getLogger("zoopla")
|
||||
|
||||
|
|
@ -1031,19 +1037,6 @@ def _resolve_outcode_coords(
|
|||
return None
|
||||
|
||||
|
||||
def _extract_postcode(text: str) -> str | None:
|
||||
"""Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'.
|
||||
Normalizes to include a space before the 3-char incode."""
|
||||
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
|
||||
if match:
|
||||
raw = match.group(1).upper().strip()
|
||||
# Ensure space before incode (last 3 chars): "SW1A1AA" → "SW1A 1AA"
|
||||
if " " not in raw and len(raw) >= 5:
|
||||
return raw[:-3] + " " + raw[-3:]
|
||||
return raw
|
||||
return None
|
||||
|
||||
|
||||
def _extract_outcode(text: str) -> str | None:
|
||||
"""Extract a UK outcode from address text like 'Whitechapel Road, London E1'."""
|
||||
# Look for outcode at end of string or after last comma
|
||||
|
|
@ -1123,10 +1116,12 @@ def transform_property(
|
|||
from postcodes extracted from the address text."""
|
||||
price = parse_int_value(raw.get("price")) or 0
|
||||
|
||||
address = raw.get("address", "")
|
||||
address = raw.get("address", "") or ""
|
||||
|
||||
# Resolve postcode and coordinates from address
|
||||
postcode = _extract_postcode(address)
|
||||
extracted_postcode = extract_full_postcode(address)
|
||||
postcode = extracted_postcode
|
||||
postcode_source = "address" if extracted_postcode else None
|
||||
lat = lng = None
|
||||
|
||||
if postcode:
|
||||
|
|
@ -1141,12 +1136,14 @@ def transform_property(
|
|||
result = _resolve_outcode_coords(addr_outcode, pc_coords)
|
||||
if result:
|
||||
postcode, lat, lng = result
|
||||
postcode_source = "address_outcode"
|
||||
|
||||
# Final fallback: use the outcode we know we're searching
|
||||
if lat is None and search_outcode:
|
||||
result = _resolve_outcode_coords(search_outcode, pc_coords)
|
||||
if result:
|
||||
postcode, lat, lng = result
|
||||
postcode_source = "search_outcode"
|
||||
|
||||
if lat is None or lng is None or not postcode:
|
||||
return None
|
||||
|
|
@ -1189,7 +1186,11 @@ def transform_property(
|
|||
"lon": lng,
|
||||
"lat": lat,
|
||||
"Postcode": postcode,
|
||||
"Address per Property Register": address,
|
||||
"Postcode source": postcode_source or "unknown",
|
||||
"Extracted postcode": extracted_postcode,
|
||||
"Inferred postcode": postcode if postcode_source != "address" else None,
|
||||
"Listing raw address": address,
|
||||
"Address per Property Register": clean_listing_address(address),
|
||||
"Leasehold/Freehold": raw.get("tenure") or None,
|
||||
"Property type": _map_property_type(raw.get("property_type")),
|
||||
"Property sub-type": normalize_sub_type(raw.get("property_type")),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue