More
This commit is contained in:
parent
128b3191e7
commit
03445188ea
54 changed files with 596953 additions and 3577 deletions
124
finder/transform.py
Normal file
124
finder/transform.py
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
import logging
|
||||
import re
|
||||
|
||||
from constants import PROPERTY_TYPE_MAP, RIGHTMOVE_BASE
|
||||
from spatial import PostcodeSpatialIndex
|
||||
|
||||
log = logging.getLogger("rightmove")
|
||||
|
||||
|
||||
def parse_display_size(display_size: str | None) -> float | None:
|
||||
"""Parse displaySize like '499 sq. ft.' or '4,124 sq. ft.' to sqm."""
|
||||
if not display_size:
|
||||
return None
|
||||
# Try sq. ft. first
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", display_size, re.IGNORECASE)
|
||||
if m:
|
||||
sqft = float(m.group(1).replace(",", ""))
|
||||
return round(sqft * 0.092903, 1)
|
||||
# Try sq. m.
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", display_size, re.IGNORECASE)
|
||||
if m:
|
||||
return round(float(m.group(1).replace(",", "")), 1)
|
||||
return None
|
||||
|
||||
|
||||
def map_property_type(sub_type: str | None) -> str:
|
||||
"""Map propertySubType to canonical type."""
|
||||
if not sub_type:
|
||||
return "Other"
|
||||
canonical = PROPERTY_TYPE_MAP.get(sub_type)
|
||||
if canonical:
|
||||
return canonical
|
||||
log.warning("Unknown propertySubType: %r — mapping to Other", sub_type)
|
||||
return "Other"
|
||||
|
||||
|
||||
def extract_tenure(tenure_obj: dict | None) -> str | None:
|
||||
"""Extract tenure string from tenure object."""
|
||||
if not tenure_obj:
|
||||
return None
|
||||
tt = tenure_obj.get("tenureType", "")
|
||||
if tt == "FREEHOLD":
|
||||
return "Freehold"
|
||||
if tt == "LEASEHOLD":
|
||||
return "Leasehold"
|
||||
return None
|
||||
|
||||
|
||||
def fix_coords(lat: float, lng: float) -> tuple[float, float]:
    """Return (lat, lng), swapping the pair when it appears to be reversed.

    The expected bounds are latitude 49..56 and longitude -7..2. If the pair
    fits as given it is returned unchanged; if it only fits when swapped, the
    swapped pair is returned (logged at debug level); otherwise the original
    pair is returned and a warning is logged.
    """

    def within_bounds(latitude, longitude):
        return 49 <= latitude <= 56 and -7 <= longitude <= 2

    if within_bounds(lat, lng):
        return lat, lng

    if not within_bounds(lng, lat):
        log.warning("Coords outside England bounds even after swap attempt: lat=%.4f lng=%.4f", lat, lng)
        return lat, lng

    log.debug("Swapping reversed coords: lat=%.4f lng=%.4f → lat=%.4f lng=%.4f", lat, lng, lng, lat)
    return lng, lat
|
||||
|
||||
|
||||
def normalize_price(amount: int, frequency: str) -> int:
|
||||
"""Normalize price to monthly for rentals (weekly × 52/12, yearly ÷ 12)."""
|
||||
if frequency == "weekly":
|
||||
return round(amount * 52 / 12)
|
||||
if frequency == "yearly":
|
||||
return round(amount / 12)
|
||||
return amount
|
||||
|
||||
|
||||
def transform_property(prop: dict, outcode: str, pc_index: PostcodeSpatialIndex) -> dict | None:
    """Transform a raw Rightmove property dict into our output schema.

    Args:
        prop: Raw property record. Nested objects ("price", "listingUpdate",
            "keyFeatures") and "propertyUrl" may be absent or explicitly null.
        outcode: Outcode copied verbatim into the output record.
        pc_index: Spatial index; its ``nearest(lat, lng)`` result is stored
            as the record's postcode.

    Returns:
        The flattened output record, or None when the property has no
        location, no latitude/longitude, or no price amount.
    """
    loc = prop.get("location")
    if not loc:
        return None
    raw_lat = loc.get("latitude")
    raw_lng = loc.get("longitude")
    if raw_lat is None or raw_lng is None:
        return None

    lat, lng = fix_coords(raw_lat, raw_lng)

    # NOTE: dict.get(key, default) only applies the default when the key is
    # missing. A key present with a null value yields None, so nested
    # containers fall back with `or` (same approach as bedrooms/bathrooms).
    price_obj = prop.get("price") or {}
    amount = price_obj.get("amount")
    if amount is None:
        return None
    frequency = price_obj.get("frequency", "")
    price = normalize_price(int(amount), frequency)

    # Only the first display price's qualifier is used.
    display_prices = price_obj.get("displayPrices") or []
    price_qualifier = display_prices[0].get("displayPriceQualifier", "") if display_prices else ""

    sub_type = prop.get("propertySubType", "")
    bedrooms = prop.get("bedrooms", 0) or 0
    bathrooms = prop.get("bathrooms", 0) or 0

    # Keep only key features that carry a non-empty description.
    key_features = [
        kf.get("description", "")
        for kf in (prop.get("keyFeatures") or [])
        if kf.get("description")
    ]

    listing_update = prop.get("listingUpdate") or {}
    update_date = listing_update.get("listingUpdateDate", "")

    postcode = pc_index.nearest(lat, lng)

    return {
        "id": prop.get("id"),
        "bedrooms": bedrooms,
        "bathrooms": bathrooms,
        "total_rooms": bedrooms + bathrooms,
        "longitude": lng,
        "latitude": lat,
        "postcode": postcode,
        "address": prop.get("displayAddress", ""),
        "tenure": extract_tenure(prop.get("tenure")),
        "property_type": map_property_type(sub_type),
        "property_sub_type": sub_type or "Unknown",
        "price": price,
        "price_frequency": frequency,
        "price_qualifier": price_qualifier,
        "floorspace_sqm": parse_display_size(prop.get("displaySize")),
        # A null propertyUrl must not break the string concatenation.
        "url": RIGHTMOVE_BASE + (prop.get("propertyUrl") or ""),
        "features": key_features,
        "first_visible_date": prop.get("firstVisibleDate", ""),
        "update_date": update_date,
        "outcode": outcode,
        "house_share": sub_type == "House Share",
    }
|
||||
Loading…
Add table
Add a link
Reference in a new issue