"""Transform raw Rightmove property dicts into the flat output schema."""

import logging
import re

from constants import PROPERTY_TYPE_MAP, RIGHTMOVE_BASE
from spatial import PostcodeSpatialIndex

log = logging.getLogger("rightmove")

# Conversion factor: one square foot expressed in square metres.
_SQFT_TO_SQM = 0.092903

# The number must start with a digit so that a stray comma (", sq ft") cannot
# match on its own and later blow up in float("").
_SQFT_RE = re.compile(r"(\d[\d,]*(?:\.\d+)?)\s*sq\.?\s*ft", re.IGNORECASE)
_SQM_RE = re.compile(r"(\d[\d,]*(?:\.\d+)?)\s*sq\.?\s*m", re.IGNORECASE)


def parse_display_size(display_size: str | None) -> float | None:
    """Parse a displaySize string into square metres.

    Handles values such as ``'499 sq. ft.'`` or ``'4,124 sq. ft.'`` (converted
    to sqm) and values already given in sq. m. Thousands separators are
    stripped before conversion.

    Args:
        display_size: Raw size text, or ``None``/empty when absent.

    Returns:
        The area in square metres rounded to one decimal place, or ``None``
        when the input is empty or no recognised unit is found.
    """
    if not display_size:
        return None

    # Square feet is tried first; a sq. m. figure is only used as a fallback.
    m = _SQFT_RE.search(display_size)
    if m:
        sqft = float(m.group(1).replace(",", ""))
        return round(sqft * _SQFT_TO_SQM, 1)

    m = _SQM_RE.search(display_size)
    if m:
        return round(float(m.group(1).replace(",", "")), 1)

    return None


def map_property_type(sub_type: str | None) -> str:
    """Map a propertySubType value to its canonical property type.

    Args:
        sub_type: The raw sub-type string, or ``None``/empty when absent.

    Returns:
        The entry from ``PROPERTY_TYPE_MAP`` for ``sub_type``, or ``"Other"``
        when the sub-type is empty or has no (truthy) mapping. Unmapped
        non-empty sub-types are logged at warning level.
    """
    if not sub_type:
        return "Other"

    canonical = PROPERTY_TYPE_MAP.get(sub_type)
    if canonical:
        return canonical

    log.warning("Unknown propertySubType: %r — mapping to Other", sub_type)
    return "Other"


def extract_tenure(tenure_obj: dict | None) -> str | None:
    """Extract a display tenure string from a tenure object.

    Args:
        tenure_obj: Dict carrying a ``tenureType`` key, or ``None``/empty.

    Returns:
        ``"Freehold"`` or ``"Leasehold"`` for the exact upper-case tenure
        types ``FREEHOLD`` / ``LEASEHOLD``; ``None`` for anything else.
    """
    if not tenure_obj:
        return None

    tenure_type = tenure_obj.get("tenureType", "")
    if tenure_type == "FREEHOLD":
        return "Freehold"
    if tenure_type == "LEASEHOLD":
        return "Leasehold"
    return None


def fix_coords(lat: float, lng: float) -> tuple[float, float]:
    """Swap lat/lng if they look reversed.

    The plausibility window used is lat 49–56 and lng -7–2 (approximate
    bounds for England).

    Args:
        lat: Latitude as supplied.
        lng: Longitude as supplied.

    Returns:
        ``(lat, lng)`` unchanged when already inside the window, the swapped
        pair when only the swapped values fit, and the original pair (with a
        warning logged) when neither orientation fits.
    """
    if 49 <= lat <= 56 and -7 <= lng <= 2:
        return lat, lng

    if 49 <= lng <= 56 and -7 <= lat <= 2:
        log.debug(
            "Swapping reversed coords: lat=%.4f lng=%.4f → lat=%.4f lng=%.4f",
            lat,
            lng,
            lng,
            lat,
        )
        return lng, lat

    log.warning(
        "Coords outside England bounds even after swap attempt: lat=%.4f lng=%.4f",
        lat,
        lng,
    )
    return lat, lng


def normalize_price(amount: int, frequency: str) -> int:
    """Normalise price to monthly for rentals (weekly × 52/12, yearly ÷ 12).

    Args:
        amount: The quoted price.
        frequency: The quoting frequency; only ``"weekly"`` and ``"yearly"``
            trigger a conversion.

    Returns:
        The rounded monthly equivalent for weekly/yearly prices; ``amount``
        unchanged for any other frequency value.
    """
    if frequency == "weekly":
        return round(amount * 52 / 12)
    if frequency == "yearly":
        return round(amount / 12)
    return amount


def transform_property(
    prop: dict, outcode: str, pc_index: PostcodeSpatialIndex
) -> dict | None:
    """Transform a raw Rightmove property dict into our output schema.

    Args:
        prop: Raw property dict. Keys read here: ``location``, ``price``,
            ``propertySubType``, ``bedrooms``, ``bathrooms``, ``keyFeatures``,
            ``id``, ``displayAddress``, ``tenure``, ``displaySize``,
            ``propertyUrl`` and ``firstVisibleDate``.
        outcode: Accepted for interface compatibility; not used in this
            function.
        pc_index: Spatial index whose ``nearest(lat, lng)`` result is used as
            the postcode.

    Returns:
        The output record, or ``None`` when the property has no location, no
        latitude/longitude, no price amount, or no postcode from ``pc_index``.
    """
    loc = prop.get("location")
    if not loc:
        return None

    raw_lat = loc.get("latitude")
    raw_lng = loc.get("longitude")
    if raw_lat is None or raw_lng is None:
        return None
    lat, lng = fix_coords(raw_lat, raw_lng)

    # `.get(key, default)` does not cover keys that are present with a null
    # value, so fall back with `or` to avoid crashing on None.
    price_obj = prop.get("price") or {}
    amount = price_obj.get("amount")
    if amount is None:
        return None
    frequency = price_obj.get("frequency", "")
    price = normalize_price(int(amount), frequency)

    display_prices = price_obj.get("displayPrices") or []
    price_qualifier = (
        display_prices[0].get("displayPriceQualifier", "") if display_prices else ""
    )

    sub_type = prop.get("propertySubType", "")
    bedrooms = prop.get("bedrooms", 0) or 0
    bathrooms = prop.get("bathrooms", 0) or 0

    # Keep only features that carry a non-empty description.
    key_features = [
        kf.get("description", "")
        for kf in (prop.get("keyFeatures") or [])
        if kf.get("description")
    ]

    postcode = pc_index.nearest(lat, lng)
    if not postcode:
        log.debug("No England postcode for property at %.4f, %.4f — skipping", lat, lng)
        return None

    return {
        "id": prop.get("id"),
        "Bedrooms": bedrooms,
        "Bathrooms": bathrooms,
        # NOTE(review): this sums bedrooms and bathrooms although the key says
        # "living rooms" — confirm this is the intended approximation.
        "Number of bedrooms & living rooms": bedrooms + bathrooms,
        "lon": lng,
        "lat": lat,
        "Postcode": postcode,
        "Address per Property Register": prop.get("displayAddress", ""),
        "Leasehold/Freehold": extract_tenure(prop.get("tenure")),
        "Property type": map_property_type(sub_type),
        "Property sub-type": sub_type or "Unknown",
        "price": price,
        # The frequency is reported as received, even though `price` may have
        # been converted to a monthly figure above.
        "price_frequency": frequency,
        "Price qualifier": price_qualifier,
        "Total floor area (sqm)": parse_display_size(prop.get("displaySize")),
        "Listing URL": RIGHTMOVE_BASE + (prop.get("propertyUrl") or ""),
        "Listing features": key_features,
        "first_visible_date": prop.get("firstVisibleDate", ""),
    }