# perfect-postcode/finder/openrent.py

"""OpenRent (openrent.co.uk) scraper — rental properties only.

OpenRent is behind AWS WAF, so we use Playwright (headless Chromium) to solve
the challenge and get valid cookies. Then we use curl_cffi with Chrome TLS
impersonation to make requests with those cookies.

OpenRent is a rental-only platform, so this scraper only handles the RENT channel.

HTML structure (as of 2026-03):
  Search results page renders property cards as <a class="pli search-property-card">.
  Each card contains:
    - Monthly price in <div class="pim"> with <span class="text-primary">£X,XXX</span>
    - Weekly price in <div class="piw"> (hidden by Alpine.js)
    - Title in <div class="fw-medium text-primary fs-3">N Bed Type, Location, OUTCODE</div>
    - Features in <ul> with <li> items like "1 Bed", "1 Bath", "Furnished"
    - Listing ID in data-listing-id on the .or-swiper div
    - Description snippet in <div class="line-clamp-2">

  Detail page has:
    - <h1> with property title including outcode
    - <div id="map" data-lat="..." data-lng="..."> for coordinates
    - Tables with deposit, rent, furnishing, tenant preferences
"""

import logging
import os
import re
import time

from bs4 import BeautifulSoup
from curl_cffi.requests import Session
from curl_cffi.requests.errors import RequestsError
from playwright.sync_api import sync_playwright

from constants import (
    DELAY_BETWEEN_PAGES,
    OPENRENT_BASE,
    PROPERTY_TYPE_MAP,
    RETRY_BASE_DELAY,
)
from metrics import (
    flaresolverr_attempts_total,
    openrent_errors_total,
    openrent_properties_scraped,
    openrent_requests_total,
)
from spatial import PostcodeSpatialIndex

log = logging.getLogger("openrent")


class WafChallengeError(Exception):
    """Raised when OpenRent returns a WAF challenge, indicating cookies need refresh.

    ``fetch_page`` raises this on HTTP 202/403 responses and on 200 responses
    whose body contains the AWS WAF challenge markers.
    """


# ---------------------------------------------------------------------------
# Cookie / session management via Playwright
# ---------------------------------------------------------------------------


def solve_waf() -> tuple[dict[str, str], str] | None:
    """Load an OpenRent search page in headless Chromium to obtain WAF cookies.

    If the loaded page contains the ``AwsWafIntegration`` marker, waits (up to
    30 s) for a property-card selector to appear before reading cookies.

    Returns:
        ``(cookies, user_agent)`` where ``cookies`` maps cookie name to value,
        or ``None`` when no ``aws-waf-token`` cookie was set or any exception
        was raised during the browser session.
    """
    log.info("Solving AWS WAF challenge via Playwright")
    try:
        with sync_playwright() as pw:
            chromium = pw.chromium.launch(
                headless=True,
                args=["--no-sandbox", "--disable-blink-features=AutomationControlled"],
            )
            ctx = chromium.new_context()
            tab = ctx.new_page()

            target = f"{OPENRENT_BASE}/properties-to-rent/?term=london&isLive=true"
            log.info("Navigating to %s", target)
            tab.goto(target, wait_until="domcontentloaded", timeout=60000)

            if "AwsWafIntegration" in tab.content():
                log.info("Got WAF challenge page, waiting for resolution...")
                # Property cards only render once the challenge has been passed.
                tab.wait_for_selector(
                    "a.pli, .pli, .search-property-card",
                    timeout=30000,
                )

            cookie_jar = ctx.cookies()
            user_agent = tab.evaluate("navigator.userAgent")
            chromium.close()

        cookies = {entry["name"]: entry["value"] for entry in cookie_jar}
        if "aws-waf-token" in cookies:
            log.info(
                "AWS WAF solved — got %d cookies, UA: %s",
                len(cookies),
                user_agent[:60],
            )
            flaresolverr_attempts_total.labels(result="success").inc()
            return cookies, user_agent

        log.error("Playwright solved page but no aws-waf-token cookie found")
        flaresolverr_attempts_total.labels(result="no_cookies").inc()
        return None

    except Exception as exc:
        log.error("Playwright WAF solve failed: %s", exc)
        flaresolverr_attempts_total.labels(result="error").inc()
        return None


def load_cookies() -> tuple[dict[str, str], str] | None:
    """Return ``(cookies, user_agent)`` for OpenRent, or ``None`` if unavailable.

    The Playwright solver is tried first. If it yields nothing, the
    ``OPENRENT_WAF_TOKEN`` environment variable supplies the token and
    ``OPENRENT_USER_AGENT`` (with a built-in Chrome default) the user agent.
    """
    solved = solve_waf()
    if solved:
        return solved

    # Environment-variable fallback
    token = os.environ.get("OPENRENT_WAF_TOKEN", "")
    if token:
        default_agent = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/145.0.0.0 Safari/537.36"
        )
        agent = os.environ.get("OPENRENT_USER_AGENT", default_agent)
        return {"aws-waf-token": token}, agent
    return None


def make_client(cookies: dict[str, str], user_agent: str) -> Session:
    """Build a curl_cffi ``Session`` preloaded with OpenRent headers and cookies.

    The session impersonates Chrome; per the module notes this is intended to
    keep the AWS WAF cookies valid. Every cookie is scoped to the
    ``openrent.co.uk`` domain.
    """
    browser_headers = {
        "User-Agent": user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-GB,en;q=0.9",
    }
    client = Session(impersonate="chrome")
    client.headers.update(browser_headers)
    for cookie_name, cookie_value in cookies.items():
        client.cookies.set(cookie_name, cookie_value, domain="openrent.co.uk")
    return client


# ---------------------------------------------------------------------------
# HTTP fetch with retry
# ---------------------------------------------------------------------------


def _status_label(code: int) -> str:
    if code >= 500:
        return "5xx"
    return str(code)


def fetch_page(
    client: Session,
    url: str,
    max_retries: int = 3,
) -> str | None:
    """GET ``url`` and return its HTML, retrying transient failures.

    Retries happen on HTTP 429/500/502/503/504 and on ``RequestsError``, with
    an exponential backoff of ``RETRY_BASE_DELAY * 2**attempt`` seconds
    between attempts. No sleep is performed after the final attempt.

    Args:
        client: Session used to issue the request.
        url: Absolute URL to fetch.
        max_retries: Total number of attempts to make.

    Returns:
        The response body on HTTP 200, or ``None`` on a non-retryable status
        or once all attempts are exhausted.

    Raises:
        WafChallengeError: On HTTP 202/403, or on a 200 response whose body
            contains the AWS WAF challenge markers.
    """
    for attempt in range(max_retries):
        failure = None
        try:
            resp = client.get(url, timeout=30)
            openrent_requests_total.labels(status=_status_label(resp.status_code)).inc()

            if resp.status_code == 200:
                html = resp.text
                # Detect WAF challenge page masquerading as 200
                if "AwsWafIntegration" in html and "challenge.js" in html:
                    raise WafChallengeError(
                        "Got AWS WAF challenge page — cookies expired"
                    )
                return html

            if resp.status_code in (202, 403):
                raise WafChallengeError(
                    f"HTTP {resp.status_code} — cookies likely expired"
                )

            if resp.status_code not in (429, 500, 502, 503, 504):
                log.error("HTTP %d from %s (non-retryable)", resp.status_code, url)
                return None

            failure = f"HTTP {resp.status_code}"

        except WafChallengeError:
            raise
        except RequestsError as e:
            failure = type(e).__name__
            openrent_errors_total.labels(type=failure).inc()

        if attempt + 1 >= max_retries:
            # Nothing left to retry: skip the (longest) backoff sleep entirely.
            log.warning(
                "%s from %s, attempt %d/%d failed, giving up",
                failure,
                url,
                attempt + 1,
                max_retries,
            )
            break

        delay = RETRY_BASE_DELAY * (2**attempt)
        log.warning(
            "%s from %s, retry %d/%d in %.1fs",
            failure,
            url,
            attempt + 1,
            max_retries,
            delay,
        )
        time.sleep(delay)

    openrent_errors_total.labels(type="retry_exhausted").inc()
    log.error("All %d retries exhausted for %s", max_retries, url)
    return None


# ---------------------------------------------------------------------------
# HTML parsing
# ---------------------------------------------------------------------------


def _extract_price_from_element(el) -> tuple[int, str] | None:
    """Extract price integer from a price element's text like '£2,100'."""
    if not el:
        return None
    text = el.get_text(strip=True)
    match = re.search(r"£([\d,]+)", text)
    if not match:
        return None
    return int(match.group(1).replace(",", ""))


def _extract_price(text: str) -> tuple[int, str] | None:
    """Extract price and frequency from text like '£1,500 pcm' or '£350 pw'.
    Returns (price_int, frequency) or None.

    OpenRent card text shows both monthly and weekly prices (e.g.
    '£2,800 per month £646 per week'), so check monthly *before* weekly
    to match the first (monthly) price that the regex captures."""
    match = re.search(r"£([\d,]+)", text)
    if not match:
        return None
    price = int(match.group(1).replace(",", ""))
    lower = text.lower()
    if "pcm" in lower or "per month" in lower or "/m" in lower:
        return price, "monthly"
    if "pw" in lower or "per week" in lower or "/w" in lower:
        return price, "weekly"
    if "pa" in lower or "per annum" in lower or "/y" in lower:
        return price, "yearly"
    # OpenRent defaults to pcm (per calendar month)
    return price, "monthly"


def _extract_bedrooms_from_title(title: str) -> int | None:
    """Extract bedroom count from title like '2 Bed Flat, Pimlico'."""
    match = re.search(r"(\d+)\s*bed", title, re.IGNORECASE)
    if match:
        return int(match.group(1))
    if re.search(r"\bstudio\b", title, re.IGNORECASE):
        return 0
    return None


def _extract_beds_baths_from_features(
    feature_items: list,
) -> tuple[int | None, int | None]:
    """Extract bedrooms and bathrooms from feature list items.

    OpenRent search cards have <ul> with items like:
      <li>1 Bed</li>  <li>1 Bath</li>  <li>Furnished</li>
    """
    bedrooms = None
    bathrooms = None
    for li in feature_items:
        text = li.get_text(strip=True).lower()
        bed_match = re.search(r"(\d+)\s*bed", text)
        if bed_match:
            bedrooms = int(bed_match.group(1))
        bath_match = re.search(r"(\d+)\s*bath", text)
        if bath_match:
            bathrooms = int(bath_match.group(1))
    return bedrooms, bathrooms


def _extract_postcode(text: str) -> str | None:
    """Extract full UK postcode from text like '2 Bed Flat, Pimlico, SW1V 2AA'."""
    match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
    if match:
        return match.group(1).upper().strip()
    return None


def _extract_outcode(text: str) -> str | None:
    """Extract UK outcode from text like '1 Bed Flat, Bank Chambers, SW1Y'.

    Looks for an outcode pattern (e.g., SW1Y, E1, EC2A) at the end of the text
    or after the last comma."""
    # Try after last comma first (most reliable position in OpenRent titles)
    parts = text.split(",")
    if len(parts) > 1:
        last_part = parts[-1].strip()
        match = re.match(r"^([A-Z]{1,2}\d[A-Z0-9]?)$", last_part, re.IGNORECASE)
        if match:
            return match.group(1).upper()

    # Fall back to searching anywhere in text
    match = re.search(r"\b([A-Z]{1,2}\d[A-Z0-9]?)\b", text, re.IGNORECASE)
    if match:
        candidate = match.group(1).upper()
        # Avoid matching things like "1 Bed" → "1B"
        if len(candidate) >= 2 and not candidate[0].isdigit():
            return candidate
    return None


def _infer_property_type(title: str) -> str:
    """Infer property type from title text.

    Order matters: "Room in a Shared Flat" should be "Room" not "Flat",
    so check "room" before "flat"."""
    lower = title.lower()
    if "room in" in lower or "room " in lower:
        return "Room"
    if "studio" in lower:
        return "Studio"
    if "flat" in lower or "apartment" in lower:
        return "Flat"
    if "maisonette" in lower:
        return "Maisonette"
    if "house" in lower:
        return "House"
    if "bungalow" in lower:
        return "Bungalow"
    return ""


def parse_search_results(html: str) -> list[dict]:
    """Parse property data from OpenRent search results HTML.

    Returns list of raw property dicts extracted from property cards.

    Current OpenRent card structure (2026-03):
      <a class="pli search-property-card" href="/property-to-rent/.../ID">
        <div class="or-swiper" data-listing-id="ID">
        <div class="pim"><span class="text-primary">£2,100</span> per month</div>
        <div class="piw"><span class="text-primary">£485</span> per week</div>
        <div class="fw-medium text-primary fs-3">1 Bed Flat, Location, SW1Y</div>
        <ul>...<li>1 Bed</li><li>1 Bath</li><li>Furnished</li>...</ul>

    Each returned dict always has ``url``, ``id``, ``title`` and
    ``property_type``; the keys ``price``/``frequency``, ``bedrooms``,
    ``bathrooms``, ``postcode`` or ``outcode``, ``description`` and
    ``lat``/``lng`` are present only when they could be extracted. Cards
    without an href or without any listing ID are skipped. An empty list is
    returned (with a warning logged) when no cards are found.
    """
    soup = BeautifulSoup(html, "html.parser")
    properties = []

    # Property cards: <a class="pli search-property-card">
    cards = soup.select("a.pli")
    if not cards:
        # Fallback when the card class changes: any link to a property page.
        cards = soup.find_all("a", href=re.compile(r"/property-to-rent/"))

    if not cards:
        log.warning(
            "No property cards found in search HTML (%d bytes). "
            "CSS selectors may need updating.",
            len(html),
        )
        return []

    for card in cards:
        prop: dict = {}

        # Extract property URL and ID from href
        href = card.get("href", "")
        if not href:
            continue

        # Relative hrefs are made absolute against the site base.
        prop["url"] = href if href.startswith("http") else OPENRENT_BASE + href
        # The ID is the trailing numeric path segment (before any ?query or #fragment).
        id_match = re.search(r"/(\d+)(?:\?|$|#)", href)
        if id_match:
            prop["id"] = id_match.group(1)
        else:
            # Try data-listing-id on the swiper element
            swiper = card.select_one("[data-listing-id]")
            if swiper:
                prop["id"] = swiper["data-listing-id"]
            else:
                continue  # can't use a property without an ID

        # --- Price ---
        # Prefer structured price elements over free-text parsing.
        # Monthly price is in <div class="pim"><span class="text-primary">£X</span>
        pim = card.select_one(".pim .text-primary, .pim span")
        piw = card.select_one(".piw .text-primary, .piw span")

        monthly_price = _extract_price_from_element(pim)
        weekly_price = _extract_price_from_element(piw)

        # Monthly wins over weekly when both structured prices are present.
        if monthly_price:
            prop["price"] = monthly_price
            prop["frequency"] = "monthly"
        elif weekly_price:
            prop["price"] = weekly_price
            prop["frequency"] = "weekly"
        else:
            # Fall back to parsing card text
            card_text = card.get_text(" ", strip=True)
            price_result = _extract_price(card_text)
            if price_result:
                prop["price"], prop["frequency"] = price_result

        # --- Title / Address ---
        # The property title is in a div with classes "fw-medium text-primary fs-3"
        # e.g., "1 Bed Flat, Bank Chambers, SW1Y"
        title_el = card.select_one("div.fw-medium.fs-3")
        if not title_el:
            # Fallback: try image alt text which also has the title
            img = card.select_one("img.propertyPic")
            if img and img.get("alt"):
                prop["title"] = img["alt"]
            else:
                # No title source available: leave the title empty rather
                # than guessing from the (price/nav-laden) card text.
                prop["title"] = ""
        else:
            prop["title"] = title_el.get_text(strip=True)

        # --- Bedrooms / Bathrooms from feature list ---
        feature_list = card.select("ul li")
        beds_from_features, baths_from_features = _extract_beds_baths_from_features(
            feature_list,
        )

        # Bedrooms: prefer feature list, fall back to title parsing
        if beds_from_features is not None:
            prop["bedrooms"] = beds_from_features
        else:
            beds = _extract_bedrooms_from_title(prop.get("title", ""))
            if beds is not None:
                prop["bedrooms"] = beds

        # Bathrooms have no title fallback; the key is omitted when unknown.
        if baths_from_features is not None:
            prop["bathrooms"] = baths_from_features

        # --- Property type from title ---
        title = prop.get("title", "")
        prop["property_type"] = _infer_property_type(title)

        # --- Postcode / outcode from title ---
        # A full postcode takes priority; the outcode is recorded only when
        # no full postcode is present.
        postcode = _extract_postcode(title)
        if postcode:
            prop["postcode"] = postcode
        else:
            outcode = _extract_outcode(title)
            if outcode:
                prop["outcode"] = outcode

        # --- Description snippet ---
        desc_el = card.select_one(".line-clamp-2")
        if desc_el:
            prop["description"] = desc_el.get_text(strip=True)

        # --- Coordinates from data attributes (may not be present on cards) ---
        # Check the card itself first, then descendants; stop at the first
        # element carrying both values, even if they fail to parse as floats.
        for el in [card] + card.select("[data-lat], [data-latitude]"):
            lat = el.get("data-lat") or el.get("data-latitude")
            lng = el.get("data-lng") or el.get("data-longitude") or el.get("data-lon")
            if lat and lng:
                try:
                    prop["lat"] = float(lat)
                    prop["lng"] = float(lng)
                except ValueError:
                    pass
                break

        properties.append(prop)

    log.debug("Parsed %d property cards from search HTML", len(properties))
    return properties


def parse_property_detail(html: str) -> dict:
    """Extract supplementary listing data from a property detail page.

    Current detail page structure (2026-03):
      - <h1> has the full title (e.g., "Room in a Shared House, Lime Tree Court, AL2")
      - <div id="map" data-lat="..." data-lng="..."> has coordinates
      - Tables have "Rent PCM", "Deposit", "Bills Included", etc. (NOT bedrooms)
      - Description in elements with class containing "description"

    Returns a dict containing whichever of these keys could be extracted:
    ``title``, ``postcode``, ``lat``, ``lng``, ``price``, ``bedrooms``,
    ``bathrooms``, ``property_type``, ``available_date``, ``furnished``,
    ``description``.
    """
    soup = BeautifulSoup(html, "html.parser")
    details: dict = {}

    # --- Title (and postcode, if the title carries one) ---
    heading = soup.select_one("h1")
    if heading:
        heading_text = heading.get_text(strip=True)
        # Short headings or ones mentioning "log in" are nav/modal elements.
        is_listing_title = (
            len(heading_text) > 10 and "log in" not in heading_text.lower()
        )
        if is_listing_title:
            details["title"] = heading_text
            found_postcode = _extract_postcode(heading_text)
            if found_postcode:
                details["postcode"] = found_postcode

    # --- Coordinates: prefer #map, else any element carrying data-lat ---
    map_node = soup.select_one("#map[data-lat]") or soup.select_one("[data-lat]")
    if map_node:
        raw_lat = map_node.get("data-lat")
        raw_lng = map_node.get("data-lng") or map_node.get("data-lon")
        if raw_lat and raw_lng:
            try:
                details["lat"] = float(raw_lat)
                details["lng"] = float(raw_lng)
            except ValueError:
                pass

    # --- Label/value rows from every table ---
    for table in soup.select("table"):
        for row in table.select("tr"):
            cells = row.select("td")
            if len(cells) < 2:
                continue
            label = cells[0].get_text(strip=True).lower()
            value = cells[1].get_text(strip=True)

            if "rent" in label and "pcm" in label:
                amount = re.search(r"£([\d,]+)", value)
                if amount:
                    details["price"] = int(amount.group(1).replace(",", ""))
            elif "bedroom" in label or "bathroom" in label:
                # "bedroom" takes priority if a label mentions both.
                key = "bedrooms" if "bedroom" in label else "bathrooms"
                number = re.search(r"(\d+)", value)
                if number:
                    details[key] = int(number.group(1))
            elif "type" in label and "property" in label:
                details["property_type"] = value
            elif "available" in label or "move" in label:
                details["available_date"] = value
            elif "furnish" in label:
                details["furnished"] = value

    # --- Last resort for coordinates: inline JavaScript ---
    if "lat" not in details:
        for script in soup.select("script"):
            source = script.string or ""
            lat_hit = re.search(r'"latitude"\s*:\s*([\d.-]+)', source)
            lng_hit = re.search(r'"longitude"\s*:\s*([\d.-]+)', source)
            if not (lat_hit and lng_hit):
                continue
            try:
                details["lat"] = float(lat_hit.group(1))
                details["lng"] = float(lng_hit.group(1))
            except ValueError:
                pass
            # Only the first script carrying both keys is considered.
            break

    # --- Description (later mined for floor area) ---
    description_node = soup.select_one(
        ".description, [class*='description'], #description"
    )
    if description_node:
        details["description"] = description_node.get_text(strip=True)

    return details


# ---------------------------------------------------------------------------
# Property type mapping & floor area
# ---------------------------------------------------------------------------


def map_property_type(raw_type: str | None) -> str:
    """Map OpenRent property type to canonical type."""
    if not raw_type:
        return "Other"
    canonical = PROPERTY_TYPE_MAP.get(raw_type)
    if canonical:
        return canonical
    lower = raw_type.lower()
    if "room" in lower or "shared" in lower:
        return "Other"
    if (
        "flat" in lower
        or "apartment" in lower
        or "maisonette" in lower
        or "studio" in lower
    ):
        return "Flats/Maisonettes"
    if "detached" in lower and "semi" not in lower:
        return "Detached"
    if "semi" in lower:
        return "Semi-Detached"
    if "terrace" in lower or "mews" in lower:
        return "Terraced"
    if "house" in lower:
        return "Detached"
    log.debug("Unknown property type: %r — mapping to Other", raw_type)
    return "Other"


def parse_floor_area(description: str | None) -> float | None:
    """Try to extract floor area from description text."""
    if not description:
        return None
    m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", description, re.IGNORECASE)
    if m:
        sqft = float(m.group(1).replace(",", ""))
        return round(sqft * 0.092903, 1)
    m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", description, re.IGNORECASE)
    if m:
        return round(float(m.group(1).replace(",", "")), 1)
    return None


# ---------------------------------------------------------------------------
# Transform & search
# ---------------------------------------------------------------------------


def _resolve_outcode_postcodes(
    outcode: str,
    pc_coords: dict[str, tuple[float, float]],
) -> list[str]:
    """Get all postcodes for an outcode from the postcode coordinates lookup."""
    prefix = outcode + " "
    # Also try without space for non-standard format (e.g. "SW1Y" matches "SW1Y 4AA")
    return [pcd for pcd in pc_coords if pcd.startswith(prefix)]


def transform_property(
    search_data: dict,
    detail_data: dict | None,
    pc_index: PostcodeSpatialIndex,
    pc_coords: dict[str, tuple[float, float]],
) -> dict | None:
    """Transform OpenRent property data into our output schema.

    Merges data from the search results page and (optionally) the detail page,
    with detail-page values taking precedence. Uses pc_coords (postcode ->
    lat/lng) as a fallback when coordinates are missing but a postcode or
    outcode is available.

    Args:
        search_data: Raw dict for one card from ``parse_search_results``.
        detail_data: Dict from ``parse_property_detail``, or ``None``.
        pc_index: Spatial index used to find the nearest postcode for a
            coordinate pair; skipped when falsy.
        pc_coords: Mapping of postcode to ``(lat, lng)``.

    Returns:
        The property in the output schema, or ``None`` when it has no price,
        its coordinates fall outside the accepted bounding box, or no
        postcode/coordinates can be resolved.
    """
    detail = detail_data or {}

    # Price and frequency must come from the same source. The detail page
    # only ever yields a "Rent PCM" figure, so a detail price is monthly
    # regardless of what frequency the search card reported.
    detail_price = detail.get("price")
    if detail_price:
        price = detail_price
        frequency = "monthly"
    else:
        price = search_data.get("price")
        frequency = search_data.get("frequency", "monthly")
    if not price:
        return None

    # Take coordinates as a pair from one source; compare against None so a
    # legitimate 0.0 longitude (the Greenwich meridian) is not discarded.
    lat, lng = detail.get("lat"), detail.get("lng")
    if lat is None or lng is None:
        lat, lng = search_data.get("lat"), search_data.get("lng")

    # Get postcode: detail page > search card
    postcode = detail.get("postcode") or search_data.get("postcode")

    if lat is not None and lng is not None:
        # Validate coordinates are in England
        if not (49 <= lat <= 56 and -7 <= lng <= 2):
            log.debug("Coords outside England: lat=%.4f lng=%.4f — skipping", lat, lng)
            return None
        if not postcode:
            if pc_index:
                postcode = pc_index.nearest(lat, lng)
            elif search_data.get("outcode"):
                # No spatial index — try outcode lookup as fallback
                outcode_pcs = _resolve_outcode_postcodes(
                    search_data["outcode"],
                    pc_coords,
                )
                if outcode_pcs:
                    postcode = outcode_pcs[0]
    elif postcode:
        # Have postcode but no coordinates — look up centroid from arcgis data
        coords = pc_coords.get(postcode)
        if coords:
            lat, lng = coords
        else:
            log.debug("Postcode %s not in arcgis data — skipping", postcode)
            return None
    elif search_data.get("outcode"):
        # Have only outcode — approximate with the first postcode in it
        outcode = search_data["outcode"]
        outcode_postcodes = _resolve_outcode_postcodes(outcode, pc_coords)
        if outcode_postcodes:
            postcode = outcode_postcodes[0]
            lat, lng = pc_coords[postcode]
        else:
            log.debug("No postcodes found for outcode %s — skipping", outcode)
            return None
    else:
        return None

    if not postcode:
        log.debug("No postcode for property — skipping")
        return None

    bedrooms = detail.get("bedrooms") or search_data.get("bedrooms", 0) or 0
    bathrooms = detail.get("bathrooms") or search_data.get("bathrooms", 0) or 0

    # Title: prefer detail page (has h1 with full title)
    title = detail.get("title") or search_data.get("title", "")

    # Address: take the middle part of the title (skip the "N Bed Type" prefix
    # and the outcode suffix). E.g., "1 Bed Flat, Bank Chambers, SW1Y" -> "Bank Chambers"
    address = ""
    if title:
        parts = [p.strip() for p in title.split(",")]
        if len(parts) >= 3:
            # Skip first (type) and last (outcode), join the middle
            address = ", ".join(parts[1:-1])
        elif len(parts) == 2:
            # Could be "Location, OUTCODE" or "Type, Location"
            # If last part looks like an outcode, use the first part
            if re.match(r"^[A-Z]{1,2}\d", parts[-1]):
                address = parts[0]
            else:
                address = parts[1]
        else:
            address = title

    # Property type: prefer detail, then search card, then infer from title
    property_type = detail.get("property_type") or search_data.get("property_type", "")
    if not property_type and title:
        property_type = _infer_property_type(title)

    prop_id = search_data.get("id", "")
    listing_url = search_data.get(
        "url",
        f"{OPENRENT_BASE}/{prop_id}" if prop_id else "",
    )
    description = detail.get("description") or search_data.get("description", "")

    return {
        "id": f"or_{prop_id}",
        "Bedrooms": bedrooms,
        "Bathrooms": bathrooms,
        "Number of bedrooms & living rooms": bedrooms,
        "lon": lng,
        "lat": lat,
        "Postcode": postcode,
        "Address per Property Register": address,
        "Leasehold/Freehold": None,
        "Property type": map_property_type(property_type),
        "Property sub-type": property_type or "Unknown",
        "price": int(price),
        "price_frequency": frequency,
        "Price qualifier": "",
        "Total floor area (sqm)": parse_floor_area(description),
        "Listing URL": listing_url,
        "Listing features": [],
        "first_visible_date": detail.get("available_date", ""),
    }


def search_outcode(
    client: Session,
    outcode: str,
    pc_index: PostcodeSpatialIndex,
    pc_coords: dict[str, tuple[float, float]],
    fetch_details: bool = True,
) -> list[dict]:
    """Collect OpenRent rental listings for a single outcode.

    Steps:
      1. Fetch the search results page for the outcode.
      2. Parse the property cards (title, price, beds, baths).
      3. If ``fetch_details`` is set, fetch each card's detail page, pausing
         half of ``DELAY_BETWEEN_PAGES`` after every detail request.
      4. Transform each listing to the common output schema.

    Cards already carry most fields; detail pages mainly add precise
    coordinates and full postcodes. A listing whose detail page cannot be
    fetched is still transformed from its card data alone.

    Returns the transformed listings; an empty list when the search page
    cannot be fetched or contains no cards.
    """
    results_url = f"{OPENRENT_BASE}/properties-to-rent/?term={outcode}&isLive=true"

    results_html = fetch_page(client, results_url)
    if not results_html:
        return []

    cards = parse_search_results(results_html)
    if not cards:
        return []

    collected = []
    for card in cards:
        detail = None

        if fetch_details and card.get("url"):
            page = fetch_page(client, card["url"])
            if page:
                detail = parse_property_detail(page)
            # Shorter delay for detail pages (within same outcode)
            time.sleep(DELAY_BETWEEN_PAGES * 0.5)

        listing = transform_property(card, detail, pc_index, pc_coords)
        if not listing:
            continue
        collected.append(listing)
        openrent_properties_scraped.labels(channel="rent").inc()

    return collected