More
This commit is contained in:
parent
128b3191e7
commit
03445188ea
54 changed files with 596953 additions and 3577 deletions
128
.github/workflows/ci.yml
vendored
128
.github/workflows/ci.yml
vendored
|
|
@ -1,128 +0,0 @@
|
|||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
lint-python:
|
||||
name: Lint Python
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
with:
|
||||
version: "latest"
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install 3.12
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv sync --dev
|
||||
|
||||
- name: Run ruff check
|
||||
run: uv run ruff check .
|
||||
|
||||
- name: Run ruff format check
|
||||
run: uv run ruff format --check .
|
||||
|
||||
lint-frontend:
|
||||
name: Lint Frontend
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: frontend
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: "20"
|
||||
cache: "npm"
|
||||
cache-dependency-path: frontend/package-lock.json
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Run ESLint
|
||||
run: npm run lint
|
||||
|
||||
- name: Run Prettier check
|
||||
run: npm run format:check
|
||||
|
||||
- name: Run TypeScript check
|
||||
run: npm run typecheck
|
||||
|
||||
build-frontend:
|
||||
name: Build Frontend
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint-frontend]
|
||||
defaults:
|
||||
run:
|
||||
working-directory: frontend
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: "20"
|
||||
cache: "npm"
|
||||
cache-dependency-path: frontend/package-lock.json
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Build
|
||||
run: npm run build
|
||||
|
||||
lint-rust:
|
||||
name: Lint Rust
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: server-rs
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
components: clippy, rustfmt
|
||||
|
||||
- name: Cache cargo
|
||||
uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: server-rs
|
||||
|
||||
- name: Run clippy
|
||||
run: cargo clippy -- -D warnings
|
||||
|
||||
- name: Check formatting
|
||||
run: cargo fmt --check
|
||||
|
||||
test-rust:
|
||||
name: Test Rust
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint-rust]
|
||||
defaults:
|
||||
run:
|
||||
working-directory: server-rs
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
|
||||
- name: Cache cargo
|
||||
uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: server-rs
|
||||
|
||||
- name: Run tests
|
||||
run: cargo test
|
||||
49
.github/workflows/docker.yml
vendored
49
.github/workflows/docker.yml
vendored
|
|
@ -1,49 +0,0 @@
|
|||
name: Docker
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
IMAGE_NAME: ${{ github.repository }}
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Log in to GitHub Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Extract metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=raw,value=latest
|
||||
type=sha,prefix=sha-,format=short
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
28
.github/workflows/lint.yml
vendored
28
.github/workflows/lint.yml
vendored
|
|
@ -1,28 +0,0 @@
|
|||
name: Lint
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv sync
|
||||
|
||||
- name: Check linting
|
||||
run: uv run ruff check .
|
||||
|
||||
- name: Check formatting
|
||||
run: uv run ruff format --check .
|
||||
594829
analyses/rightmove_buy.ipynb
Normal file
594829
analyses/rightmove_buy.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -31,7 +31,7 @@ services:
|
|||
OLLAMA_URL: http://host.docker.internal:11434
|
||||
OLLAMA_MODEL: gpt-oss:20b
|
||||
PUBLIC_URL: https://perfectpostcodes.schmelczer.dev
|
||||
R5_URL: http://r5:8003
|
||||
|
||||
# Never commit the key itself: it is read from the environment (or an .env
# file next to the compose file) and compose aborts if it is missing.
# The key that was previously committed here should be treated as leaked
# and rotated.
GOOGLE_MAPS_API_KEY: "${GOOGLE_MAPS_API_KEY:?GOOGLE_MAPS_API_KEY must be set}"
|
||||
depends_on:
|
||||
pocketbase:
|
||||
|
|
@ -141,27 +141,6 @@ services:
|
|||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
|
||||
r5:
|
||||
init: true
|
||||
build: ./r5-java
|
||||
ports:
|
||||
- "8004:8003"
|
||||
networks:
|
||||
- dev-network
|
||||
volumes:
|
||||
- r5-network:/data/network
|
||||
- ./property-data/transit:/data/transit:ro
|
||||
- ./property-data/transit/raw:/data/transit-raw:ro
|
||||
environment:
|
||||
DATA_DIR: /data/transit
|
||||
OSM_DIR: /data/transit-raw
|
||||
NETWORK_CACHE_DIR: /data/network
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8003/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 300s
|
||||
|
||||
volumes:
|
||||
pb-data:
|
||||
|
|
@ -169,7 +148,6 @@ volumes:
|
|||
cargo-target:
|
||||
frontend-node-modules:
|
||||
screenshot-cache:
|
||||
r5-network:
|
||||
gluetun-cache-v2:
|
||||
gluetun-auth:
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,6 @@ WORKDIR /app
|
|||
COPY pyproject.toml ./
|
||||
RUN uv pip install --system -r pyproject.toml
|
||||
|
||||
COPY main.py ./
|
||||
COPY *.py ./
|
||||
|
||||
CMD ["python3", "main.py"]
|
||||
|
|
|
|||
56
finder/constants.py
Normal file
56
finder/constants.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Location of the ArcGIS postcode parquet file; overridable through the
# ARCGIS_PATH environment variable.
ARCGIS_PATH = os.environ.get("ARCGIS_PATH", "/data/arcgis_data.parquet")
# Directory used for the scraper's data files.
DATA_DIR = Path("/app/data")
# Step by which the search result index advances between pages.
PAGE_SIZE = 24
# Pause between consecutive result pages (passed to time.sleep, so seconds).
DELAY_BETWEEN_PAGES = 1.0
# Pause between outcodes — presumably seconds as well; confirm against the caller.
DELAY_BETWEEN_OUTCODES = 2.0
# Number of attempts made per HTTP request before giving up.
MAX_RETRIES = 3
# Base of the exponential back-off between attempts: base * 2**attempt plus jitter.
RETRY_BASE_DELAY = 2.0
GRID_CELL_SIZE = 0.01  # degrees for postcode spatial index
# NOTE(review): presumably seeds a random generator for reproducible ordering — confirm.
SEED = 42

# Rightmove endpoints.
TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
SEARCH_URL = "https://www.rightmove.co.uk/api/property-search/listing/search"
RIGHTMOVE_BASE = "https://www.rightmove.co.uk"

# Maps Rightmove's propertySubType values onto the canonical property types
# used in the output: Detached, Semi-Detached, Terraced, Flat, Other.
PROPERTY_TYPE_MAP = {
    "Detached": "Detached",
    "Semi-Detached": "Semi-Detached",
    "Terraced": "Terraced",
    "End of Terrace": "Terraced",
    "Mid Terrace": "Terraced",
    "Flat": "Flat",
    "Maisonette": "Flat",
    "Studio": "Flat",
    "Apartment": "Flat",
    "Penthouse": "Flat",
    "Ground Flat": "Flat",
    "Detached Bungalow": "Detached",
    "Semi-Detached Bungalow": "Semi-Detached",
    "Town House": "Terraced",
    "Link Detached": "Detached",
    "Link Detached House": "Detached",
    "Bungalow": "Other",
    "Cottage": "Other",
    "Park Home": "Other",
    "Land": "Other",
    "Farm / Barn": "Other",
    "House": "Detached",
    "Not Specified": "Other",
    "Chalet": "Other",
    "Barn Conversion": "Other",
    "Coach House": "Other",
    "Character Property": "Other",
    "Cluster House": "Other",
    "Retirement Property": "Flat",
    "Plot": "Other",
    "Garages": "Other",
    "Mews": "Terraced",
}

# One entry per scraped channel; the values are sent verbatim as the
# channel / transactionType / sortType query parameters of a search request.
CHANNELS = [
    {"channel": "BUY", "transactionType": "BUY", "sortType": "2"},
    {"channel": "RENT", "transactionType": "LETTING", "sortType": "6"},
]
|
||||
126
finder/http_client.py
Normal file
126
finder/http_client.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
import logging
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
|
||||
import httpx
|
||||
from fake_useragent import UserAgent
|
||||
|
||||
from constants import MAX_RETRIES, RETRY_BASE_DELAY
|
||||
from metrics import http_errors_total, http_requests_total, ip_rotations_total
|
||||
|
||||
log = logging.getLogger("rightmove")
|
||||
|
||||
_ua = UserAgent(browsers=["Chrome", "Edge"], os=["Windows", "Mac OS X"], min_version=120.0)
|
||||
|
||||
|
||||
def _endpoint_label(url: str) -> str:
|
||||
if "typeahead" in url:
|
||||
return "typeahead"
|
||||
if "search" in url:
|
||||
return "search"
|
||||
return "other"
|
||||
|
||||
|
||||
def _status_label(code: int) -> str:
|
||||
if code >= 500:
|
||||
return "5xx"
|
||||
return str(code)
|
||||
|
||||
# Gluetun control API — runs on port 8000 inside the gluetun container.
|
||||
# Since finder uses network_mode: service:gluetun, localhost IS gluetun.
|
||||
GLUETUN_API = "http://127.0.0.1:8000"
|
||||
_ip_rotate_lock = threading.Lock()
|
||||
|
||||
|
||||
def rotate_ip() -> bool:
    """Ask gluetun to reconnect the VPN and wait until the public IP changes.

    The VPN is stopped and restarted through the gluetun control API, then
    the public-IP endpoint is polled every 2 seconds, at most 30 times.
    The whole operation holds ``_ip_rotate_lock`` so only one rotation runs
    at a time, and every outcome is counted in ``ip_rotations_total``.

    Returns:
        True if a non-empty public IP different from the previous one was
        observed; False if stopping or starting the VPN was rejected, the
        polling timed out, or an unexpected error occurred.
    """
    with _ip_rotate_lock:
        log.info("Rotating VPN IP via gluetun...")
        try:
            with httpx.Client(timeout=10) as ctl:
                # Remember the current IP so a change can be detected afterwards.
                old_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
                old_ip = old_ip_resp.json().get("public_ip", "unknown") if old_ip_resp.status_code == 200 else "unknown"
                log.info("Current IP: %s", old_ip)

                # Force a reconnect: stop the VPN, pause briefly, start it again.
                resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"})
                if resp.status_code != 200:
                    log.error("Failed to stop VPN: %d %s", resp.status_code, resp.text)
                    ip_rotations_total.labels(result="failure").inc()
                    return False
                time.sleep(2)

                resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"})
                if resp.status_code != 200:
                    log.error("Failed to start VPN: %d %s", resp.status_code, resp.text)
                    ip_rotations_total.labels(result="failure").inc()
                    return False

            # Wait for reconnection
            for _ in range(30):
                time.sleep(2)
                try:
                    with httpx.Client(timeout=10) as poll:
                        new_ip_resp = poll.get(f"{GLUETUN_API}/v1/publicip/ip")
                        if new_ip_resp.status_code == 200:
                            new_ip = new_ip_resp.json().get("public_ip", "")
                            if new_ip and new_ip != old_ip:
                                log.info("IP rotated: %s → %s", old_ip, new_ip)
                                ip_rotations_total.labels(result="success").inc()
                                return True
                except Exception as e:
                    # Errors are expected while the VPN is still reconnecting;
                    # keep polling, but leave a trace for debugging.
                    log.debug("Public IP not available yet: %s", e)

            log.warning("IP rotation timed out (may still be same IP)")
            ip_rotations_total.labels(result="failure").inc()
            return False

        except Exception as e:
            log.error("IP rotation failed: %s", e)
            ip_rotations_total.labels(result="failure").inc()
            return False
|
||||
|
||||
|
||||
def make_client() -> httpx.Client:
    """Build the HTTP client used for scraping.

    The client has a 30 second timeout, follows redirects, asks for JSON and
    sends the User-Agent obtained from ``_ua.random`` at construction time.
    """
    request_headers = {
        "User-Agent": _ua.random,
        "Accept": "application/json",
    }
    return httpx.Client(timeout=30, headers=request_headers, follow_redirects=True)
|
||||
|
||||
|
||||
def fetch_with_retry(
    client: httpx.Client, url: str, params: dict | None = None, on_403: bool = True
) -> dict | None:
    """GET a JSON document, retrying transient failures with exponential back-off.

    Retries are made on HTTP 429/500/502/503/504 and on connect/read/write/pool
    timeouts, up to ``MAX_RETRIES`` attempts in total.  The back-off is only
    slept when another attempt will actually follow.

    Args:
        client: HTTP client used for the request.
        url: Absolute URL to fetch.
        params: Optional query parameters.
        on_403: When True, a 403 triggers one IP rotation followed by a fresh
            round of attempts with ``on_403=False`` so rotation cannot recurse.

    Returns:
        The decoded JSON body of a 200 response, or None on a non-retryable
        status, an undecodable body, a failed IP rotation, or when all
        attempts are exhausted.
    """
    endpoint = _endpoint_label(url)
    for attempt in range(MAX_RETRIES):
        try:
            resp = client.get(url, params=params)
        except (httpx.ConnectError, httpx.ReadTimeout, httpx.WriteTimeout, httpx.PoolTimeout) as e:
            http_errors_total.labels(type=type(e).__name__).inc()
            failure = type(e).__name__
        else:
            http_requests_total.labels(status=_status_label(resp.status_code), endpoint=endpoint).inc()
            if resp.status_code == 200:
                try:
                    return resp.json()
                except ValueError as e:
                    # A 200 with a non-JSON body would otherwise escape as an
                    # exception although the contract is "None on failure".
                    http_errors_total.labels(type=type(e).__name__).inc()
                    log.error("Invalid JSON in 200 response from %s: %s", url, e)
                    return None
            if resp.status_code == 403 and on_403:
                log.warning("HTTP 403 — IP likely blocked, rotating...")
                if rotate_ip():
                    # Retry once with new IP (but don't recurse on 403 again)
                    return fetch_with_retry(client, url, params, on_403=False)
                log.error("IP rotation failed, giving up on %s", url)
                return None
            if resp.status_code not in (429, 500, 502, 503, 504):
                log.error("HTTP %d from %s (non-retryable)", resp.status_code, url)
                return None
            failure = f"HTTP {resp.status_code}"

        if attempt + 1 < MAX_RETRIES:
            delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
            log.warning("%s from %s, retry %d/%d in %.1fs", failure, url, attempt + 1, MAX_RETRIES, delay)
            time.sleep(delay)
        else:
            # Last attempt: no point in sleeping before reporting exhaustion.
            log.warning("%s from %s, no retries left", failure, url)

    http_errors_total.labels(type="retry_exhausted").inc()
    log.error("All %d retries exhausted for %s", MAX_RETRIES, url)
    return None
|
||||
643
finder/main.py
643
finder/main.py
|
|
@ -1,17 +1,21 @@
|
|||
import logging
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import polars as pl
|
||||
from flask import Flask, jsonify, send_from_directory
|
||||
from flask import Flask, Response, jsonify, send_from_directory
|
||||
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
||||
|
||||
from constants import DATA_DIR
|
||||
from rightmove import outcode_cache
|
||||
from scraper import (
|
||||
_sync_gauges,
|
||||
build_postcode_index,
|
||||
load_outcodes,
|
||||
run_scrape,
|
||||
status,
|
||||
status_lock,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Logging
|
||||
|
|
@ -33,615 +37,6 @@ log.setLevel(logging.DEBUG)
|
|||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||
logging.getLogger("httpcore").setLevel(logging.WARNING)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
ARCGIS_PATH = os.environ.get("ARCGIS_PATH", "/data/arcgis_data.parquet")
|
||||
DATA_DIR = Path("/app/data")
|
||||
PAGE_SIZE = 24
|
||||
MAX_PAGES_PER_OUTCODE = 42 # 24*42 = 1008, safety cap per outcode
|
||||
DELAY_BETWEEN_PAGES = 1.0
|
||||
DELAY_BETWEEN_OUTCODES = 2.0
|
||||
MAX_RETRIES = 3
|
||||
RETRY_BASE_DELAY = 2.0
|
||||
GRID_CELL_SIZE = 0.01 # degrees for postcode spatial index
|
||||
SEED = 42
|
||||
|
||||
TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
|
||||
SEARCH_URL = "https://www.rightmove.co.uk/api/property-search/listing/search"
|
||||
RIGHTMOVE_BASE = "https://www.rightmove.co.uk"
|
||||
|
||||
PROPERTY_TYPE_MAP = {
|
||||
"Detached": "Detached",
|
||||
"Semi-Detached": "Semi-Detached",
|
||||
"Terraced": "Terraced",
|
||||
"End of Terrace": "Terraced",
|
||||
"Mid Terrace": "Terraced",
|
||||
"Flat": "Flat",
|
||||
"Maisonette": "Flat",
|
||||
"Studio": "Flat",
|
||||
"Apartment": "Flat",
|
||||
"Penthouse": "Flat",
|
||||
"Ground Flat": "Flat",
|
||||
"Detached Bungalow": "Detached",
|
||||
"Semi-Detached Bungalow": "Semi-Detached",
|
||||
"Town House": "Terraced",
|
||||
"Link Detached": "Detached",
|
||||
"Link Detached House": "Detached",
|
||||
"Bungalow": "Other",
|
||||
"Cottage": "Other",
|
||||
"Park Home": "Other",
|
||||
"Land": "Other",
|
||||
"Farm / Barn": "Other",
|
||||
"House": "Detached",
|
||||
"Not Specified": "Other",
|
||||
"Chalet": "Other",
|
||||
"Barn Conversion": "Other",
|
||||
"Coach House": "Other",
|
||||
"Character Property": "Other",
|
||||
"Cluster House": "Other",
|
||||
"Retirement Property": "Flat",
|
||||
"Plot": "Other",
|
||||
"Garages": "Other",
|
||||
"Mews": "Terraced",
|
||||
}
|
||||
|
||||
CHANNELS = [
|
||||
{"channel": "BUY", "transactionType": "BUY", "sortType": "2"},
|
||||
{"channel": "RENT", "transactionType": "LETTING", "sortType": "6"},
|
||||
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Postcode spatial index
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class PostcodeSpatialIndex:
    """Grid-based spatial index over arcgis postcodes for nearest-lookup.

    Points are bucketed into square cells of ``GRID_CELL_SIZE`` degrees.
    A lookup only inspects the query's cell and its eight neighbours, so a
    query with no postcode in that 3x3 neighbourhood yields None.
    """

    def __init__(self, lats: list[float], lngs: list[float], postcodes: list[str]):
        """Bucket every (lat, lng, postcode) triple into its grid cell.

        The three lists are consumed in parallel with ``zip``, so surplus
        entries of a longer list are ignored.
        """
        self.grid: dict[tuple[int, int], list[tuple[float, float, str]]] = defaultdict(list)
        for lat, lng, pcd in zip(lats, lngs, postcodes):
            self.grid[self._cell(lat, lng)].append((lat, lng, pcd))
        log.info("Postcode spatial index: %d cells, %d postcodes", len(self.grid), len(lats))

    @staticmethod
    def _cell(lat: float, lng: float) -> tuple[int, int]:
        """Return the (x, y) grid cell containing the given coordinate."""
        return int(math.floor(lng / GRID_CELL_SIZE)), int(math.floor(lat / GRID_CELL_SIZE))

    def nearest(self, lat: float, lng: float) -> str | None:
        """Return the postcode closest to (lat, lng), or None if none is nearby.

        Longitude differences are scaled by cos(latitude) before comparing:
        a degree of longitude is shorter on the ground than a degree of
        latitude, so comparing raw degree differences would pick the wrong
        neighbour for points that lie mostly east or west of the query.
        """
        gx, gy = self._cell(lat, lng)
        lng_scale = math.cos(math.radians(lat))
        best_dist = float("inf")
        best_pcd = None
        for dx in range(-1, 2):
            for dy in range(-1, 2):
                # .get() avoids creating empty buckets in the defaultdict.
                for plat, plng, pcd in self.grid.get((gx + dx, gy + dy), []):
                    d = (plat - lat) ** 2 + ((plng - lng) * lng_scale) ** 2
                    if d < best_dist:
                        best_dist = d
                        best_pcd = pcd
        return best_pcd
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scrape status
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class ScrapeStatus:
    """Mutable progress record of a scrape run."""

    state: str = "idle"  # idle | running | done | error
    # Channel and outcode currently being processed (empty when none).
    channel: str = ""
    outcode: str = ""
    # Progress counters over the outcodes of the run.
    outcodes_done: int = 0
    outcodes_total: int = 0
    # Number of properties collected per channel.
    properties_buy: int = 0
    properties_rent: int = 0
    # Error messages gathered during the run; a fresh list per instance.
    errors: list[str] = field(default_factory=list)
    # NOTE(review): presumably time.time() timestamps, 0.0 meaning "not set" — confirm.
    started_at: float = 0.0
    finished_at: float = 0.0
|
||||
|
||||
|
||||
status = ScrapeStatus()
|
||||
status_lock = threading.Lock()
|
||||
debug_data: dict = {"last_response": None, "outcode_cache": {}}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTTP helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
USER_AGENT = (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
|
||||
)
|
||||
|
||||
# Gluetun control API — runs on port 8000 inside the gluetun container.
|
||||
# Since finder uses network_mode: service:gluetun, localhost IS gluetun.
|
||||
GLUETUN_API = "http://127.0.0.1:8000"
|
||||
_ip_rotate_lock = threading.Lock()
|
||||
|
||||
|
||||
def rotate_ip() -> bool:
    """Ask gluetun to reconnect the VPN and wait until the public IP changes.

    The VPN is stopped and restarted through the gluetun control API, then
    the public-IP endpoint is polled every 2 seconds, at most 30 times.
    The whole operation holds ``_ip_rotate_lock`` so only one rotation runs
    at a time.

    Returns:
        True if a non-empty public IP different from the previous one was
        observed; False if stopping or starting the VPN was rejected, the
        polling timed out, or an unexpected error occurred.
    """
    with _ip_rotate_lock:
        log.info("Rotating VPN IP via gluetun...")
        try:
            with httpx.Client(timeout=10) as ctl:
                # Remember the current IP so a change can be detected afterwards.
                old_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
                old_ip = old_ip_resp.json().get("public_ip", "unknown") if old_ip_resp.status_code == 200 else "unknown"
                log.info("Current IP: %s", old_ip)

                # Force a reconnect: stop the VPN, pause briefly, start it again.
                resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"})
                if resp.status_code != 200:
                    log.error("Failed to stop VPN: %d %s", resp.status_code, resp.text)
                    return False
                time.sleep(2)

                resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"})
                if resp.status_code != 200:
                    log.error("Failed to start VPN: %d %s", resp.status_code, resp.text)
                    return False

            # Wait for reconnection
            for _ in range(30):
                time.sleep(2)
                try:
                    with httpx.Client(timeout=10) as poll:
                        new_ip_resp = poll.get(f"{GLUETUN_API}/v1/publicip/ip")
                        if new_ip_resp.status_code == 200:
                            new_ip = new_ip_resp.json().get("public_ip", "")
                            if new_ip and new_ip != old_ip:
                                log.info("IP rotated: %s → %s", old_ip, new_ip)
                                return True
                except Exception as e:
                    # Errors are expected while the VPN is still reconnecting;
                    # keep polling, but leave a trace for debugging.
                    log.debug("Public IP not available yet: %s", e)

            log.warning("IP rotation timed out (may still be same IP)")
            return False

        except Exception as e:
            log.error("IP rotation failed: %s", e)
            return False
|
||||
|
||||
|
||||
def make_client() -> httpx.Client:
    """Build the HTTP client used for scraping.

    The client has a 30 second timeout, follows redirects, asks for JSON and
    identifies itself with the module-level ``USER_AGENT`` string.
    """
    request_headers = {
        "User-Agent": USER_AGENT,
        "Accept": "application/json",
    }
    return httpx.Client(timeout=30, headers=request_headers, follow_redirects=True)
|
||||
|
||||
|
||||
def fetch_with_retry(
    client: httpx.Client, url: str, params: dict | None = None, on_403: bool = True
) -> dict | None:
    """GET a JSON document, retrying transient failures with exponential back-off.

    Retries are made on HTTP 429/500/502/503/504 and on connect/read/write/pool
    timeouts, up to ``MAX_RETRIES`` attempts in total.  The back-off is only
    slept when another attempt will actually follow.

    Args:
        client: HTTP client used for the request.
        url: Absolute URL to fetch.
        params: Optional query parameters.
        on_403: When True, a 403 triggers one IP rotation followed by a fresh
            round of attempts with ``on_403=False`` so rotation cannot recurse.

    Returns:
        The decoded JSON body of a 200 response, or None on a non-retryable
        status, an undecodable body, a failed IP rotation, or when all
        attempts are exhausted.
    """
    for attempt in range(MAX_RETRIES):
        try:
            resp = client.get(url, params=params)
        except (httpx.ConnectError, httpx.ReadTimeout, httpx.WriteTimeout, httpx.PoolTimeout) as e:
            failure = type(e).__name__
        else:
            if resp.status_code == 200:
                try:
                    return resp.json()
                except ValueError as e:
                    # A 200 with a non-JSON body would otherwise escape as an
                    # exception although the contract is "None on failure".
                    log.error("Invalid JSON in 200 response from %s: %s", url, e)
                    return None
            if resp.status_code == 403 and on_403:
                log.warning("HTTP 403 — IP likely blocked, rotating...")
                if rotate_ip():
                    # Retry once with new IP (but don't recurse on 403 again)
                    return fetch_with_retry(client, url, params, on_403=False)
                log.error("IP rotation failed, giving up on %s", url)
                return None
            if resp.status_code not in (429, 500, 502, 503, 504):
                log.error("HTTP %d from %s (non-retryable)", resp.status_code, url)
                return None
            failure = f"HTTP {resp.status_code}"

        if attempt + 1 < MAX_RETRIES:
            delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
            log.warning("%s from %s, retry %d/%d in %.1fs", failure, url, attempt + 1, MAX_RETRIES, delay)
            time.sleep(delay)
        else:
            # Last attempt: no point in sleeping before reporting exhaustion.
            log.warning("%s from %s, no retries left", failure, url)

    log.error("All %d retries exhausted for %s", MAX_RETRIES, url)
    return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rightmove API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
    """Resolve an outcode to Rightmove's internal location ID.

    Successful lookups are memoised in ``debug_data["outcode_cache"]``.
    Only a typeahead match of type "OUTCODE" whose display name equals the
    outcode exactly is accepted; otherwise None is returned.
    """
    cache = debug_data["outcode_cache"]
    if outcode in cache:
        return cache[outcode]

    payload = fetch_with_retry(client, TYPEAHEAD_URL, {"query": outcode, "limit": "10", "exclude": "STREET"})
    if not payload:
        return None

    for candidate in payload.get("matches", []):
        is_exact_outcode = candidate.get("type") == "OUTCODE" and candidate.get("displayName") == outcode
        if not is_exact_outcode:
            continue
        rightmove_id = str(candidate["id"])
        cache[outcode] = rightmove_id
        return rightmove_id

    log.debug("Outcode %s not found in typeahead results", outcode)
    return None
|
||||
|
||||
|
||||
def search_outcode(
    client: httpx.Client,
    outcode_id: str,
    outcode: str,
    channel_cfg: dict,
    pc_index: PostcodeSpatialIndex,
) -> list[dict]:
    """Collect all listings of one outcode and channel, page by page.

    Pagination stops when a page cannot be fetched, a page has no
    properties, the reported ``resultCount`` has been reached or cannot be
    parsed, or ``MAX_PAGES_PER_OUTCODE`` pages have been read.

    Args:
        client: HTTP client used for the search requests.
        outcode_id: Rightmove's internal ID of the outcode.
        outcode: The outcode itself, stored on every transformed property.
        channel_cfg: Mapping providing "channel", "transactionType" and "sortType".
        pc_index: Spatial index used to assign a postcode to each property.

    Returns:
        The transformed properties; raw entries that ``transform_property``
        rejects are dropped.
    """
    properties = []
    index = 0

    for page in range(MAX_PAGES_PER_OUTCODE):
        params = {
            "useLocationIdentifier": "true",
            "locationIdentifier": f"OUTCODE^{outcode_id}",
            "index": str(index),
            "sortType": channel_cfg["sortType"],
            "channel": channel_cfg["channel"],
            "transactionType": channel_cfg["transactionType"],
        }

        data = fetch_with_retry(client, SEARCH_URL, params)
        if not data:
            log.warning("Failed to fetch page %d for %s/%s", page, outcode, channel_cfg["channel"])
            break

        debug_data["last_response"] = data

        raw_props = data.get("properties", [])
        if not raw_props:
            break

        for prop in raw_props:
            transformed = transform_property(prop, outcode, pc_index)
            if transformed:
                properties.append(transformed)

        # Check if there are more pages.  resultCount is handled as text with
        # thousands separators ("1,234"); going through str() also tolerates a
        # plain number, and anything unparseable ends pagination instead of
        # discarding the properties collected so far with an exception.
        raw_count = data.get("resultCount", "0")
        try:
            result_count = int(str(raw_count).replace(",", ""))
        except ValueError:
            log.warning(
                "Unparseable resultCount %r for %s/%s, stopping pagination",
                raw_count, outcode, channel_cfg["channel"],
            )
            break
        index += PAGE_SIZE

        if index >= result_count:
            break

        if page < MAX_PAGES_PER_OUTCODE - 1:
            time.sleep(DELAY_BETWEEN_PAGES)

    return properties
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property transformation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def parse_display_size(display_size: str | None) -> float | None:
|
||||
"""Parse displaySize like '499 sq. ft.' or '4,124 sq. ft.' to sqm."""
|
||||
if not display_size:
|
||||
return None
|
||||
# Try sq. ft. first
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", display_size, re.IGNORECASE)
|
||||
if m:
|
||||
sqft = float(m.group(1).replace(",", ""))
|
||||
return round(sqft * 0.092903, 1)
|
||||
# Try sq. m.
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", display_size, re.IGNORECASE)
|
||||
if m:
|
||||
return round(float(m.group(1).replace(",", "")), 1)
|
||||
return None
|
||||
|
||||
|
||||
def map_property_type(sub_type: str | None) -> str:
    """Translate a propertySubType into its canonical property type.

    A missing or empty sub type maps to "Other" silently; a sub type that is
    absent from ``PROPERTY_TYPE_MAP`` maps to "Other" with a warning.
    """
    if sub_type:
        canonical = PROPERTY_TYPE_MAP.get(sub_type)
        if canonical:
            return canonical
        log.warning("Unknown propertySubType: %r — mapping to Other", sub_type)
    return "Other"
|
||||
|
||||
|
||||
def extract_tenure(tenure_obj: dict | None) -> str | None:
|
||||
"""Extract tenure string from tenure object."""
|
||||
if not tenure_obj:
|
||||
return None
|
||||
tt = tenure_obj.get("tenureType", "")
|
||||
if tt == "FREEHOLD":
|
||||
return "Freehold"
|
||||
if tt == "LEASEHOLD":
|
||||
return "Leasehold"
|
||||
return None
|
||||
|
||||
|
||||
def fix_coords(lat: float, lng: float) -> tuple[float, float]:
    """Return (lat, lng), swapping the pair when it looks reversed.

    The bounds used are lat 49..56 and lng -7..2.  A pair that fits neither
    way round is returned unchanged after logging a warning.
    """

    def _within_bounds(latitude: float, longitude: float) -> bool:
        return 49 <= latitude <= 56 and -7 <= longitude <= 2

    if _within_bounds(lat, lng):
        return lat, lng
    if _within_bounds(lng, lat):
        log.debug("Swapping reversed coords: lat=%.4f lng=%.4f → lat=%.4f lng=%.4f", lat, lng, lng, lat)
        return lng, lat
    log.warning("Coords outside England bounds even after swap attempt: lat=%.4f lng=%.4f", lat, lng)
    return lat, lng
|
||||
|
||||
|
||||
def normalize_price(amount: int, frequency: str) -> int:
    """Convert a price to a monthly figure.

    "weekly" amounts are multiplied by 52/12 and "yearly" amounts divided by
    12, both rounded to an integer; any other frequency leaves the amount
    untouched.
    """
    monthly = amount
    if frequency == "weekly":
        monthly = round(amount * 52 / 12)
    elif frequency == "yearly":
        monthly = round(amount / 12)
    return monthly
|
||||
|
||||
|
||||
def transform_property(prop: dict, outcode: str, pc_index: PostcodeSpatialIndex) -> dict | None:
    """Transform a raw Rightmove property dict into our output schema.

    Optional parts of the payload are read with ``or <empty>`` so that a key
    that is present but null is treated like a missing key instead of raising.

    Args:
        prop: One entry of the search response's "properties" list.
        outcode: Outcode the property was found under; copied to the output.
        pc_index: Spatial index used to look up the nearest postcode.

    Returns:
        The flattened property record, or None when the property has no
        location, no coordinates, or no price amount.
    """
    loc = prop.get("location")
    if not loc:
        return None
    raw_lat = loc.get("latitude")
    raw_lng = loc.get("longitude")
    if raw_lat is None or raw_lng is None:
        return None

    lat, lng = fix_coords(raw_lat, raw_lng)

    price_obj = prop.get("price") or {}
    amount = price_obj.get("amount")
    if amount is None:
        return None
    frequency = price_obj.get("frequency", "")
    price = normalize_price(int(amount), frequency)

    display_prices = price_obj.get("displayPrices") or []
    price_qualifier = display_prices[0].get("displayPriceQualifier", "") if display_prices else ""

    sub_type = prop.get("propertySubType", "")
    bedrooms = prop.get("bedrooms", 0) or 0
    bathrooms = prop.get("bathrooms", 0) or 0

    # Keep only features that carry a non-empty description.
    key_features = [kf.get("description", "") for kf in prop.get("keyFeatures") or [] if kf.get("description")]

    listing_update = prop.get("listingUpdate") or {}
    update_date = listing_update.get("listingUpdateDate", "")

    postcode = pc_index.nearest(lat, lng)

    return {
        "id": prop.get("id"),
        "bedrooms": bedrooms,
        "bathrooms": bathrooms,
        "total_rooms": bedrooms + bathrooms,
        "longitude": lng,
        "latitude": lat,
        "postcode": postcode,
        "address": prop.get("displayAddress", ""),
        "tenure": extract_tenure(prop.get("tenure")),
        "property_type": map_property_type(sub_type),
        "property_sub_type": sub_type or "Unknown",
        "price": price,
        "price_frequency": frequency,
        "price_qualifier": price_qualifier,
        "floorspace_sqm": parse_display_size(prop.get("displaySize")),
        "url": RIGHTMOVE_BASE + (prop.get("propertyUrl") or ""),
        "features": key_features,
        "first_visible_date": prop.get("firstVisibleDate", ""),
        "update_date": update_date,
        "outcode": outcode,
        "house_share": sub_type == "House Share",
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parquet writing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def write_parquet(properties: list[dict], path: Path) -> None:
    """Persist transformed property records to a parquet file via Polars.

    Args:
        properties: Records to write. Each one must contain every key named
            in the column schema below; a missing key raises ``KeyError``.
        path: Destination parquet file.

    An empty ``properties`` list only logs a warning; no file is written.
    """
    if not properties:
        log.warning("No properties to write to %s", path)
        return

    # Single source of truth for column names, order and dtypes.
    column_types = {
        "id": pl.Int64,
        "bedrooms": pl.Int32,
        "bathrooms": pl.Int32,
        "total_rooms": pl.Int32,
        "longitude": pl.Float64,
        "latitude": pl.Float64,
        "postcode": pl.Utf8,
        "address": pl.Utf8,
        "tenure": pl.Utf8,
        "property_type": pl.Utf8,
        "property_sub_type": pl.Utf8,
        "price": pl.Int64,
        "price_frequency": pl.Utf8,
        "price_qualifier": pl.Utf8,
        "floorspace_sqm": pl.Float64,
        "url": pl.Utf8,
        "features": pl.List(pl.Utf8),
        "first_visible_date": pl.Utf8,
        "update_date": pl.Utf8,
        "outcode": pl.Utf8,
        "house_share": pl.Boolean,
    }

    # Pivot the row-oriented records into one list per column.
    columns = {
        name: [record[name] for record in properties]
        for name in column_types
    }
    frame = pl.DataFrame(columns, schema=column_types)

    frame.write_parquet(path)
    log.info("Wrote %d properties to %s", len(frame), path)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scrape orchestration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def load_outcodes() -> list[str]:
    """Load England-only outcodes from arcgis parquet.

    Reads the postcode table at ``ARCGIS_PATH``, keeps rows whose ``ctry``
    equals ``"E92000001"``, extracts the leading outcode portion of each
    ``pcd`` value and returns the distinct outcodes in sorted order.
    Postcodes that do not match the outcode pattern are dropped.

    Returns:
        Sorted list of unique outcode strings.
    """
    log.info("Loading outcodes from %s", ARCGIS_PATH)
    # Only the postcode and country columns are used here, so the coordinate
    # columns are not read.
    df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry"])
    england = df.filter(pl.col("ctry") == "E92000001")
    log.info("England postcodes: %d", len(england))

    outcodes = (
        england.select(pl.col("pcd").str.extract(r"^([A-Z]{1,2}\d[A-Z0-9]?)", 1).alias("outcode"))
        .drop_nulls()
        .get_column("outcode")
        .unique()
        .sort()
        .to_list()
    )
    log.info("Unique England outcodes: %d", len(outcodes))
    return outcodes
|
||||
|
||||
|
||||
def build_postcode_index() -> PostcodeSpatialIndex:
    """Create the nearest-postcode index from the arcgis England postcodes.

    Rows are limited to ``ctry == "E92000001"`` and rows without a latitude
    or longitude are discarded before the index is built.
    """
    log.info("Building postcode spatial index from %s", ARCGIS_PATH)
    frame = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])

    is_england = pl.col("ctry") == "E92000001"
    located = frame.filter(is_england).drop_nulls(subset=["lat", "long"])

    lats = located.get_column("lat").to_list()
    lngs = located.get_column("long").to_list()
    pcds = located.get_column("pcd").to_list()
    return PostcodeSpatialIndex(lats, lngs, pcds)
|
||||
|
||||
|
||||
def run_scrape(outcodes: list[str], pc_index: PostcodeSpatialIndex) -> None:
    """Main scrape loop — runs in background thread.

    For each entry in ``CHANNELS`` every outcode is resolved and searched,
    results are de-duplicated by property id, and one parquet file
    (``rightmove_buy.parquet`` / ``rightmove_rent.parquet``) is written to
    ``DATA_DIR``. Progress is published through the shared ``status`` object
    under ``status_lock``.

    Args:
        outcodes: Outcodes to visit. The list is copied before shuffling, so
            the caller's list is not reordered.
        pc_index: Spatial index handed to ``search_outcode``.

    A failure on a single outcode is logged and appended to ``status.errors``
    without stopping the run. Any other exception sets the state to
    ``"error"``. The HTTP client is always closed.
    """
    # `status` is only mutated, never rebound, so no `global` declaration is needed.
    with status_lock:
        status.state = "running"
        status.started_at = time.time()
        # Clear the previous run's end time; otherwise a re-run reports a
        # finish timestamp that is older than started_at while still running.
        status.finished_at = 0.0
        status.errors = []
        status.properties_buy = 0
        status.properties_rent = 0

    # Shuffle for geographic diversity
    shuffled = list(outcodes)
    random.seed(SEED)
    random.shuffle(shuffled)

    client = make_client()

    try:
        for channel_cfg in CHANNELS:
            channel_name = channel_cfg["channel"]
            file_suffix = "buy" if channel_name == "BUY" else "rent"
            all_properties: dict[int, dict] = {}  # dedup by id

            with status_lock:
                status.channel = channel_name
                status.outcodes_done = 0
                status.outcodes_total = len(shuffled)

            log.info("=== Starting %s channel (%d outcodes) ===", channel_name, len(shuffled))

            for i, outcode in enumerate(shuffled):
                with status_lock:
                    status.outcode = outcode
                    status.outcodes_done = i

                log.debug("Outcode %s (%d/%d) — %d properties so far",
                          outcode, i + 1, len(shuffled), len(all_properties))

                try:
                    outcode_id = resolve_outcode_id(client, outcode)
                    if not outcode_id:
                        log.debug("No Rightmove ID for outcode %s, skipping", outcode)
                        continue

                    props = search_outcode(client, outcode_id, outcode, channel_cfg, pc_index)
                    for p in props:
                        pid = p["id"]
                        # First occurrence wins when a property shows up under several outcodes.
                        if pid not in all_properties:
                            all_properties[pid] = p

                    with status_lock:
                        if channel_name == "BUY":
                            status.properties_buy = len(all_properties)
                        else:
                            status.properties_rent = len(all_properties)

                    log.info("Outcode %s: got %d properties (total: %d)", outcode, len(props), len(all_properties))

                except Exception as e:
                    msg = f"Error scraping {outcode}/{channel_name}: {e}"
                    log.error(msg)
                    with status_lock:
                        status.errors.append(msg)

                # No delay after the final outcode of a channel.
                if i < len(shuffled) - 1:
                    time.sleep(DELAY_BETWEEN_OUTCODES)

            # Write parquet
            deduped = list(all_properties.values())
            output_path = DATA_DIR / f"rightmove_{file_suffix}.parquet"
            write_parquet(deduped, output_path)

            with status_lock:
                if channel_name == "BUY":
                    status.properties_buy = len(deduped)
                else:
                    status.properties_rent = len(deduped)
                status.outcodes_done = len(shuffled)

            log.info("=== %s channel complete: %d unique properties ===", channel_name, len(deduped))

        with status_lock:
            status.state = "done"
            status.finished_at = time.time()
            elapsed = status.finished_at - status.started_at
            log.info("Scrape complete in %.0fs — buy: %d, rent: %d",
                     elapsed, status.properties_buy, status.properties_rent)

    except Exception as e:
        log.exception("Fatal scrape error")
        with status_lock:
            status.state = "error"
            status.errors.append(f"Fatal: {e}")
            status.finished_at = time.time()
    finally:
        client.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Startup: load data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -693,12 +88,18 @@ def get_status():
|
|||
@app.route("/debug")
|
||||
def get_debug():
|
||||
return jsonify({
|
||||
"last_response": debug_data["last_response"],
|
||||
"outcode_cache_size": len(debug_data["outcode_cache"]),
|
||||
"outcode_cache_sample": dict(list(debug_data["outcode_cache"].items())[:20]),
|
||||
"outcode_cache_size": len(outcode_cache),
|
||||
"outcode_cache_sample": dict(list(outcode_cache.items())[:20]),
|
||||
})
|
||||
|
||||
|
||||
@app.route("/metrics")
def metrics():
    """Serve the Prometheus exposition for the scraper."""
    # Refresh the gauges from the shared status while holding its lock, then
    # render the registry outside the lock.
    with status_lock:
        _sync_gauges()
    exposition = generate_latest()
    return Response(exposition, mimetype=CONTENT_TYPE_LATEST)
|
||||
|
||||
|
||||
@app.route("/data/<filename>")
|
||||
def serve_data(filename):
|
||||
if not filename.endswith(".parquet"):
|
||||
|
|
|
|||
59
finder/metrics.py
Normal file
59
finder/metrics.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
from prometheus_client import Counter, Gauge
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gauges — current scrape state, updated after each outcode
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# One time series per state label; the active state is set to 1, the others to 0.
scrape_state = Gauge(
    "scrape_state",
    "Current scrape state as a labeled gauge (1 = active)",
    ["state"],
)

# Progress counter within the channel currently being scraped.
scrape_outcodes_done = Gauge(
    "scrape_outcodes_done",
    "Outcodes processed in current channel",
)

# NOTE(review): a gauge with a `_total` suffix; that suffix is conventionally
# used for counters. The name is kept because dashboards may depend on it.
scrape_outcodes_total = Gauge(
    "scrape_outcodes_total",
    "Total outcodes in current channel",
)

# Labelled by channel; same `_total`-on-a-gauge naming caveat as above.
scrape_properties_total = Gauge(
    "scrape_properties_total",
    "Properties found so far",
    ["channel"],
)

# Wall-clock duration of the current (or last finished) scrape.
scrape_elapsed_seconds = Gauge(
    "scrape_elapsed_seconds",
    "Seconds since scrape started",
)

# ---------------------------------------------------------------------------
# Counters — monotonically increasing
# ---------------------------------------------------------------------------

# Labelled by response status and endpoint.
# NOTE(review): presumably incremented by the HTTP client module — confirm.
http_requests_total = Counter(
    "http_requests_total",
    "HTTP requests made by the scraper",
    ["status", "endpoint"],
)

# Labelled by error type.
http_errors_total = Counter(
    "http_errors_total",
    "HTTP connection/timeout errors",
    ["type"],
)

# Labelled by the outcome of each rotation attempt.
ip_rotations_total = Counter(
    "ip_rotations_total",
    "VPN IP rotation attempts",
    ["result"],
)

# Unlabelled count of outcodes whose scrape raised an exception.
scrape_errors_total = Counter(
    "scrape_errors_total",
    "Per-outcode scrape errors",
)
|
||||
|
|
@ -6,4 +6,6 @@ dependencies = [
|
|||
"flask",
|
||||
"httpx",
|
||||
"polars",
|
||||
"fake-useragent>=2.2.0",
|
||||
"prometheus-client",
|
||||
]
|
||||
|
|
|
|||
86
finder/rightmove.py
Normal file
86
finder/rightmove.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
import logging
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
from constants import (
|
||||
PAGE_SIZE,
|
||||
DELAY_BETWEEN_PAGES,
|
||||
SEARCH_URL,
|
||||
TYPEAHEAD_URL,
|
||||
)
|
||||
from http_client import fetch_with_retry
|
||||
from spatial import PostcodeSpatialIndex
|
||||
from transform import transform_property
|
||||
|
||||
log = logging.getLogger("rightmove")
|
||||
|
||||
# Outcode ID cache (Rightmove typeahead → internal ID)
|
||||
outcode_cache: dict[str, str] = {}
|
||||
|
||||
|
||||
def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
    """Translate an outcode into Rightmove's internal location ID.

    Successful lookups are memoised in ``outcode_cache``; failed lookups are
    not cached, so they are retried on the next call.

    Args:
        client: HTTP client used for the typeahead request.
        outcode: Outcode to resolve, matched exactly against ``displayName``.

    Returns:
        The ID as a string, or ``None`` when the request yields no data or no
        exact ``OUTCODE`` match.
    """
    try:
        return outcode_cache[outcode]
    except KeyError:
        pass

    query = {"query": outcode, "limit": "10", "exclude": "STREET"}
    payload = fetch_with_retry(client, TYPEAHEAD_URL, query)
    if not payload:
        return None

    # Take the first suggestion that is an outcode with exactly this name.
    exact = next(
        (
            candidate
            for candidate in payload.get("matches", [])
            if candidate.get("type") == "OUTCODE" and candidate.get("displayName") == outcode
        ),
        None,
    )
    if exact is None:
        log.debug("Outcode %s not found in typeahead results", outcode)
        return None

    resolved = str(exact["id"])
    outcode_cache[outcode] = resolved
    return resolved
|
||||
|
||||
|
||||
def search_outcode(
    client: httpx.Client,
    outcode_id: str,
    outcode: str,
    channel_cfg: dict,
    pc_index: PostcodeSpatialIndex,
) -> list[dict]:
    """Paginate through search results for one outcode+channel. Returns transformed properties.

    Args:
        client: HTTP client used for the search requests.
        outcode_id: Rightmove location ID for the outcode.
        outcode: The outcode itself, used for logging and passed to
            ``transform_property``.
        channel_cfg: Mapping providing ``sortType``, ``channel`` and
            ``transactionType`` request parameters.
        pc_index: Spatial index passed to ``transform_property``.

    Returns:
        Every property that ``transform_property`` accepted, in page order.
        Pagination stops on a failed fetch, an empty page, an unparseable
        result count, or once ``index`` reaches the reported result count;
        properties gathered so far are still returned.
    """
    properties = []
    index = 0

    while True:
        params = {
            "useLocationIdentifier": "true",
            "locationIdentifier": f"OUTCODE^{outcode_id}",
            "index": str(index),
            "sortType": channel_cfg["sortType"],
            "channel": channel_cfg["channel"],
            "transactionType": channel_cfg["transactionType"],
        }

        data = fetch_with_retry(client, SEARCH_URL, params)
        if not data:
            log.warning("Failed to fetch index %d for %s/%s", index, outcode, channel_cfg["channel"])
            break

        raw_props = data.get("properties", [])
        if not raw_props:
            break

        for prop in raw_props:
            transformed = transform_property(prop, outcode, pc_index)
            if transformed:
                properties.append(transformed)

        # Check if there are more pages. The count may be a string with
        # thousands separators ("1,234"); str() also accepts a plain number.
        # An unparseable value ends pagination but keeps what was collected,
        # instead of raising and losing the pages already fetched.
        raw_count = data.get("resultCount", "0")
        try:
            result_count = int(str(raw_count).replace(",", ""))
        except ValueError:
            log.warning(
                "Unparseable resultCount %r for %s/%s; stopping pagination",
                raw_count, outcode, channel_cfg["channel"],
            )
            break
        index += PAGE_SIZE

        if index >= result_count:
            break

        time.sleep(DELAY_BETWEEN_PAGES)

    return properties
|
||||
191
finder/scraper.py
Normal file
191
finder/scraper.py
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
import logging
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import polars as pl
|
||||
|
||||
from constants import ARCGIS_PATH, CHANNELS, DATA_DIR, DELAY_BETWEEN_OUTCODES, SEED
|
||||
from http_client import make_client
|
||||
from metrics import (
|
||||
scrape_elapsed_seconds,
|
||||
scrape_errors_total,
|
||||
scrape_outcodes_done,
|
||||
scrape_outcodes_total,
|
||||
scrape_properties_total,
|
||||
scrape_state,
|
||||
)
|
||||
from rightmove import resolve_outcode_id, search_outcode
|
||||
from spatial import PostcodeSpatialIndex
|
||||
from storage import write_parquet
|
||||
|
||||
log = logging.getLogger("rightmove")
|
||||
|
||||
|
||||
@dataclass
class ScrapeStatus:
    # Snapshot of scrape progress shared between the scrape thread and readers.
    state: str = "idle"  # idle | running | done | error
    channel: str = ""  # channel currently being scraped
    outcode: str = ""  # outcode currently being processed
    outcodes_done: int = 0  # outcodes handled so far in the current channel
    outcodes_total: int = 0  # number of outcodes in the current channel
    properties_buy: int = 0  # unique properties collected for the BUY channel
    properties_rent: int = 0  # unique properties collected for the other channel
    errors: list[str] = field(default_factory=list)  # error messages from the current run
    started_at: float = 0.0  # time.time() at scrape start; 0.0 means never started
    finished_at: float = 0.0  # time.time() at scrape end; 0.0 means not finished
|
||||
|
||||
|
||||
status = ScrapeStatus()
|
||||
status_lock = threading.Lock()
|
||||
|
||||
|
||||
def _sync_gauges() -> None:
    """Copy the current ScrapeStatus into the Prometheus gauges.

    The caller must hold ``status_lock``.
    """
    current_state = status.state
    for candidate in ("idle", "running", "done", "error"):
        scrape_state.labels(state=candidate).set(int(current_state == candidate))

    scrape_outcodes_done.set(status.outcodes_done)
    scrape_outcodes_total.set(status.outcodes_total)

    per_channel = {"buy": status.properties_buy, "rent": status.properties_rent}
    for channel, count in per_channel.items():
        scrape_properties_total.labels(channel=channel).set(count)

    # Elapsed time runs to the finish timestamp when there is one, otherwise
    # to now; a scrape that never started reports zero.
    elapsed = 0
    if status.started_at:
        stop = status.finished_at or time.time()
        elapsed = stop - status.started_at
    scrape_elapsed_seconds.set(elapsed)
|
||||
|
||||
|
||||
def load_outcodes() -> list[str]:
    """Load England-only outcodes from arcgis parquet.

    Reads the postcode table at ``ARCGIS_PATH``, keeps rows whose ``ctry``
    equals ``"E92000001"``, extracts the leading outcode portion of each
    ``pcd`` value and returns the distinct outcodes in sorted order.
    Postcodes that do not match the outcode pattern are dropped.

    Returns:
        Sorted list of unique outcode strings.
    """
    log.info("Loading outcodes from %s", ARCGIS_PATH)
    # Only the postcode and country columns are used here, so the coordinate
    # columns are not read.
    df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry"])
    england = df.filter(pl.col("ctry") == "E92000001")
    log.info("England postcodes: %d", len(england))

    outcodes = (
        england.select(pl.col("pcd").str.extract(r"^([A-Z]{1,2}\d[A-Z0-9]?)", 1).alias("outcode"))
        .drop_nulls()
        .get_column("outcode")
        .unique()
        .sort()
        .to_list()
    )
    log.info("Unique England outcodes: %d", len(outcodes))
    return outcodes
|
||||
|
||||
|
||||
def build_postcode_index() -> PostcodeSpatialIndex:
    """Create the nearest-postcode index from the arcgis England postcodes.

    Rows are limited to ``ctry == "E92000001"`` and rows without a latitude
    or longitude are discarded before the index is built.
    """
    log.info("Building postcode spatial index from %s", ARCGIS_PATH)
    frame = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])

    is_england = pl.col("ctry") == "E92000001"
    located = frame.filter(is_england).drop_nulls(subset=["lat", "long"])

    lats = located.get_column("lat").to_list()
    lngs = located.get_column("long").to_list()
    pcds = located.get_column("pcd").to_list()
    return PostcodeSpatialIndex(lats, lngs, pcds)
|
||||
|
||||
|
||||
def run_scrape(outcodes: list[str], pc_index: PostcodeSpatialIndex) -> None:
    """Main scrape loop — runs in background thread.

    For each entry in ``CHANNELS`` every outcode is resolved and searched,
    results are de-duplicated by property id, and one parquet file
    (``rightmove_buy.parquet`` / ``rightmove_rent.parquet``) is written to
    ``DATA_DIR``. Progress is published through the shared ``status`` object
    under ``status_lock`` and mirrored into the Prometheus gauges.

    Args:
        outcodes: Outcodes to visit. The list is copied before shuffling, so
            the caller's list is not reordered.
        pc_index: Spatial index handed to ``search_outcode``.

    A failure on a single outcode is logged, counted in
    ``scrape_errors_total`` and appended to ``status.errors`` without
    stopping the run. Any other exception sets the state to ``"error"``.
    The HTTP client is always closed.
    """
    # `status` is only mutated, never rebound, so no `global` declaration is needed.
    with status_lock:
        status.state = "running"
        status.started_at = time.time()
        # Clear the previous run's end time. Without this, a re-run keeps the
        # old finish timestamp and the elapsed-time gauge is computed as
        # (old finished_at - new started_at), i.e. a negative number.
        status.finished_at = 0.0
        status.errors = []
        status.properties_buy = 0
        status.properties_rent = 0
        _sync_gauges()

    # Shuffle for geographic diversity
    shuffled = list(outcodes)
    random.seed(SEED)
    random.shuffle(shuffled)

    client = make_client()

    try:
        for channel_cfg in CHANNELS:
            channel_name = channel_cfg["channel"]
            file_suffix = "buy" if channel_name == "BUY" else "rent"
            all_properties: dict[int, dict] = {}  # dedup by id

            with status_lock:
                status.channel = channel_name
                status.outcodes_done = 0
                status.outcodes_total = len(shuffled)

            log.info("=== Starting %s channel (%d outcodes) ===", channel_name, len(shuffled))

            for i, outcode in enumerate(shuffled):
                with status_lock:
                    status.outcode = outcode
                    status.outcodes_done = i

                log.debug("Outcode %s (%d/%d) — %d properties so far",
                          outcode, i + 1, len(shuffled), len(all_properties))

                try:
                    outcode_id = resolve_outcode_id(client, outcode)
                    if not outcode_id:
                        log.debug("No Rightmove ID for outcode %s, skipping", outcode)
                        continue

                    props = search_outcode(client, outcode_id, outcode, channel_cfg, pc_index)
                    for p in props:
                        pid = p["id"]
                        # First occurrence wins when a property shows up under several outcodes.
                        if pid not in all_properties:
                            all_properties[pid] = p

                    with status_lock:
                        if channel_name == "BUY":
                            status.properties_buy = len(all_properties)
                        else:
                            status.properties_rent = len(all_properties)
                        _sync_gauges()

                    log.info("Outcode %s: got %d properties (total: %d)", outcode, len(props), len(all_properties))

                except Exception as e:
                    msg = f"Error scraping {outcode}/{channel_name}: {e}"
                    log.error(msg)
                    scrape_errors_total.inc()
                    with status_lock:
                        status.errors.append(msg)

                # No delay after the final outcode of a channel.
                if i < len(shuffled) - 1:
                    time.sleep(DELAY_BETWEEN_OUTCODES)

            # Write parquet
            deduped = list(all_properties.values())
            output_path = DATA_DIR / f"rightmove_{file_suffix}.parquet"
            write_parquet(deduped, output_path)

            with status_lock:
                if channel_name == "BUY":
                    status.properties_buy = len(deduped)
                else:
                    status.properties_rent = len(deduped)
                status.outcodes_done = len(shuffled)
                _sync_gauges()

            log.info("=== %s channel complete: %d unique properties ===", channel_name, len(deduped))

        with status_lock:
            status.state = "done"
            status.finished_at = time.time()
            _sync_gauges()
            elapsed = status.finished_at - status.started_at
            log.info("Scrape complete in %.0fs — buy: %d, rent: %d",
                     elapsed, status.properties_buy, status.properties_rent)

    except Exception as e:
        log.exception("Fatal scrape error")
        with status_lock:
            status.state = "error"
            status.errors.append(f"Fatal: {e}")
            status.finished_at = time.time()
            _sync_gauges()
    finally:
        client.close()
|
||||
33
finder/spatial.py
Normal file
33
finder/spatial.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import logging
|
||||
import math
|
||||
from collections import defaultdict
|
||||
|
||||
from constants import GRID_CELL_SIZE
|
||||
|
||||
log = logging.getLogger("rightmove")
|
||||
|
||||
|
||||
class PostcodeSpatialIndex:
    """Grid-based spatial index over arcgis postcodes for nearest-lookup.

    Points are bucketed into square cells of ``GRID_CELL_SIZE`` degrees.
    A lookup inspects only the query's cell and its eight neighbours, so it
    returns ``None`` when those cells are empty, and a postcode lying beyond
    the adjacent cells is never considered.
    """

    def __init__(self, lats: list[float], lngs: list[float], postcodes: list[str]):
        """Bucket each (lat, lng, postcode) triple into its grid cell.

        The three lists are consumed in parallel; iteration stops at the
        shortest one.
        """
        self.grid: dict[tuple[int, int], list[tuple[float, float, str]]] = defaultdict(list)
        for lat, lng, pcd in zip(lats, lngs, postcodes):
            gx = int(math.floor(lng / GRID_CELL_SIZE))
            gy = int(math.floor(lat / GRID_CELL_SIZE))
            self.grid[(gx, gy)].append((lat, lng, pcd))
        log.info("Postcode spatial index: %d cells, %d postcodes", len(self.grid), len(lats))

    def nearest(self, lat: float, lng: float) -> str | None:
        """Return the postcode closest to (lat, lng), or None if none is nearby.

        Distance is an equirectangular approximation: the longitude
        difference is scaled by cos(latitude) so that both axes measure
        comparable ground distance. Comparing raw degree differences would
        over-weight longitude and could pick a postcode that is farther away
        on the ground.
        """
        gx = int(math.floor(lng / GRID_CELL_SIZE))
        gy = int(math.floor(lat / GRID_CELL_SIZE))
        # A degree of longitude shrinks with latitude; one of latitude does not.
        lng_scale = math.cos(math.radians(lat))
        best_dist = float("inf")
        best_pcd = None
        for dx in range(-1, 2):
            for dy in range(-1, 2):
                # .get() avoids creating empty cells in the defaultdict.
                for plat, plng, pcd in self.grid.get((gx + dx, gy + dy), []):
                    d = (plat - lat) ** 2 + ((plng - lng) * lng_scale) ** 2
                    if d < best_dist:
                        best_dist = d
                        best_pcd = pcd
        return best_pcd
|
||||
65
finder/storage.py
Normal file
65
finder/storage.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import polars as pl
|
||||
|
||||
log = logging.getLogger("rightmove")
|
||||
|
||||
|
||||
def write_parquet(properties: list[dict], path: Path) -> None:
    """Persist transformed property records to a parquet file via Polars.

    Args:
        properties: Records to write. Each one must contain every key named
            in the column schema below; a missing key raises ``KeyError``.
        path: Destination parquet file.

    An empty ``properties`` list only logs a warning; no file is written.
    """
    if not properties:
        log.warning("No properties to write to %s", path)
        return

    # Single source of truth for column names, order and dtypes.
    column_types = {
        "id": pl.Int64,
        "bedrooms": pl.Int32,
        "bathrooms": pl.Int32,
        "total_rooms": pl.Int32,
        "longitude": pl.Float64,
        "latitude": pl.Float64,
        "postcode": pl.Utf8,
        "address": pl.Utf8,
        "tenure": pl.Utf8,
        "property_type": pl.Utf8,
        "property_sub_type": pl.Utf8,
        "price": pl.Int64,
        "price_frequency": pl.Utf8,
        "price_qualifier": pl.Utf8,
        "floorspace_sqm": pl.Float64,
        "url": pl.Utf8,
        "features": pl.List(pl.Utf8),
        "first_visible_date": pl.Utf8,
        "update_date": pl.Utf8,
        "outcode": pl.Utf8,
        "house_share": pl.Boolean,
    }

    # Pivot the row-oriented records into one list per column.
    columns = {
        name: [record[name] for record in properties]
        for name in column_types
    }
    frame = pl.DataFrame(columns, schema=column_types)

    frame.write_parquet(path)
    log.info("Wrote %d properties to %s", len(frame), path)
|
||||
124
finder/transform.py
Normal file
124
finder/transform.py
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
import logging
|
||||
import re
|
||||
|
||||
from constants import PROPERTY_TYPE_MAP, RIGHTMOVE_BASE
|
||||
from spatial import PostcodeSpatialIndex
|
||||
|
||||
log = logging.getLogger("rightmove")
|
||||
|
||||
|
||||
def parse_display_size(display_size: str | None) -> float | None:
|
||||
"""Parse displaySize like '499 sq. ft.' or '4,124 sq. ft.' to sqm."""
|
||||
if not display_size:
|
||||
return None
|
||||
# Try sq. ft. first
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", display_size, re.IGNORECASE)
|
||||
if m:
|
||||
sqft = float(m.group(1).replace(",", ""))
|
||||
return round(sqft * 0.092903, 1)
|
||||
# Try sq. m.
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", display_size, re.IGNORECASE)
|
||||
if m:
|
||||
return round(float(m.group(1).replace(",", "")), 1)
|
||||
return None
|
||||
|
||||
|
||||
def map_property_type(sub_type: str | None) -> str:
    """Translate a propertySubType into its canonical property type.

    Falls back to "Other" for an empty value, and for a sub-type that has no
    (truthy) entry in ``PROPERTY_TYPE_MAP`` — the latter is also logged.
    """
    if sub_type:
        mapped = PROPERTY_TYPE_MAP.get(sub_type)
        if mapped:
            return mapped
        log.warning("Unknown propertySubType: %r — mapping to Other", sub_type)
    return "Other"
|
||||
|
||||
|
||||
def extract_tenure(tenure_obj: dict | None) -> str | None:
|
||||
"""Extract tenure string from tenure object."""
|
||||
if not tenure_obj:
|
||||
return None
|
||||
tt = tenure_obj.get("tenureType", "")
|
||||
if tt == "FREEHOLD":
|
||||
return "Freehold"
|
||||
if tt == "LEASEHOLD":
|
||||
return "Leasehold"
|
||||
return None
|
||||
|
||||
|
||||
def fix_coords(lat: float, lng: float) -> tuple[float, float]:
    """Return (lat, lng), swapping the pair when it looks reversed.

    England is taken as lat ~49–56, lng ~-7–2. A pair already inside those
    bounds is returned unchanged; a pair that fits only once swapped is
    returned swapped; anything else is returned unchanged with a warning.
    """

    def within_england(latitude: float, longitude: float) -> bool:
        return 49 <= latitude <= 56 and -7 <= longitude <= 2

    if within_england(lat, lng):
        return lat, lng

    if within_england(lng, lat):
        log.debug("Swapping reversed coords: lat=%.4f lng=%.4f → lat=%.4f lng=%.4f", lat, lng, lng, lat)
        return lng, lat

    log.warning("Coords outside England bounds even after swap attempt: lat=%.4f lng=%.4f", lat, lng)
    return lat, lng
|
||||
|
||||
|
||||
def normalize_price(amount: int, frequency: str) -> int:
    """Express a price per month for weekly and yearly frequencies.

    Weekly amounts are multiplied by 52/12 and yearly amounts divided by 12,
    each rounded to an integer. Any other frequency leaves the amount
    untouched.
    """
    if frequency == "weekly":
        monthly = round(amount * 52 / 12)
    elif frequency == "yearly":
        monthly = round(amount / 12)
    else:
        monthly = amount
    return monthly
|
||||
|
||||
|
||||
def transform_property(prop: dict, outcode: str, pc_index: PostcodeSpatialIndex) -> dict | None:
    """Transform a raw Rightmove property dict into our output schema.

    Args:
        prop: One raw property record from a search response.
        outcode: Outcode the property was found under; copied to the output.
        pc_index: Index used to assign the nearest postcode to the
            property's coordinates.

    Returns:
        The output record, or ``None`` when the property has no location,
        lacks a latitude or longitude, or has no price amount.
    """
    loc = prop.get("location")
    if not loc:
        return None
    raw_lat = loc.get("latitude")
    raw_lng = loc.get("longitude")
    if raw_lat is None or raw_lng is None:
        return None

    lat, lng = fix_coords(raw_lat, raw_lng)

    # `x.get(key) or default` instead of `x.get(key, default)`: a key that is
    # present with an explicit null skips a .get default and would crash on
    # the next attribute access or iteration. Same guard as bedrooms below.
    price_obj = prop.get("price") or {}
    amount = price_obj.get("amount")
    if amount is None:
        return None
    frequency = price_obj.get("frequency", "")
    price = normalize_price(int(amount), frequency)

    display_prices = price_obj.get("displayPrices", [])
    price_qualifier = display_prices[0].get("displayPriceQualifier", "") if display_prices else ""

    sub_type = prop.get("propertySubType", "")
    bedrooms = prop.get("bedrooms", 0) or 0
    bathrooms = prop.get("bathrooms", 0) or 0

    # Keep only features that have a non-empty description.
    key_features = [kf.get("description", "") for kf in (prop.get("keyFeatures") or []) if kf.get("description")]

    listing_update = prop.get("listingUpdate") or {}
    update_date = listing_update.get("listingUpdateDate", "")

    postcode = pc_index.nearest(lat, lng)

    return {
        "id": prop.get("id"),
        "bedrooms": bedrooms,
        "bathrooms": bathrooms,
        "total_rooms": bedrooms + bathrooms,
        "longitude": lng,
        "latitude": lat,
        "postcode": postcode,
        "address": prop.get("displayAddress", ""),
        "tenure": extract_tenure(prop.get("tenure")),
        "property_type": map_property_type(sub_type),
        "property_sub_type": sub_type or "Unknown",
        "price": price,
        "price_frequency": frequency,
        "price_qualifier": price_qualifier,
        "floorspace_sqm": parse_display_size(prop.get("displaySize")),
        # A null propertyUrl would make the string concatenation raise.
        "url": RIGHTMOVE_BASE + (prop.get("propertyUrl") or ""),
        "features": key_features,
        "first_visible_date": prop.get("firstVisibleDate", ""),
        "update_date": update_date,
        "outcode": outcode,
        "house_share": sub_type == "House Share",
    }
|
||||
137
frontend/package-lock.json
generated
137
frontend/package-lock.json
generated
|
|
@ -21,6 +21,7 @@
|
|||
"pocketbase": "^0.26.8",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"react-joyride": "^2.9.3",
|
||||
"react-map-gl": "^7.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
@ -2033,6 +2034,11 @@
|
|||
"integrity": "sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@gilbarbara/deep-equal": {
|
||||
"version": "0.3.1",
|
||||
"resolved": "https://registry.npmjs.org/@gilbarbara/deep-equal/-/deep-equal-0.3.1.tgz",
|
||||
"integrity": "sha512-I7xWjLs2YSVMc5gGx1Z3ZG1lgFpITPndpi8Ku55GeEIKpACCPQNS/OTqQbxgTCfq0Ncvcc+CrFov96itVh6Qvw=="
|
||||
},
|
||||
"node_modules/@humanwhocodes/config-array": {
|
||||
"version": "0.13.0",
|
||||
"resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.13.0.tgz",
|
||||
|
|
@ -4623,7 +4629,6 @@
|
|||
"version": "15.7.15",
|
||||
"resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.15.tgz",
|
||||
"integrity": "sha512-F6bEyamV9jKGAFBEmlQnesRPGOQqS2+Uwi0Em15xenOxHaf2hv6L8YCVn3rPdPJOiJfPiCnLIRyvwVaqMY3MIw==",
|
||||
"devOptional": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/qs": {
|
||||
|
|
@ -4644,7 +4649,6 @@
|
|||
"version": "18.3.27",
|
||||
"resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.27.tgz",
|
||||
"integrity": "sha512-cisd7gxkzjBKU2GgdYrTdtQx1SORymWyaAFhaxQPK9bYO9ot3Y5OikQRvY0VYQtvwjeQnizCINJAenh/V7MK2w==",
|
||||
"devOptional": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/prop-types": "*",
|
||||
|
|
@ -6772,7 +6776,6 @@
|
|||
"version": "3.2.3",
|
||||
"resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz",
|
||||
"integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==",
|
||||
"devOptional": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/data-uri-to-buffer": {
|
||||
|
|
@ -6856,6 +6859,12 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"node_modules/deep-diff": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/deep-diff/-/deep-diff-1.0.2.tgz",
|
||||
"integrity": "sha512-aWS3UIVH+NPGCD1kki+DCU9Dua032iSsO43LqQpcs4R3+dVv7tX0qBGjiVHJHjplsoUM2XRO/KB92glqc68awg==",
|
||||
"deprecated": "Package no longer supported. Contact Support at https://www.npmjs.com/support for more info."
|
||||
},
|
||||
"node_modules/deep-is": {
|
||||
"version": "0.1.4",
|
||||
"resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz",
|
||||
|
|
@ -6875,6 +6884,14 @@
|
|||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/deepmerge": {
|
||||
"version": "4.3.1",
|
||||
"resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
|
||||
"integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/default-browser": {
|
||||
"version": "5.4.0",
|
||||
"resolved": "https://registry.npmjs.org/default-browser/-/default-browser-5.4.0.tgz",
|
||||
|
|
@ -9693,6 +9710,11 @@
|
|||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/is-lite": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/is-lite/-/is-lite-1.2.1.tgz",
|
||||
"integrity": "sha512-pgF+L5bxC+10hLBgf6R2P4ZZUBOQIIacbdo8YvuCP8/JvsWxG7aZ9p10DYuLtifFci4l3VITphhMlMV4Y+urPw=="
|
||||
},
|
||||
"node_modules/is-map": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/is-map/-/is-map-2.0.3.tgz",
|
||||
|
|
@ -10710,7 +10732,6 @@
|
|||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
||||
"integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
|
|
@ -11253,6 +11274,16 @@
|
|||
"resolved": "https://registry.npmjs.org/pocketbase/-/pocketbase-0.26.8.tgz",
|
||||
"integrity": "sha512-aQ/ewvS7ncvAE8wxoW10iAZu6ElgbeFpBhKPnCfvRovNzm2gW8u/sQNPGN6vNgVEagz44kK//C61oKjfa+7Low=="
|
||||
},
|
||||
"node_modules/popper.js": {
|
||||
"version": "1.16.1",
|
||||
"resolved": "https://registry.npmjs.org/popper.js/-/popper.js-1.16.1.tgz",
|
||||
"integrity": "sha512-Wb4p1J4zyFTbM+u6WuO4XstYx4Ky9Cewe4DWrel7B0w6VVICvPwdOpotjzcf6eD8TsckVnIMNONQyPIUFOUbCQ==",
|
||||
"deprecated": "You can find the new Popper v2 at @popperjs/core, this package is dedicated to the legacy v1",
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/popperjs"
|
||||
}
|
||||
},
|
||||
"node_modules/possible-typed-array-names": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz",
|
||||
|
|
@ -11608,7 +11639,6 @@
|
|||
"version": "15.8.1",
|
||||
"resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",
|
||||
"integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"loose-envify": "^1.4.0",
|
||||
|
|
@ -11854,13 +11884,89 @@
|
|||
"react": "^18.3.1"
|
||||
}
|
||||
},
|
||||
"node_modules/react-floater": {
|
||||
"version": "0.7.9",
|
||||
"resolved": "https://registry.npmjs.org/react-floater/-/react-floater-0.7.9.tgz",
|
||||
"integrity": "sha512-NXqyp9o8FAXOATOEo0ZpyaQ2KPb4cmPMXGWkx377QtJkIXHlHRAGer7ai0r0C1kG5gf+KJ6Gy+gdNIiosvSicg==",
|
||||
"dependencies": {
|
||||
"deepmerge": "^4.3.1",
|
||||
"is-lite": "^0.8.2",
|
||||
"popper.js": "^1.16.0",
|
||||
"prop-types": "^15.8.1",
|
||||
"tree-changes": "^0.9.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "15 - 18",
|
||||
"react-dom": "15 - 18"
|
||||
}
|
||||
},
|
||||
"node_modules/react-floater/node_modules/@gilbarbara/deep-equal": {
|
||||
"version": "0.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@gilbarbara/deep-equal/-/deep-equal-0.1.2.tgz",
|
||||
"integrity": "sha512-jk+qzItoEb0D0xSSmrKDDzf9sheQj/BAPxlgNxgmOaA3mxpUa6ndJLYGZKsJnIVEQSD8zcTbyILz7I0HcnBCRA=="
|
||||
},
|
||||
"node_modules/react-floater/node_modules/is-lite": {
|
||||
"version": "0.8.2",
|
||||
"resolved": "https://registry.npmjs.org/is-lite/-/is-lite-0.8.2.tgz",
|
||||
"integrity": "sha512-JZfH47qTsslwaAsqbMI3Q6HNNjUuq6Cmzzww50TdP5Esb6e1y2sK2UAaZZuzfAzpoI2AkxoPQapZdlDuP6Vlsw=="
|
||||
},
|
||||
"node_modules/react-floater/node_modules/tree-changes": {
|
||||
"version": "0.9.3",
|
||||
"resolved": "https://registry.npmjs.org/tree-changes/-/tree-changes-0.9.3.tgz",
|
||||
"integrity": "sha512-vvvS+O6kEeGRzMglTKbc19ltLWNtmNt1cpBoSYLj/iEcPVvpJasemKOlxBrmZaCtDJoF+4bwv3m01UKYi8mukQ==",
|
||||
"dependencies": {
|
||||
"@gilbarbara/deep-equal": "^0.1.1",
|
||||
"is-lite": "^0.8.2"
|
||||
}
|
||||
},
|
||||
"node_modules/react-innertext": {
|
||||
"version": "1.1.5",
|
||||
"resolved": "https://registry.npmjs.org/react-innertext/-/react-innertext-1.1.5.tgz",
|
||||
"integrity": "sha512-PWAqdqhxhHIv80dT9znP2KvS+hfkbRovFp4zFYHFFlOoQLRiawIic81gKb3U1wEyJZgMwgs3JoLtwryASRWP3Q==",
|
||||
"peerDependencies": {
|
||||
"@types/react": ">=0.0.0 <=99",
|
||||
"react": ">=0.0.0 <=99"
|
||||
}
|
||||
},
|
||||
"node_modules/react-is": {
|
||||
"version": "16.13.1",
|
||||
"resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
|
||||
"integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/react-joyride": {
|
||||
"version": "2.9.3",
|
||||
"resolved": "https://registry.npmjs.org/react-joyride/-/react-joyride-2.9.3.tgz",
|
||||
"integrity": "sha512-1+Mg34XK5zaqJ63eeBhqdbk7dlGCFp36FXwsEvgpjqrtyywX2C6h9vr3jgxP0bGHCw8Ilsp/nRDzNVq6HJ3rNw==",
|
||||
"dependencies": {
|
||||
"@gilbarbara/deep-equal": "^0.3.1",
|
||||
"deep-diff": "^1.0.2",
|
||||
"deepmerge": "^4.3.1",
|
||||
"is-lite": "^1.2.1",
|
||||
"react-floater": "^0.7.9",
|
||||
"react-innertext": "^1.1.5",
|
||||
"react-is": "^16.13.1",
|
||||
"scroll": "^3.0.1",
|
||||
"scrollparent": "^2.1.0",
|
||||
"tree-changes": "^0.11.2",
|
||||
"type-fest": "^4.27.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "15 - 18",
|
||||
"react-dom": "15 - 18"
|
||||
}
|
||||
},
|
||||
"node_modules/react-joyride/node_modules/type-fest": {
|
||||
"version": "4.41.0",
|
||||
"resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz",
|
||||
"integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==",
|
||||
"engines": {
|
||||
"node": ">=16"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/react-map-gl": {
|
||||
"version": "7.1.9",
|
||||
"resolved": "https://registry.npmjs.org/react-map-gl/-/react-map-gl-7.1.9.tgz",
|
||||
|
|
@ -12457,6 +12563,16 @@
|
|||
"url": "https://opencollective.com/webpack"
|
||||
}
|
||||
},
|
||||
"node_modules/scroll": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/scroll/-/scroll-3.0.1.tgz",
|
||||
"integrity": "sha512-pz7y517OVls1maEzlirKO5nPYle9AXsFzTMNJrRGmT951mzpIBy7sNHOg5o/0MQd/NqliCiWnAi0kZneMPFLcg=="
|
||||
},
|
||||
"node_modules/scrollparent": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/scrollparent/-/scrollparent-2.1.0.tgz",
|
||||
"integrity": "sha512-bnnvJL28/Rtz/kz2+4wpBjHzWoEzXhVg/TE8BeVGJHUqE8THNIRnDxDWMktwM+qahvlRdvlLdsQfYe+cuqfZeA=="
|
||||
},
|
||||
"node_modules/select-hose": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/select-hose/-/select-hose-2.0.0.tgz",
|
||||
|
|
@ -13754,6 +13870,15 @@
|
|||
"node": ">=0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/tree-changes": {
|
||||
"version": "0.11.3",
|
||||
"resolved": "https://registry.npmjs.org/tree-changes/-/tree-changes-0.11.3.tgz",
|
||||
"integrity": "sha512-r14mvDZ6tqz8PRQmlFKjhUVngu4VZ9d92ON3tp0EGpFBE6PAHOq8Bx8m8ahbNoGE3uI/npjYcJiqVydyOiYXag==",
|
||||
"dependencies": {
|
||||
"@gilbarbara/deep-equal": "^0.3.1",
|
||||
"is-lite": "^1.2.1"
|
||||
}
|
||||
},
|
||||
"node_modules/tree-dump": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/tree-dump/-/tree-dump-1.1.0.tgz",
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
"pocketbase": "^0.26.8",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"react-joyride": "^2.9.3",
|
||||
"react-map-gl": "^7.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import PricingPage from './components/pricing/PricingPage';
|
|||
import HomePage from './components/home/HomePage';
|
||||
import SavedSearchesPage from './components/saved-searches/SavedSearchesPage';
|
||||
import LearnPage from './components/learn/LearnPage';
|
||||
import AccountPage from './components/account/AccountPage';
|
||||
import Header, { type Page } from './components/ui/Header';
|
||||
import AuthModal from './components/ui/AuthModal';
|
||||
import SaveSearchModal from './components/ui/SaveSearchModal';
|
||||
|
|
@ -32,6 +33,8 @@ case 'saved-searches':
|
|||
return '/learn';
|
||||
case 'pricing':
|
||||
return '/pricing';
|
||||
case 'account':
|
||||
return '/account';
|
||||
default:
|
||||
return '/';
|
||||
}
|
||||
|
|
@ -42,6 +45,7 @@ function pathToPage(pathname: string): Page | null {
|
|||
if (pathname === '/saved') return 'saved-searches';
|
||||
if (pathname === '/learn') return 'learn';
|
||||
if (pathname === '/pricing') return 'pricing';
|
||||
if (pathname === '/account') return 'account';
|
||||
if (pathname === '/') return 'home';
|
||||
return null;
|
||||
}
|
||||
|
|
@ -92,6 +96,7 @@ export default function App() {
|
|||
register,
|
||||
logout,
|
||||
requestPasswordReset,
|
||||
refreshAuth,
|
||||
clearError,
|
||||
} = useAuth();
|
||||
const [showAuthModal, setShowAuthModal] = useState(false);
|
||||
|
|
@ -233,6 +238,8 @@ export default function App() {
|
|||
<PricingPage onOpenDashboard={() => navigateTo('dashboard')} />
|
||||
) : activePage === 'learn' ? (
|
||||
<LearnPage />
|
||||
) : activePage === 'account' && user ? (
|
||||
<AccountPage user={user} onRefreshAuth={refreshAuth} />
|
||||
) : activePage === 'saved-searches' ? (
|
||||
<SavedSearchesPage
|
||||
searches={savedSearches.searches}
|
||||
|
|
|
|||
131
frontend/src/components/account/AccountPage.tsx
Normal file
131
frontend/src/components/account/AccountPage.tsx
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
import { useState } from 'react';
|
||||
import type { AuthUser } from '../../hooks/useAuth';
|
||||
import { apiUrl, authHeaders, assertOk } from '../../lib/api';
|
||||
import { SpinnerIcon } from '../ui/icons/SpinnerIcon';
|
||||
import { CheckIcon } from '../ui/icons/CheckIcon';
|
||||
|
||||
const SUBSCRIPTION_OPTIONS = ['free', 'rental', 'buyer'] as const;
|
||||
|
||||
const SUBSCRIPTION_LABELS: Record<string, string> = {
|
||||
free: 'Free',
|
||||
rental: 'Rental',
|
||||
buyer: 'Buyer',
|
||||
};
|
||||
|
||||
export default function AccountPage({
|
||||
user,
|
||||
onRefreshAuth,
|
||||
}: {
|
||||
user: AuthUser;
|
||||
onRefreshAuth: () => Promise<void>;
|
||||
}) {
|
||||
const [selectedSubscription, setSelectedSubscription] = useState(user.subscription || 'free');
|
||||
const [saving, setSaving] = useState(false);
|
||||
const [saved, setSaved] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const handleSave = async () => {
|
||||
setSaving(true);
|
||||
setError(null);
|
||||
setSaved(false);
|
||||
try {
|
||||
const res = await fetch(apiUrl('subscription'), {
|
||||
method: 'PATCH',
|
||||
...authHeaders({
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ subscription: selectedSubscription }),
|
||||
}),
|
||||
});
|
||||
assertOk(res, 'Update subscription');
|
||||
await onRefreshAuth();
|
||||
setSaved(true);
|
||||
setTimeout(() => setSaved(false), 2000);
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : 'Failed to update subscription';
|
||||
setError(msg);
|
||||
} finally {
|
||||
setSaving(false);
|
||||
}
|
||||
};
|
||||
|
||||
const badgeColor =
|
||||
user.subscription === 'buyer'
|
||||
? 'bg-teal-100 text-teal-700 dark:bg-teal-900/30 dark:text-teal-400'
|
||||
: user.subscription === 'rental'
|
||||
? 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400'
|
||||
: 'bg-warm-100 text-warm-600 dark:bg-warm-700 dark:text-warm-300';
|
||||
|
||||
return (
|
||||
<div className="flex-1 overflow-y-auto bg-warm-50 dark:bg-navy-950">
|
||||
<div className="max-w-lg mx-auto px-6 py-16">
|
||||
<h1 className="text-2xl font-bold text-navy-950 dark:text-warm-100 mb-8">Account</h1>
|
||||
|
||||
<div className="bg-white dark:bg-warm-800 rounded-xl border border-warm-200 dark:border-warm-700 divide-y divide-warm-200 dark:divide-warm-700">
|
||||
{/* Email */}
|
||||
<div className="px-5 py-4 flex items-center justify-between">
|
||||
<div>
|
||||
<p className="text-sm text-warm-500 dark:text-warm-400">Email</p>
|
||||
<p className="text-navy-950 dark:text-warm-100 font-medium">{user.email}</p>
|
||||
</div>
|
||||
<span
|
||||
className={`text-xs font-medium px-2 py-0.5 rounded-full ${
|
||||
user.verified
|
||||
? 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400'
|
||||
: 'bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-400'
|
||||
}`}
|
||||
>
|
||||
{user.verified ? 'Verified' : 'Unverified'}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Subscription */}
|
||||
<div className="px-5 py-4 flex items-center justify-between">
|
||||
<div>
|
||||
<p className="text-sm text-warm-500 dark:text-warm-400">Subscription</p>
|
||||
<span className={`inline-block text-sm font-medium px-2.5 py-0.5 rounded-full mt-1 ${badgeColor}`}>
|
||||
{SUBSCRIPTION_LABELS[user.subscription] || user.subscription || 'Free'}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Admin section */}
|
||||
{user.isAdmin && (
|
||||
<div className="px-5 py-4">
|
||||
<p className="text-sm text-warm-500 dark:text-warm-400 mb-3">
|
||||
Admin: Change subscription
|
||||
</p>
|
||||
<div className="flex items-center gap-3">
|
||||
<select
|
||||
value={selectedSubscription}
|
||||
onChange={(e) => setSelectedSubscription(e.target.value)}
|
||||
className="flex-1 px-3 py-2 rounded-lg border border-warm-200 dark:border-warm-700 bg-white dark:bg-warm-900 text-navy-950 dark:text-warm-200 text-sm"
|
||||
>
|
||||
{SUBSCRIPTION_OPTIONS.map((opt) => (
|
||||
<option key={opt} value={opt}>
|
||||
{SUBSCRIPTION_LABELS[opt]}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
<button
|
||||
onClick={handleSave}
|
||||
disabled={saving || selectedSubscription === user.subscription}
|
||||
className="px-4 py-2 rounded-lg bg-teal-600 hover:bg-teal-700 text-white text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
|
||||
>
|
||||
{saving ? (
|
||||
<SpinnerIcon className="w-4 h-4 animate-spin" />
|
||||
) : saved ? (
|
||||
<CheckIcon className="w-4 h-4" />
|
||||
) : null}
|
||||
{saved ? 'Saved' : 'Save'}
|
||||
</button>
|
||||
</div>
|
||||
{error && (
|
||||
<p className="mt-2 text-sm text-red-600 dark:text-red-400">{error}</p>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
@ -1,131 +1,63 @@
|
|||
import { useRef, useEffect } from 'react';
|
||||
import { useMemo } from 'react';
|
||||
|
||||
const HEX_COUNT = 70;
|
||||
const TAU = Math.PI * 2;
|
||||
const HEX_COUNT = 50;
|
||||
|
||||
interface Hex {
|
||||
x: number;
|
||||
y: number;
|
||||
baseY: number;
|
||||
interface HexConfig {
|
||||
size: number;
|
||||
opacity: number;
|
||||
speed: number;
|
||||
phase: number;
|
||||
top: number;
|
||||
driftDuration: number;
|
||||
bobDuration: number;
|
||||
bobAmount: number;
|
||||
delay: number;
|
||||
reverse: boolean;
|
||||
}
|
||||
|
||||
function initHexes(w: number, h: number): Hex[] {
|
||||
const hexes: Hex[] = [];
|
||||
function generateHexes(): HexConfig[] {
|
||||
const hexes: HexConfig[] = [];
|
||||
for (let i = 0; i < HEX_COUNT; i++) {
|
||||
const y = Math.random() * h;
|
||||
const side = Math.random() < 0.5 ? 'left' : 'right';
|
||||
const x = side === 'left' ? Math.random() * w * 0.3 : w * 0.7 + Math.random() * w * 0.3;
|
||||
const driftDuration = 18 + Math.random() * 35;
|
||||
hexes.push({
|
||||
x,
|
||||
y,
|
||||
baseY: y,
|
||||
size: 8 + Math.random() * 20,
|
||||
opacity: 0.08 + Math.random() * 0.15,
|
||||
speed: 6 + Math.random() * 14,
|
||||
phase: Math.random() * TAU,
|
||||
size: 10 + Math.random() * 32,
|
||||
opacity: 0.06 + Math.random() * 0.18,
|
||||
top: Math.random() * 100,
|
||||
driftDuration,
|
||||
bobDuration: 3 + Math.random() * 5,
|
||||
bobAmount: 8 + Math.random() * 30,
|
||||
delay: -Math.random() * driftDuration,
|
||||
reverse: Math.random() < 0.3,
|
||||
});
|
||||
}
|
||||
return hexes;
|
||||
}
|
||||
|
||||
function drawHex(ctx: CanvasRenderingContext2D, cx: number, cy: number, r: number) {
|
||||
ctx.beginPath();
|
||||
for (let i = 0; i < 6; i++) {
|
||||
const angle = (TAU / 6) * i - Math.PI / 6;
|
||||
const px = cx + r * Math.cos(angle);
|
||||
const py = cy + r * Math.sin(angle);
|
||||
if (i === 0) ctx.moveTo(px, py);
|
||||
else ctx.lineTo(px, py);
|
||||
}
|
||||
ctx.closePath();
|
||||
}
|
||||
|
||||
export default function HexCanvas({ isDark = false }: { isDark?: boolean }) {
|
||||
const canvasRef = useRef<HTMLCanvasElement>(null);
|
||||
const hexesRef = useRef<Hex[]>([]);
|
||||
const animRef = useRef(0);
|
||||
const isDarkRef = useRef(isDark);
|
||||
isDarkRef.current = isDark;
|
||||
|
||||
useEffect(() => {
|
||||
const canvas = canvasRef.current;
|
||||
if (!canvas) return;
|
||||
const ctx = canvas.getContext('2d');
|
||||
if (!ctx) return;
|
||||
|
||||
let w = 0;
|
||||
let h = 0;
|
||||
|
||||
function resize() {
|
||||
const dpr = window.devicePixelRatio || 1;
|
||||
const rect = canvas!.parentElement!.getBoundingClientRect();
|
||||
w = rect.width;
|
||||
h = rect.height;
|
||||
canvas!.width = w * dpr;
|
||||
canvas!.height = h * dpr;
|
||||
canvas!.style.width = `${w}px`;
|
||||
canvas!.style.height = `${h}px`;
|
||||
ctx!.setTransform(dpr, 0, 0, dpr, 0, 0);
|
||||
hexesRef.current = initHexes(w, h);
|
||||
}
|
||||
|
||||
resize();
|
||||
const ro = new ResizeObserver(resize);
|
||||
ro.observe(canvas.parentElement!);
|
||||
|
||||
let prev = performance.now();
|
||||
|
||||
function frame(now: number) {
|
||||
const dt = (now - prev) / 1000;
|
||||
prev = now;
|
||||
ctx!.clearRect(0, 0, w, h);
|
||||
|
||||
for (const hex of hexesRef.current) {
|
||||
hex.x += hex.speed * dt * 0.3;
|
||||
if (hex.x > w * 0.3 + hex.size && hex.x < w * 0.7 - hex.size) {
|
||||
hex.x = w * 0.7 + hex.size;
|
||||
}
|
||||
if (hex.x > w + hex.size * 2) {
|
||||
hex.x = -hex.size * 2;
|
||||
hex.y = Math.random() * h;
|
||||
hex.baseY = hex.y;
|
||||
}
|
||||
|
||||
const bob = Math.sin(now / 1000 + hex.phase) * 8;
|
||||
hex.y = hex.baseY + bob;
|
||||
|
||||
const dark = isDarkRef.current;
|
||||
ctx!.globalAlpha = hex.opacity * (dark ? 0.6 : 1);
|
||||
ctx!.fillStyle = dark ? '#058172' : '#00a28c';
|
||||
drawHex(ctx!, hex.x, hex.y, hex.size);
|
||||
ctx!.fill();
|
||||
|
||||
ctx!.globalAlpha = hex.opacity * 0.5 * (dark ? 0.6 : 1);
|
||||
ctx!.strokeStyle = dark ? '#0a665b' : '#05c9aa';
|
||||
ctx!.lineWidth = 1;
|
||||
drawHex(ctx!, hex.x, hex.y, hex.size);
|
||||
ctx!.stroke();
|
||||
}
|
||||
|
||||
animRef.current = requestAnimationFrame(frame);
|
||||
}
|
||||
|
||||
animRef.current = requestAnimationFrame(frame);
|
||||
return () => {
|
||||
cancelAnimationFrame(animRef.current);
|
||||
ro.disconnect();
|
||||
};
|
||||
}, []);
|
||||
const hexes = useMemo(generateHexes, []);
|
||||
|
||||
return (
|
||||
<canvas
|
||||
ref={canvasRef}
|
||||
className="absolute inset-0 pointer-events-none"
|
||||
style={{ zIndex: 0 }}
|
||||
/>
|
||||
<div className="absolute inset-0 overflow-hidden pointer-events-none" style={{ zIndex: 0 }}>
|
||||
{hexes.map((hex, i) => (
|
||||
<div
|
||||
key={i}
|
||||
className="absolute"
|
||||
style={{
|
||||
top: `${hex.top}%`,
|
||||
animation: `hex-drift ${hex.driftDuration}s linear ${hex.delay}s infinite${hex.reverse ? ' reverse' : ''}`,
|
||||
}}
|
||||
>
|
||||
<div
|
||||
className="bg-teal-500"
|
||||
style={{
|
||||
width: hex.size,
|
||||
height: hex.size,
|
||||
opacity: hex.opacity * (isDark ? 0.6 : 1),
|
||||
clipPath: 'polygon(50% 0%, 100% 25%, 100% 75%, 50% 100%, 0% 75%, 0% 25%)',
|
||||
animation: `hex-bob ${hex.bobDuration}s ease-in-out infinite`,
|
||||
'--bob': `${hex.bobAmount}px`,
|
||||
} as React.CSSProperties}
|
||||
/>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ export default function HomePage({
|
|||
<HexCanvas isDark={theme === 'dark'} />
|
||||
{/* Radial teal glow */}
|
||||
<div className="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-[600px] h-[400px] bg-teal-500/[0.07] rounded-full blur-3xl pointer-events-none" />
|
||||
<div className="relative z-10 max-w-4xl mx-auto px-6">
|
||||
<div className="relative z-10 max-w-4xl mx-auto px-6 md:px-10 py-6 backdrop-blur-sm bg-navy-950/30 rounded-2xl">
|
||||
<p className="text-teal-400 font-semibold tracking-wide uppercase text-sm mb-4">
|
||||
Browsing listings is not a strategy. Knowing what you want is.
|
||||
</p>
|
||||
|
|
|
|||
|
|
@ -119,7 +119,37 @@ export default memo(function Filters({
|
|||
onAiFilterSubmit,
|
||||
}: FiltersProps) {
|
||||
const availableFeatures = features.filter((f) => !enabledFeatures.has(f.name));
|
||||
const enabledFeatureList = features.filter((f) => enabledFeatures.has(f.name));
|
||||
const enabledFeatureList = features.filter(
|
||||
(f) => enabledFeatures.has(f.name) && f.name !== 'Listing status'
|
||||
);
|
||||
|
||||
const listingToggles = useMemo(() => {
|
||||
const val = filters['Listing status'] as string[] | undefined;
|
||||
if (!val) return { historical: true, buy: true, rent: true };
|
||||
return {
|
||||
historical: val.includes('Historical sale'),
|
||||
buy: val.includes('For sale'),
|
||||
rent: val.includes('For rent'),
|
||||
};
|
||||
}, [filters]);
|
||||
|
||||
const handleListingToggle = useCallback(
|
||||
(key: 'historical' | 'buy' | 'rent') => {
|
||||
const next = { ...listingToggles, [key]: !listingToggles[key] };
|
||||
const allOn = next.historical && next.buy && next.rent;
|
||||
const allOff = !next.historical && !next.buy && !next.rent;
|
||||
if (allOn || allOff) {
|
||||
onRemoveFilter('Listing status');
|
||||
return;
|
||||
}
|
||||
const values: string[] = [];
|
||||
if (next.historical) values.push('Historical sale');
|
||||
if (next.buy) values.push('For sale');
|
||||
if (next.rent) values.push('For rent');
|
||||
onFilterChange('Listing status', values);
|
||||
},
|
||||
[listingToggles, onFilterChange, onRemoveFilter]
|
||||
);
|
||||
|
||||
const containerRef = useRef<HTMLDivElement>(null);
|
||||
const [showPhilosophy, setShowPhilosophy] = useState(false);
|
||||
|
|
@ -155,7 +185,8 @@ export default memo(function Filters({
|
|||
return scales;
|
||||
}, [features]);
|
||||
|
||||
const badgeCount = enabledFeatureList.length + activeModes.length;
|
||||
const hasListingFilter = !listingToggles.historical || !listingToggles.buy || !listingToggles.rent;
|
||||
const badgeCount = enabledFeatureList.length + activeModes.length + (hasListingFilter ? 1 : 0);
|
||||
|
||||
return (
|
||||
<div ref={containerRef} className="flex flex-col bg-white dark:bg-navy-950 overflow-y-auto md:overflow-hidden h-full">
|
||||
|
|
@ -171,6 +202,17 @@ export default memo(function Filters({
|
|||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div className="shrink-0 flex items-center gap-2 px-3 py-2 border-b border-warm-200 dark:border-navy-700">
|
||||
<span className="text-xs font-medium text-warm-500 dark:text-warm-400">Show</span>
|
||||
<PillGroup>
|
||||
<PillToggle label="Historical" active={listingToggles.historical}
|
||||
onClick={() => handleListingToggle('historical')} size="xs" />
|
||||
<PillToggle label="Buy" active={listingToggles.buy}
|
||||
onClick={() => handleListingToggle('buy')} size="xs" />
|
||||
<PillToggle label="Rent" active={listingToggles.rent}
|
||||
onClick={() => handleListingToggle('rent')} size="xs" />
|
||||
</PillGroup>
|
||||
</div>
|
||||
<div className="shrink-0 md:shrink md:min-h-0 flex flex-col md:basis-[40%]">
|
||||
<div className="shrink-0 flex items-center justify-between px-3 py-2 border-b border-warm-200 dark:border-navy-700">
|
||||
<div className="flex items-center gap-2">
|
||||
|
|
|
|||
|
|
@ -118,7 +118,7 @@ export default function LocationSearch({
|
|||
}
|
||||
|
||||
return (
|
||||
<div ref={containerRef} className="absolute top-3 left-3 z-10 flex flex-col">
|
||||
<div ref={containerRef} data-tutorial="search" className="absolute top-3 left-3 z-10 flex flex-col">
|
||||
<div className="flex items-center shadow-lg rounded overflow-hidden bg-white dark:bg-warm-800">
|
||||
<SearchIcon className="w-4 h-4 text-warm-400 dark:text-warm-500 ml-3 shrink-0" />
|
||||
<PlaceSearchInput
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ interface MapProps {
|
|||
features: FeatureMeta[];
|
||||
selectedHexagonId: string | null;
|
||||
hoveredHexagonId: string | null;
|
||||
onHexagonClick: (id: string, isPostcode?: boolean) => void;
|
||||
onHexagonClick: (id: string, isPostcode?: boolean, geometry?: PostcodeGeometry) => void;
|
||||
onHexagonHover: (h3: string | null, x?: number, y?: number) => void;
|
||||
initialViewState?: ViewState;
|
||||
theme?: 'light' | 'dark';
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { useState, useEffect, useMemo, useCallback } from 'react';
|
||||
import type { FeatureMeta, FeatureFilters, POICategoryGroup, ViewState } from '../../types';
|
||||
import type { FeatureMeta, FeatureFilters, POICategoryGroup, ViewState, PostcodeGeometry } from '../../types';
|
||||
import type { SearchedLocation } from './LocationSearch';
|
||||
import type { Page } from '../ui/Header';
|
||||
import Map from './Map';
|
||||
|
|
@ -18,6 +18,9 @@ import { usePaneResize } from '../../hooks/usePaneResize';
|
|||
import { useAiFilters } from '../../hooks/useAiFilters';
|
||||
import { useAreaSummary } from '../../hooks/useAreaSummary';
|
||||
import { useUrlSync } from '../../hooks/useUrlSync';
|
||||
import { useTutorial } from '../../hooks/useTutorial';
|
||||
import { getTutorialStyles } from '../../lib/tutorial-styles';
|
||||
import Joyride from 'react-joyride';
|
||||
import {
|
||||
useTravelTime,
|
||||
TRANSPORT_MODES,
|
||||
|
|
@ -191,8 +194,8 @@ export default function MapPage({
|
|||
// On mobile, open drawer and switch tab when hexagon is clicked
|
||||
const { handleHexagonClick } = selection;
|
||||
const handleMobileHexagonClick = useCallback(
|
||||
(id: string, isPostcode?: boolean) => {
|
||||
handleHexagonClick(id, isPostcode);
|
||||
(id: string, isPostcode?: boolean, geometry?: PostcodeGeometry) => {
|
||||
handleHexagonClick(id, isPostcode, geometry);
|
||||
if (id) {
|
||||
setMobileDrawerOpen(true);
|
||||
}
|
||||
|
|
@ -225,6 +228,9 @@ export default function MapPage({
|
|||
mapData.resolution,
|
||||
]);
|
||||
|
||||
// Tutorial
|
||||
const tutorial = useTutorial(initialLoading, isMobile);
|
||||
|
||||
// AI area summary
|
||||
const aiSummary = useAreaSummary({
|
||||
stats: selection.areaStats,
|
||||
|
|
@ -551,8 +557,20 @@ export default function MapPage({
|
|||
</div>
|
||||
)}
|
||||
|
||||
<Joyride
|
||||
steps={tutorial.steps}
|
||||
run={tutorial.run}
|
||||
continuous
|
||||
showProgress
|
||||
showSkipButton
|
||||
callback={tutorial.handleCallback}
|
||||
styles={getTutorialStyles(theme)}
|
||||
disableScrolling
|
||||
/>
|
||||
|
||||
{/* Left Pane */}
|
||||
<div
|
||||
data-tutorial="filters"
|
||||
className="flex bg-white dark:bg-navy-950 shadow-lg overflow-hidden"
|
||||
style={{ width: leftPaneWidth }}
|
||||
>
|
||||
|
|
@ -566,7 +584,7 @@ export default function MapPage({
|
|||
</div>
|
||||
|
||||
{/* Map */}
|
||||
<div className="flex-1 relative">
|
||||
<div data-tutorial="map" className="flex-1 relative">
|
||||
<Map
|
||||
data={mapData.data}
|
||||
postcodeData={mapData.postcodeData}
|
||||
|
|
@ -599,6 +617,7 @@ export default function MapPage({
|
|||
)}
|
||||
{/* Floating POI button */}
|
||||
<button
|
||||
data-tutorial="poi-button"
|
||||
onClick={() => setPoiPaneOpen((p) => !p)}
|
||||
className={`absolute bottom-4 right-4 z-10 p-2 rounded-lg shadow-lg bg-white dark:bg-warm-800 ${poiPaneOpen ? 'text-teal-600 dark:text-teal-400' : 'text-warm-500 dark:text-warm-400 hover:text-teal-600 dark:hover:text-teal-400'}`}
|
||||
>
|
||||
|
|
@ -614,6 +633,7 @@ export default function MapPage({
|
|||
|
||||
{/* Right Pane */}
|
||||
<div
|
||||
data-tutorial="right-pane"
|
||||
className="flex bg-white dark:bg-navy-950 shadow-lg z-10"
|
||||
style={{ width: rightPaneWidth }}
|
||||
>
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ import { SpinnerIcon } from './icons/SpinnerIcon';
|
|||
import UserMenu from './UserMenu';
|
||||
import MobileMenu from './MobileMenu';
|
||||
|
||||
export type Page = 'home' | 'dashboard' | 'saved-searches' | 'learn' | 'pricing';
|
||||
export type Page = 'home' | 'dashboard' | 'saved-searches' | 'learn' | 'pricing' | 'account';
|
||||
|
||||
export default function Header({
|
||||
activePage,
|
||||
|
|
@ -200,7 +200,7 @@ export default function Header({
|
|||
{!isMobile && (
|
||||
<>
|
||||
{user ? (
|
||||
<UserMenu user={user} onLogout={onLogout} />
|
||||
<UserMenu user={user} onLogout={onLogout} onPageChange={onPageChange} />
|
||||
) : (
|
||||
<>
|
||||
<button
|
||||
|
|
|
|||
|
|
@ -83,6 +83,7 @@ export default function MobileMenu({
|
|||
{user && mobileNavItem('saved-searches', 'Saved')}
|
||||
{mobileNavItem('learn', 'Learn')}
|
||||
{mobileNavItem('pricing', 'Pricing')}
|
||||
{user && mobileNavItem('account', 'Account')}
|
||||
|
||||
{/* Dashboard actions */}
|
||||
{activePage === 'dashboard' && (
|
||||
|
|
|
|||
|
|
@ -1,7 +1,16 @@
|
|||
import { useState, useRef, useEffect } from 'react';
|
||||
import type { AuthUser } from '../../hooks/useAuth';
|
||||
import type { Page } from './Header';
|
||||
|
||||
export default function UserMenu({ user, onLogout }: { user: AuthUser; onLogout: () => void }) {
|
||||
export default function UserMenu({
|
||||
user,
|
||||
onLogout,
|
||||
onPageChange,
|
||||
}: {
|
||||
user: AuthUser;
|
||||
onLogout: () => void;
|
||||
onPageChange: (page: Page) => void;
|
||||
}) {
|
||||
const [open, setOpen] = useState(false);
|
||||
const menuRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
|
|
@ -37,6 +46,15 @@ export default function UserMenu({ user, onLogout }: { user: AuthUser; onLogout:
|
|||
</p>
|
||||
</div>
|
||||
<div className="p-1">
|
||||
<button
|
||||
onClick={() => {
|
||||
setOpen(false);
|
||||
onPageChange('account');
|
||||
}}
|
||||
className="w-full text-left px-3 py-2 text-sm text-warm-700 dark:text-warm-300 hover:bg-warm-50 dark:hover:bg-warm-700 rounded"
|
||||
>
|
||||
Account
|
||||
</button>
|
||||
<button
|
||||
onClick={() => {
|
||||
setOpen(false);
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ export interface AuthUser {
|
|||
id: string;
|
||||
email: string;
|
||||
verified: boolean;
|
||||
isAdmin: boolean;
|
||||
subscription: string;
|
||||
}
|
||||
|
||||
function recordToUser(record: { id: string; [key: string]: unknown }): AuthUser {
|
||||
|
|
@ -15,6 +17,8 @@ function recordToUser(record: { id: string; [key: string]: unknown }): AuthUser
|
|||
id: record.id,
|
||||
email: record.email,
|
||||
verified: typeof record.verified === 'boolean' ? record.verified : false,
|
||||
isAdmin: typeof record.is_admin === 'boolean' ? record.is_admin : false,
|
||||
subscription: typeof record.subscription === 'string' ? record.subscription : 'free',
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -110,6 +114,11 @@ export function useAuth() {
|
|||
}
|
||||
}, []);
|
||||
|
||||
const refreshAuth = useCallback(async () => {
|
||||
const result = await pb.collection('users').authRefresh();
|
||||
setUser(recordToUser(result.record));
|
||||
}, []);
|
||||
|
||||
const clearError = useCallback(() => {
|
||||
setError(null);
|
||||
}, []);
|
||||
|
|
@ -123,6 +132,7 @@ export function useAuth() {
|
|||
loginWithOAuth,
|
||||
logout,
|
||||
requestPasswordReset,
|
||||
refreshAuth,
|
||||
clearError,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { useCallback, useRef, useState, useMemo } from 'react';
|
||||
import { useCallback, useRef, useState, useMemo, useEffect } from 'react';
|
||||
import { H3HexagonLayer } from '@deck.gl/geo-layers';
|
||||
import { GeoJsonLayer, IconLayer, TextLayer, ScatterplotLayer } from '@deck.gl/layers';
|
||||
import type { PickingInfo } from '@deck.gl/core';
|
||||
|
|
@ -18,6 +18,7 @@ import {
|
|||
type TransportMode,
|
||||
type TravelTimeEntries,
|
||||
} from './useTravelTime';
|
||||
import { MarchingAntsExtension } from '../lib/MarchingAntsExtension';
|
||||
|
||||
/** Convert POI id (e.g. "n12345") to OpenStreetMap URL */
|
||||
function osmIdToUrl(id: string): string | null {
|
||||
|
|
@ -40,7 +41,7 @@ interface UseDeckLayersProps {
|
|||
features: FeatureMeta[];
|
||||
selectedHexagonId: string | null;
|
||||
hoveredHexagonId: string | null;
|
||||
onHexagonClick: (id: string, isPostcode?: boolean) => void;
|
||||
onHexagonClick: (id: string, isPostcode?: boolean, geometry?: PostcodeGeometry) => void;
|
||||
onHexagonHover: (h3: string | null, x?: number, y?: number) => void;
|
||||
theme: 'light' | 'dark';
|
||||
selectedPostcodeGeometry?: PostcodeGeometry | null;
|
||||
|
|
@ -89,9 +90,18 @@ export function useDeckLayers({
|
|||
}: UseDeckLayersProps) {
|
||||
const [popupInfo, setPopupInfo] = useState<PopupInfo | null>(null);
|
||||
const [hoverPosition, setHoverPosition] = useState<{ x: number; y: number } | null>(null);
|
||||
const [selectedPostcode, setSelectedPostcode] = useState<string | null>(null);
|
||||
const [hoveredPostcode, setHoveredPostcode] = useState<string | null>(null);
|
||||
|
||||
// Marching ants animation
|
||||
const [marchTime, setMarchTime] = useState(0);
|
||||
const hasPostcodeGeometry = selectedPostcodeGeometry != null;
|
||||
useEffect(() => {
|
||||
if (!hasPostcodeGeometry) return;
|
||||
setMarchTime(0);
|
||||
const id = setInterval(() => setMarchTime((t) => t + 0.3), 50);
|
||||
return () => clearInterval(id);
|
||||
}, [hasPostcodeGeometry]);
|
||||
|
||||
const isDark = theme === 'dark';
|
||||
const densityGradient = isDark ? DENSITY_GRADIENT_DARK : DENSITY_GRADIENT;
|
||||
|
||||
|
|
@ -110,8 +120,6 @@ export function useDeckLayers({
|
|||
selectedHexagonIdRef.current = selectedHexagonId;
|
||||
const hoveredHexagonIdRef = useRef(hoveredHexagonId);
|
||||
hoveredHexagonIdRef.current = hoveredHexagonId;
|
||||
const selectedPostcodeRef = useRef(selectedPostcode);
|
||||
selectedPostcodeRef.current = selectedPostcode;
|
||||
const hoveredPostcodeRef = useRef(hoveredPostcode);
|
||||
hoveredPostcodeRef.current = hoveredPostcode;
|
||||
|
||||
|
|
@ -233,8 +241,7 @@ export function useDeckLayers({
|
|||
const handlePostcodeClick = useCallback((info: PickingInfo<any>) => {
|
||||
const pc = info.object?.properties?.postcode;
|
||||
if (pc) {
|
||||
setSelectedPostcode((prev) => (prev === pc ? null : pc));
|
||||
onHexagonClickRef.current(pc, true);
|
||||
onHexagonClickRef.current(pc, true, info.object?.geometry);
|
||||
}
|
||||
}, []);
|
||||
|
||||
|
|
@ -265,7 +272,7 @@ export function useDeckLayers({
|
|||
}, [travelTimeEntries, travelTimeColorRanges]);
|
||||
|
||||
const colorTrigger = `${viewFeature}|${colorRange?.[0]}|${colorRange?.[1]}|${filterRange?.[0]}|${filterRange?.[1]}|${countRange.min}|${countRange.max}|${selectedHexagonId}|${hoveredHexagonId}|${theme}|${ttTrigger}`;
|
||||
const postcodeColorTrigger = `${viewFeature}|${colorRange?.[0]}|${colorRange?.[1]}|${filterRange?.[0]}|${filterRange?.[1]}|${postcodeCountRange.min}|${postcodeCountRange.max}|${selectedPostcode}|${hoveredPostcode}|${theme}|${ttTrigger}`;
|
||||
const postcodeColorTrigger = `${viewFeature}|${colorRange?.[0]}|${colorRange?.[1]}|${filterRange?.[0]}|${filterRange?.[1]}|${postcodeCountRange.min}|${postcodeCountRange.max}|${hoveredPostcode}|${theme}|${ttTrigger}`;
|
||||
|
||||
// --- Layers ---
|
||||
const hexLayer = useMemo(
|
||||
|
|
@ -423,8 +430,6 @@ export function useDeckLayers({
|
|||
getLineColor: (f) => {
|
||||
const pc = f.properties.postcode;
|
||||
const dark = isDarkRef.current;
|
||||
if (pc === selectedPostcodeRef.current)
|
||||
return [255, 255, 255, 255] as [number, number, number, number];
|
||||
if (pc === hoveredPostcodeRef.current)
|
||||
return [29, 228, 195, 200] as [number, number, number, number];
|
||||
return (dark ? [180, 170, 160, 100] : [100, 100, 100, 150]) as [
|
||||
|
|
@ -436,7 +441,6 @@ export function useDeckLayers({
|
|||
},
|
||||
getLineWidth: (f) => {
|
||||
const pc = f.properties.postcode;
|
||||
if (pc === selectedPostcodeRef.current) return 3;
|
||||
if (pc === hoveredPostcodeRef.current) return 2;
|
||||
return 1;
|
||||
},
|
||||
|
|
@ -500,37 +504,28 @@ export function useDeckLayers({
|
|||
[pois, stablePoiHover]
|
||||
);
|
||||
|
||||
// Check if the selected postcode has data (passes current filters)
|
||||
const selectedPostcodeHasData = useMemo(() => {
|
||||
if (!selectedPostcodeGeometry || !selectedHexagonId) return false;
|
||||
return postcodeData.some((f) => f.properties.postcode === selectedHexagonId);
|
||||
}, [selectedPostcodeGeometry, selectedHexagonId, postcodeData]);
|
||||
|
||||
// Highlight layer for selected postcode (from search)
|
||||
const selectedPostcodeHighlightLayer = useMemo(() => {
|
||||
// Marching ants highlight layer for selected postcode (click or search)
|
||||
const marchingAntsLayer = useMemo(() => {
|
||||
if (!selectedPostcodeGeometry) return null;
|
||||
const hasData = selectedPostcodeHasData;
|
||||
const feature = {
|
||||
type: 'Feature' as const,
|
||||
geometry: selectedPostcodeGeometry,
|
||||
properties: {},
|
||||
};
|
||||
return new GeoJsonLayer({
|
||||
id: 'searched-postcode-highlight',
|
||||
data: [feature],
|
||||
getFillColor: hasData
|
||||
? [29, 228, 195, 40] // teal tint when has data
|
||||
: [255, 180, 0, 30], // orange tint when filtered out
|
||||
getLineColor: hasData
|
||||
? [29, 228, 195, 255] // solid teal when has data
|
||||
: [255, 180, 0, 200], // orange when filtered out (no matching properties)
|
||||
getLineWidth: hasData ? 4 : 3,
|
||||
lineWidthUnits: 'pixels',
|
||||
id: 'marching-ants',
|
||||
data: [
|
||||
{
|
||||
type: 'Feature' as const,
|
||||
geometry: selectedPostcodeGeometry,
|
||||
properties: {},
|
||||
},
|
||||
],
|
||||
filled: false,
|
||||
stroked: true,
|
||||
filled: true,
|
||||
getLineColor: [29, 228, 195, 255],
|
||||
getLineWidth: 3,
|
||||
lineWidthUnits: 'pixels' as const,
|
||||
pickable: false,
|
||||
marchTime,
|
||||
extensions: [new MarchingAntsExtension()],
|
||||
});
|
||||
}, [selectedPostcodeGeometry, selectedPostcodeHasData]);
|
||||
}, [selectedPostcodeGeometry, marchTime]);
|
||||
|
||||
// Destination markers: one red dot per mode with a destination
|
||||
const destinationMarkerData = useMemo(() => {
|
||||
|
|
@ -566,7 +561,7 @@ export function useDeckLayers({
|
|||
const baseLayers: any[] = usePostcodeView
|
||||
? [postcodeLayer, postcodeLabelsLayer, poiLayer]
|
||||
: [hexLayer, poiLayer];
|
||||
if (selectedPostcodeHighlightLayer) baseLayers.push(selectedPostcodeHighlightLayer);
|
||||
if (marchingAntsLayer) baseLayers.push(marchingAntsLayer);
|
||||
if (destinationMarkerLayer) baseLayers.push(destinationMarkerLayer);
|
||||
return baseLayers;
|
||||
}, [
|
||||
|
|
@ -575,7 +570,7 @@ export function useDeckLayers({
|
|||
postcodeLayer,
|
||||
postcodeLabelsLayer,
|
||||
poiLayer,
|
||||
selectedPostcodeHighlightLayer,
|
||||
marchingAntsLayer,
|
||||
destinationMarkerLayer,
|
||||
]);
|
||||
|
||||
|
|
@ -594,7 +589,6 @@ export function useDeckLayers({
|
|||
postcodeCountRange,
|
||||
colorFeatureMeta,
|
||||
handleMouseLeave,
|
||||
selectedPostcode,
|
||||
hoveredPostcode,
|
||||
primaryTravelMode,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -99,15 +99,16 @@ export function useHexagonSelection({ filters, features, resolution }: UseHexago
|
|||
);
|
||||
|
||||
const handleHexagonClick = useCallback(
|
||||
(id: string, isPostcode = false) => {
|
||||
setSelectedPostcodeGeometry(null);
|
||||
(id: string, isPostcode = false, geometry?: PostcodeGeometry) => {
|
||||
if (selectedHexagon?.id === id) {
|
||||
setSelectedHexagon(null);
|
||||
setProperties([]);
|
||||
setAreaStats(null);
|
||||
setSelectedPostcodeGeometry(null);
|
||||
} else {
|
||||
const type = isPostcode ? 'postcode' : 'hexagon';
|
||||
setSelectedHexagon({ id, type, resolution });
|
||||
setSelectedPostcodeGeometry(isPostcode && geometry ? geometry : null);
|
||||
setProperties([]);
|
||||
setPropertiesTotal(0);
|
||||
setPropertiesOffset(0);
|
||||
|
|
|
|||
86
frontend/src/hooks/useTutorial.ts
Normal file
86
frontend/src/hooks/useTutorial.ts
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
import { useState, useCallback, useMemo } from 'react';
|
||||
import type { Step, CallBackProps } from 'react-joyride';
|
||||
import { ACTIONS, EVENTS, STATUS } from 'react-joyride';
|
||||
|
||||
const STORAGE_KEY = 'tutorial_completed';
|
||||
|
||||
/**
 * Builds one tour step anchored to the element carrying
 * `data-tutorial="<anchor>"`. The beacon is disabled on every step.
 */
const tutorialStep = (
  anchor: string,
  title: string,
  content: string,
  placement: Step['placement']
): Step => ({
  target: `[data-tutorial="${anchor}"]`,
  title,
  content,
  placement,
  disableBeacon: true,
});

/** Ordered steps of the onboarding tour. */
const STEPS: Step[] = [
  tutorialStep(
    'filters',
    'Filter Properties',
    'Use filters to narrow down properties by price, energy rating, floor area, and more. Pin a filter to colour the map by that feature.',
    'right'
  ),
  tutorialStep(
    'map',
    'Explore the Map',
    'Pan and zoom to explore property data across the UK. Click any hexagon to see detailed stats and individual properties.',
    'bottom'
  ),
  tutorialStep(
    'search',
    'Search Locations',
    'Search for a place name or postcode to jump directly to that area on the map.',
    'bottom'
  ),
  tutorialStep(
    'right-pane',
    'Area Stats & Properties',
    'After clicking a hexagon, view aggregated area statistics or browse individual properties in this pane.',
    'left'
  ),
  tutorialStep(
    'poi-button',
    'Points of Interest',
    'Toggle points of interest like schools, shops, and transport stops to see what amenities are nearby.',
    'left'
  ),
];
|
||||
|
||||
/**
 * Reads the persisted "tutorial completed" flag.
 *
 * Accessing `localStorage` can throw (e.g. when the browser blocks storage),
 * so failures are caught here. When the flag cannot be read we report the
 * tutorial as completed: without working storage the dismissal could never be
 * remembered, and the tour would otherwise reappear on every visit.
 */
function isTutorialCompleted(): boolean {
  try {
    return Boolean(localStorage.getItem(STORAGE_KEY));
  } catch {
    return true;
  }
}

/**
 * Persists (or clears) the "tutorial completed" flag, ignoring storage
 * failures so the in-memory state of the hook still updates.
 */
function storeTutorialCompleted(completed: boolean): void {
  try {
    if (completed) {
      localStorage.setItem(STORAGE_KEY, '1');
    } else {
      localStorage.removeItem(STORAGE_KEY);
    }
  } catch {
    // Storage unavailable or full: the change only lasts for this session.
  }
}

/**
 * Controls the onboarding tour.
 *
 * @param initialLoading - while true the tour is held back.
 * @param isMobile - when true the tour never runs.
 * @returns the tour steps, the effective `run` flag, the Joyride callback, and
 *   `resetTutorial` to clear the completion flag and start the tour again.
 */
export function useTutorial(initialLoading: boolean, isMobile: boolean) {
  // Storage is only consulted on the first render and never on mobile.
  const [run, setRun] = useState(() => !isMobile && !isTutorialCompleted());

  const shouldRun = run && !initialLoading && !isMobile;

  const handleCallback = useCallback((data: CallBackProps) => {
    const { status, action, type } = data;

    const tourEnded = status === STATUS.FINISHED || status === STATUS.SKIPPED;
    // Closing a tooltip via the X button also counts as completing the tour.
    const closedViaButton = action === ACTIONS.CLOSE && type === EVENTS.STEP_AFTER;

    if (tourEnded || closedViaButton) {
      storeTutorialCompleted(true);
      setRun(false);
    }
  }, []);

  const resetTutorial = useCallback(() => {
    storeTutorialCompleted(false);
    setRun(true);
  }, []);

  return useMemo(
    () => ({
      steps: STEPS,
      run: shouldRun,
      handleCallback,
      resetTutorial,
    }),
    [shouldRun, handleCallback, resetTutorial]
  );
}
|
||||
|
|
@ -40,6 +40,17 @@ h3 {
|
|||
color 0.2s ease;
|
||||
}
|
||||
|
||||
/* Hexagon background animations */
|
||||
@keyframes hex-drift {
|
||||
from { transform: translateX(-5vw); }
|
||||
to { transform: translateX(105vw); }
|
||||
}
|
||||
|
||||
@keyframes hex-bob {
|
||||
0%, 100% { transform: translateY(var(--bob)); }
|
||||
50% { transform: translateY(calc(var(--bob) * -1)); }
|
||||
}
|
||||
|
||||
/* Fade-in animation for homepage sections */
|
||||
.fade-in-section {
|
||||
opacity: 0;
|
||||
|
|
|
|||
53
frontend/src/lib/MarchingAntsExtension.ts
Normal file
53
frontend/src/lib/MarchingAntsExtension.ts
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
import { LayerExtension } from '@deck.gl/core';
|
||||
|
||||
/** Fragment-shader declaration: uniform block carrying the animation offset. */
const MARCHING_ANTS_FS_DECL = `\
uniform marchingAntsUniforms {
float marchTime;
} marchingAnts;`;

/**
 * Fragment-shader colour hook: alternates white and green segments along the
 * path, shifted by `marchTime`, keeping the incoming alpha.
 */
const MARCHING_ANTS_FS_FILTER_COLOR = `\
float marchSegLen = 4.0;
float marchPos = mod(vPathPosition.y - marchingAnts.marchTime, marchSegLen * 2.0);
if (marchPos < marchSegLen) {
color = vec4(1.0, 1.0, 1.0, color.a);
} else {
color = vec4(0.114, 0.894, 0.765, color.a);
}`;

/** Animates a marching-ants border on PathLayer sublayers (alternating white/green dashes). */
export class MarchingAntsExtension extends LayerExtension {
  static extensionName = 'MarchingAntsExtension';
  static defaultProps = {
    marchTime: { type: 'number', value: 0 },
  };

  /** Only layers whose state holds a `pathTesselator` are affected. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  isEnabled(layer: any): boolean {
    return 'pathTesselator' in layer.state;
  }

  /** Returns the shader module to inject, or null for layers that are not enabled. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  getShaders(extension: any): any {
    if (!extension.isEnabled(this)) return null;

    const marchingAntsModule = {
      name: 'marchingAnts',
      inject: {
        'fs:#decl': MARCHING_ANTS_FS_DECL,
        'fs:DECKGL_FILTER_COLOR': MARCHING_ANTS_FS_FILTER_COLOR,
      },
      uniformTypes: {
        marchTime: 'f32',
      },
    };
    return { modules: [marchingAntsModule] };
  }

  /** Forwards the current `marchTime` prop (0 when unset) to the shader module. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  updateState(_params: any, extension: any): void {
    if (!extension.isEnabled(this)) {
      return;
    }
    // @ts-expect-error setShaderModuleProps exists on Layer
    this.setShaderModuleProps({
      // @ts-expect-error marchTime is a custom prop from this extension
      marchingAnts: { marchTime: this.props.marchTime || 0 },
    });
  }
}
|
||||
52
frontend/src/lib/tutorial-styles.ts
Normal file
52
frontend/src/lib/tutorial-styles.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import type { Styles } from 'react-joyride';
|
||||
|
||||
/**
 * Returns the Joyride style overrides for the given colour theme.
 *
 * Sizes, radii and the primary colour are the same in both themes; only the
 * surface, overlay and text colours differ.
 */
export function getTutorialStyles(theme: 'light' | 'dark'): Partial<Styles> {
  // Selects the dark or light variant of a value.
  const pick = <T>(dark: T, light: T): T => (theme === 'dark' ? dark : light);

  const surface = pick('#292524', '#ffffff');
  const mutedText = pick('#a8a29e', '#78716c');

  return {
    options: {
      arrowColor: surface,
      backgroundColor: surface,
      overlayColor: pick('rgba(10,14,26,0.75)', 'rgba(0,0,0,0.5)'),
      primaryColor: '#00a28c',
      textColor: pick('#d6d3d1', '#44403c'),
      zIndex: 1000,
    },
    tooltip: { borderRadius: 8, padding: 16 },
    tooltipTitle: {
      color: pick('#f5f5f4', '#0a0e1a'),
      fontSize: 15,
      fontWeight: 600,
    },
    tooltipContent: {
      fontSize: 13,
      lineHeight: 1.5,
      padding: '8px 0 0',
    },
    buttonNext: {
      borderRadius: 6,
      fontSize: 13,
      fontWeight: 500,
      padding: '6px 14px',
    },
    buttonBack: {
      color: mutedText,
      fontSize: 13,
      fontWeight: 500,
      marginRight: 8,
    },
    // The skip button uses the inverse of the muted text pairing.
    buttonSkip: {
      color: pick('#78716c', '#a8a29e'),
      fontSize: 12,
    },
    buttonClose: { color: mutedText },
    spotlight: { borderRadius: 8 },
  };
}
|
||||
|
|
@ -16,7 +16,6 @@ from shapely import wkb
|
|||
from shapely.geometry import MultiPolygon, Polygon
|
||||
from tqdm import tqdm
|
||||
|
||||
from .pois import download_pbf
|
||||
|
||||
MIN_AREA_SQM = 5_000 # ~70m x 70m — skip pocket parks and small ponds
|
||||
|
||||
|
|
@ -103,12 +102,7 @@ def main():
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.pbf.exists():
|
||||
pbf_file = args.pbf
|
||||
print(f"Using existing PBF: {pbf_file}")
|
||||
else:
|
||||
download_pbf(args.pbf)
|
||||
|
||||
pbf_file = args.pbf
|
||||
print("Extracting greenspace/water areas from PBF (two-pass area assembly)...")
|
||||
with tqdm(
|
||||
unit=" areas", unit_scale=True, desc="Processing", smoothing=0.05
|
||||
|
|
|
|||
|
|
@ -1,121 +0,0 @@
|
|||
"""Shared utilities for price index, price estimate, and renovation premium scripts."""
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
|
||||
CURRENT_YEAR = 2025
|
||||
TERRACE_TYPES = [
|
||||
"Mid-Terrace",
|
||||
"End-Terrace",
|
||||
"Enclosed Mid-Terrace",
|
||||
"Enclosed End-Terrace",
|
||||
"Terraced",
|
||||
]
|
||||
FLAT_TYPES = ["Flats/Maisonettes", "Flat", "Maisonette"]
|
||||
TYPE_GROUPS = ["Detached", "Semi-Detached", "Terraced", "Flats", "Bungalow"]
|
||||
SHRINKAGE_K = 50
|
||||
|
||||
|
||||
def type_group_expr():
    """Polars expression mapping "Property type" to a coarse ``type_group``.

    Terrace variants become "Terraced", flat/maisonette variants become
    "Flats", "Bungalow", "Detached" and "Semi-Detached" pass through, and
    every other value becomes null.
    """
    prop_type = pl.col("Property type")

    is_terrace = prop_type.is_in(TERRACE_TYPES)
    is_flat = prop_type.is_in(FLAT_TYPES)
    is_bungalow = prop_type == "Bungalow"
    keeps_own_name = prop_type.is_in(["Detached", "Semi-Detached"])

    grouped = (
        pl.when(is_terrace)
        .then(pl.lit("Terraced"))
        .when(is_flat)
        .then(pl.lit("Flats"))
        .when(is_bungalow)
        .then(pl.lit("Bungalow"))
        .when(keeps_own_name)
        .then(prop_type)
        .otherwise(pl.lit(None))
    )
    return grouped.alias("type_group")
|
||||
|
||||
|
||||
def sector_expr():
    """Polars expression deriving ``sector`` from "Postcode".

    The last two characters of the postcode are dropped and surrounding
    whitespace is stripped from what remains.
    """
    postcode = pl.col("Postcode")
    kept_length = postcode.str.len_chars() - 2
    without_unit = postcode.str.slice(0, kept_length)
    return without_unit.str.strip_chars().alias("sector")
|
||||
|
||||
|
||||
def hierarchy_keys(sector: str) -> tuple[str, str]:
    """Split a sector string into its ``(district, area)`` parents.

    The district is everything before the last space (the whole string when
    there is no space). The area is the run of alphabetic characters at the
    start of the district, which may be empty.
    """
    head, separator, _ = sector.rpartition(" ")
    district = head if separator else sector

    # Measure the leading alphabetic prefix of the district.
    prefix_len = 0
    while prefix_len < len(district) and district[prefix_len].isalpha():
        prefix_len += 1

    return district, district[:prefix_len]
|
||||
|
||||
|
||||
AGE_BREAKS = [1900, 1930, 1950, 1967, 1983, 2000, 2010]
|
||||
AGE_LABELS = [
|
||||
"pre-1900",
|
||||
"1900-1929",
|
||||
"1930-1949",
|
||||
"1950-1966",
|
||||
"1967-1982",
|
||||
"1983-1999",
|
||||
"2000-2009",
|
||||
"2010+",
|
||||
]
|
||||
|
||||
HEDONIC_COLUMNS = [
|
||||
"Last known price",
|
||||
"Date of last transaction",
|
||||
"Property type",
|
||||
"Total floor area (sqm)",
|
||||
"Postcode",
|
||||
]
|
||||
|
||||
|
||||
def age_band_expr():
    """Polars expression mapping "Construction age" (a year) to ``age_band``.

    Null years stay null. Otherwise the label paired with the first break the
    year falls below is used, and years at or above the last break get the
    final label.
    """
    year = pl.col("Construction age")
    banded = pl.when(year.is_null()).then(pl.lit(None))
    # AGE_LABELS has one more entry than AGE_BREAKS; zip pairs each break with
    # the label for years below it and leaves the last label for `otherwise`.
    for upper_bound, label in zip(AGE_BREAKS, AGE_LABELS):
        banded = banded.when(year < upper_bound).then(pl.lit(label))
    return banded.otherwise(pl.lit(AGE_LABELS[-1])).alias("age_band")
|
||||
|
||||
|
||||
NON_REF_TYPES = ["Terraced", "Semi-Detached", "Flats", "Bungalow"]
|
||||
|
||||
|
||||
def build_hedonic_features(df: pl.DataFrame) -> np.ndarray:
    """Build the hedonic feature matrix from a frame with a ``type_group`` column.

    The result has one row per input row. The first column is the log of
    "Total floor area (sqm)" (areas below 1 are clamped to 1 first), followed
    by one 0/1 float32 dummy column per entry of ``NON_REF_TYPES``.
    """
    floor_area = df["Total floor area (sqm)"].to_numpy().astype(np.float32)
    groups = df["type_group"].to_numpy()

    columns = [np.log(np.maximum(floor_area, 1.0)).reshape(-1, 1)]
    columns.extend(
        (groups == type_name).astype(np.float32).reshape(-1, 1)
        for type_name in NON_REF_TYPES
    )
    return np.hstack(columns)
|
||||
|
||||
|
||||
def extract_centroids(input_path) -> dict[str, tuple[float, float]]:
    """Return the mean ``(lat, lon)`` of each postcode sector in the parquet file.

    Rows with a null postcode or null latitude are ignored.
    """
    print("Computing sector centroids...")

    usable_rows = pl.scan_parquet(input_path).select("Postcode", "lat", "lon").filter(
        pl.col("Postcode").is_not_null(), pl.col("lat").is_not_null()
    )
    per_sector = (
        usable_rows.with_columns(sector_expr())
        .group_by("sector")
        .agg(pl.col("lat").mean(), pl.col("lon").mean())
        .collect()
    )

    centroids = {
        row["sector"]: (row["lat"], row["lon"])
        for row in per_sector.iter_rows(named=True)
    }
    print(f" {len(centroids):,} sector centroids")
    return centroids
|
||||
|
|
@ -1,300 +0,0 @@
|
|||
"""Cross-Sectional Hedonic Model (Per-Type)
|
||||
|
||||
Trains separate OLS models per property type on recent sales (last 5 years)
|
||||
with sector fixed effects via Frisch-Waugh-Lovell demeaning:
|
||||
|
||||
log(price) = beta_type * log(floor_area) + alpha_sector_type + epsilon
|
||||
|
||||
Each type gets its own floor area elasticity and sector intercepts, capturing
|
||||
that detached houses (beta=0.74) have higher price sensitivity to size than
|
||||
terraced houses (beta=0.60), and a sector's value differs by property type.
|
||||
|
||||
Sector intercepts are hierarchically shrunk (sector → district → area → national)
|
||||
and spatially smoothed via KD-tree nearest neighbors.
|
||||
|
||||
Output: hedonic_model.json with per-type betas and sector intercepts.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from scipy.spatial import KDTree
|
||||
|
||||
from pipeline.transform._price_utils import (
|
||||
CURRENT_YEAR,
|
||||
HEDONIC_COLUMNS,
|
||||
SHRINKAGE_K,
|
||||
TYPE_GROUPS,
|
||||
extract_centroids,
|
||||
hierarchy_keys,
|
||||
sector_expr,
|
||||
type_group_expr,
|
||||
)
|
||||
|
||||
TRAINING_YEARS = 5
|
||||
SPATIAL_NEIGHBORS = 5
|
||||
SPATIAL_BLEND_K = 30
|
||||
|
||||
|
||||
def load_training_data(input_path: Path) -> pl.DataFrame:
    """Load the sales rows used to train the hedonic models.

    Keeps rows with a price, a positive floor area and a postcode, derives
    ``sale_year``, ``type_group`` and ``sector``, then keeps rows that have a
    type group and whose sale year lies within the last ``TRAINING_YEARS``
    years up to ``CURRENT_YEAR`` (both ends inclusive).
    """
    min_year = CURRENT_YEAR - TRAINING_YEARS
    print(f"Loading training data (sales {min_year}-{CURRENT_YEAR})...")

    floor_area = pl.col("Total floor area (sqm)")
    sale_year = pl.col("sale_year")

    has_model_inputs = [
        pl.col("Last known price").is_not_null(),
        floor_area.is_not_null(),
        floor_area > 0,
        pl.col("Postcode").is_not_null(),
    ]
    derived_columns = [
        pl.col("Date of last transaction").dt.year().alias("sale_year"),
        type_group_expr(),
        sector_expr(),
    ]
    in_training_window = [
        pl.col("type_group").is_not_null(),
        sale_year.is_not_null(),
        sale_year >= min_year,
        sale_year <= CURRENT_YEAR,
    ]

    query = pl.scan_parquet(input_path).select(*HEDONIC_COLUMNS)
    query = query.filter(*has_model_inputs)
    query = query.with_columns(*derived_columns)
    query = query.filter(*in_training_window)
    df = query.collect()

    print(f" {len(df):,} complete cases")
    return df
|
||||
|
||||
|
||||
def train_type_model(
    df: pl.DataFrame, type_group: str
) -> tuple[float, dict[str, float], dict[str, int], float]:
    """Fit the hedonic model for one property type.

    Log price is regressed on log floor area after subtracting each sector's
    mean from both, and the sector intercepts are then recovered from the
    sector means. A one-line fit summary (n, beta, R-squared, sector count)
    is printed.

    Returns ``(beta_fa, sector_intercepts, sector_counts, national_intercept)``,
    where the national intercept is the unweighted mean of the sector
    intercepts.
    """
    subset = df.filter(pl.col("type_group") == type_group)
    log_price = np.log(subset["Last known price"].to_numpy().astype(np.float64))
    floor_area = subset["Total floor area (sqm)"].to_numpy().astype(np.float64)
    X = np.log(np.maximum(floor_area, 1.0)).reshape(-1, 1)
    sectors = subset["sector"].to_list()

    # Row positions belonging to each sector, in first-seen order.
    rows_by_sector: dict[str, list[int]] = {}
    for row, sector in enumerate(sectors):
        rows_by_sector.setdefault(sector, []).append(row)

    # Within-sector demeaning of both regressor and response.
    X_within = np.empty_like(X)
    y_within = np.empty_like(log_price)
    mean_log_fa: dict[str, np.ndarray] = {}
    mean_log_price: dict[str, float] = {}
    for sector, rows in rows_by_sector.items():
        sel = np.array(rows)
        mean_log_fa[sector] = X[sel].mean(axis=0)
        mean_log_price[sector] = log_price[sel].mean()
        X_within[sel] = X[sel] - mean_log_fa[sector]
        y_within[sel] = log_price[sel] - mean_log_price[sector]
    sector_counts = {sector: len(rows) for sector, rows in rows_by_sector.items()}

    # OLS slope on the demeaned data.
    coefficients = np.linalg.lstsq(X_within, y_within, rcond=None)[0]
    beta_fa = float(coefficients[0])

    # Intercept per sector: mean response minus slope times mean regressor.
    sector_intercepts = {
        sector: float(mean_log_price[sector] - beta_fa * mean_log_fa[sector][0])
        for sector in rows_by_sector
    }
    national_intercept = float(np.mean(list(sector_intercepts.values())))

    # R-squared of slope plus sector intercept against the raw log prices.
    row_intercepts = np.array([sector_intercepts[s] for s in sectors])
    fitted = X[:, 0] * beta_fa + row_intercepts
    ss_res = np.sum((log_price - fitted) ** 2)
    ss_tot = np.sum((log_price - log_price.mean()) ** 2)
    r2 = 1 - ss_res / ss_tot

    print(
        f" {type_group:<15s}: n={len(subset):>9,} β_fa={beta_fa:.4f} "
        f"R²={r2:.4f} sectors={len(sector_intercepts):,}"
    )

    return beta_fa, sector_intercepts, sector_counts, national_intercept
|
||||
|
||||
|
||||
def shrink_intercepts(
    sector_intercepts: dict[str, float],
    sector_counts: dict[str, int],
) -> dict[str, float]:
    """Hierarchical shrinkage: sector -> district -> area -> national.

    District and area intercepts are count-weighted means of their sectors
    (plain means when all counts are zero). Each level is then pulled toward
    its already-shrunk parent with weight ``n / (n + SHRINKAGE_K)`` on its own
    value. Returns the shrunk intercept for every input sector.
    """

    def pool(groups: dict[str, list[tuple[float, int]]]):
        """Return (pooled mean, total count) dicts for grouped (value, n) pairs."""
        means: dict[str, float] = {}
        totals: dict[str, int] = {}
        for key, entries in groups.items():
            total_n = sum(n for _, n in entries)
            if total_n > 0:
                means[key] = sum(v * n for v, n in entries) / total_n
            else:
                means[key] = sum(v for v, _ in entries) / len(entries)
            totals[key] = total_n
        return means, totals

    def blend(own: float, parent: float, n: int) -> float:
        """Mix a level's own value with its parent according to its count."""
        w = n / (n + SHRINKAGE_K)
        return w * own + (1 - w) * parent

    national = float(np.mean(list(sector_intercepts.values())))

    # Parent lookups plus (value, count) entries grouped by district and area.
    sector_to_dist: dict[str, str] = {}
    dist_to_area: dict[str, str] = {}
    by_area: dict[str, list[tuple[float, int]]] = {}
    by_dist: dict[str, list[tuple[float, int]]] = {}
    for sector, value in sector_intercepts.items():
        district, area = hierarchy_keys(sector)
        sector_to_dist[sector] = district
        dist_to_area[district] = area
        entry = (value, sector_counts.get(sector, 0))
        by_area.setdefault(area, []).append(entry)
        by_dist.setdefault(district, []).append(entry)

    area_intercepts, area_counts = pool(by_area)
    dist_intercepts, dist_counts = pool(by_dist)

    area_shrunk = {
        area: blend(value, national, area_counts[area])
        for area, value in area_intercepts.items()
    }
    dist_shrunk = {
        district: blend(
            value,
            area_shrunk.get(dist_to_area[district], national),
            dist_counts[district],
        )
        for district, value in dist_intercepts.items()
    }
    return {
        sector: blend(
            value,
            dist_shrunk.get(sector_to_dist[sector], national),
            sector_counts.get(sector, 0),
        )
        for sector, value in sector_intercepts.items()
    }
|
||||
|
||||
|
||||
def spatial_smooth_intercepts(
    sector_intercepts: dict[str, float],
    centroids: dict[str, tuple[float, float]],
    sector_counts: dict[str, int],
) -> dict[str, float]:
    """Blend sparse sector intercepts with those of their nearest neighbours.

    Each sector with a centroid keeps weight ``n / (n + SPATIAL_BLEND_K)`` on
    its own intercept; the rest is spread over its neighbours in proportion to
    inverse distance. Sectors whose own weight exceeds 0.95, or that have no
    neighbour at a positive distance, are left unchanged. The input is
    returned as-is when at most ``SPATIAL_NEIGHBORS`` sectors have a centroid.
    Neighbour values are always read from the input intercepts.
    """
    located = [s for s in sector_intercepts if s in centroids]
    if len(located) <= SPATIAL_NEIGHBORS:
        return sector_intercepts

    # Centroids are (lat, lon); the second column is scaled by cos(mean lat).
    latlon = np.array([centroids[s] for s in located])
    lon_scale = np.cos(np.radians(np.mean(latlon[:, 0])))
    points = np.column_stack([latlon[:, 0], latlon[:, 1] * lon_scale])
    tree = KDTree(points)

    smoothed = dict(sector_intercepts)
    for point, sector in zip(points, located):
        n = sector_counts.get(sector, 0)
        own_weight = n / (n + SPATIAL_BLEND_K)
        if own_weight > 0.95:
            continue

        # The first hit is skipped (expected to be the query point itself).
        dists, idxs = tree.query(point, k=SPATIAL_NEIGHBORS + 1)
        neighbours = [
            (1.0 / dist, sector_intercepts[located[j]])
            for dist, j in zip(dists[1:], idxs[1:])
            if dist > 0 and located[j] in sector_intercepts
        ]
        if not neighbours:
            continue

        total_inv = sum(inv_dist for inv_dist, _ in neighbours)
        neighbour_share = 1.0 - own_weight
        blended = own_weight * sector_intercepts[sector]
        for inv_dist, value in neighbours:
            blended += neighbour_share * (inv_dist / total_inv) * value
        smoothed[sector] = blended

    return smoothed
|
||||
|
||||
|
||||
def main():
    """CLI entry point: train one hedonic model per type group and write them as JSON.

    For each type group the raw sector intercepts are shrunk hierarchically
    and then spatially smoothed before being stored.
    """
    parser = argparse.ArgumentParser(description="Train cross-sectional hedonic model")
    for flag, help_text in (
        ("--input", "Path to wide.parquet"),
        ("--output", "Output hedonic_model.json"),
    ):
        parser.add_argument(flag, type=Path, required=True, help=help_text)
    args = parser.parse_args()

    df = load_training_data(args.input)
    centroids = extract_centroids(args.input)

    print("\nTraining per-type models...")
    type_models = {}
    for tg in TYPE_GROUPS:
        beta_fa, raw_intercepts, sector_counts, national = train_type_model(df, tg)
        shrunk = shrink_intercepts(raw_intercepts, sector_counts)
        type_models[tg] = {
            "beta_fa": beta_fa,
            "sector_intercepts": spatial_smooth_intercepts(
                shrunk, centroids, sector_counts
            ),
            "national_intercept": national,
        }
    total_sectors = sum(
        len(model["sector_intercepts"]) for model in type_models.values()
    )

    # Output
    args.output.parent.mkdir(parents=True, exist_ok=True)
    with open(args.output, "w") as f:
        json.dump({"type_models": type_models}, f, indent=2)

    size_kb = args.output.stat().st_size / 1024
    print(f"\nWrote {args.output} ({size_kb:.0f} KB)")
    print(f" {len(TYPE_GROUPS)} type models, {total_sectors:,} total sector intercepts")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,385 +0,0 @@
|
|||
"""Backtesting: Evaluate price index model on held-out recent sales.
|
||||
|
||||
Test set: properties with 2+ sales where the last sale is 2022-2025.
|
||||
Uses the second-to-last sale as input, predicts the last sale price.
|
||||
Compares index-based prediction against a naive baseline (raw input price).
|
||||
Uses type-stratified index when available, falling back to "All" type.
|
||||
|
||||
Output: backtest_results.parquet with predictions vs actuals.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
|
||||
from pipeline.transform._price_utils import (
|
||||
CURRENT_YEAR,
|
||||
HEDONIC_COLUMNS,
|
||||
sector_expr,
|
||||
type_group_expr,
|
||||
)
|
||||
|
||||
TEST_YEAR_MIN = 2022
|
||||
|
||||
|
||||
def extract_test_set(
    input_path: Path, include_hedonic_cols: bool = False
) -> pl.DataFrame:
    """Extract test pairs: second-to-last sale as input, last sale as ground truth.

    Only rows with a postcode and at least two entries in
    ``historical_prices`` are considered. A pair is kept when the last sale is
    in ``TEST_YEAR_MIN`` or later, both prices are positive, and the last sale
    year is strictly after the input sale year. With
    ``include_hedonic_cols`` the ``HEDONIC_COLUMNS`` are selected as well.
    """
    print("Loading test set...")

    cols = ["Postcode", "historical_prices", "Property type"]
    if include_hedonic_cols:
        for name in HEDONIC_COLUMNS:
            if name not in cols:
                cols.append(name)

    history = pl.col("historical_prices")
    last_sale = history.list.last()  # ground truth
    previous_sale = history.list.get(-2)  # model input

    eligible = pl.scan_parquet(input_path).select(cols).filter(
        pl.col("Postcode").is_not_null(),
        history.list.len() >= 2,
    )
    with_pairs = eligible.with_columns(
        sector_expr(),
        type_group_expr(),
        last_sale.struct.field("year").alias("actual_year"),
        last_sale.struct.field("price").alias("actual_price"),
        previous_sale.struct.field("year").alias("input_year"),
        previous_sale.struct.field("price").alias("input_price"),
    )
    df = with_pairs.filter(
        pl.col("actual_year") >= TEST_YEAR_MIN,
        pl.col("input_price") > 0,
        pl.col("actual_price") > 0,
        pl.col("actual_year") > pl.col("input_year"),
    ).collect()

    print(f" {len(df):,} test pairs (last sale {TEST_YEAR_MIN}-{CURRENT_YEAR})")
    return df
|
||||
|
||||
|
||||
def predict(test: pl.DataFrame, index: pl.DataFrame) -> pl.DataFrame:
    """Roll each input price forward to the actual sale year with the index.

    The log index is looked up at ``input_year`` and at ``actual_year``. When
    the index has a ``type_group`` column, the type-specific value is used and
    the "All" row of the same sector/year fills any gap. The prediction is
    ``input_price * exp(log_index_actual - log_index_input)``; rows where
    that is null fall back to the unchanged input price.

    Args:
        test: Frame with ``sector``, ``input_year``, ``actual_year`` and
            ``input_price`` (plus ``type_group`` for a stratified index).
        index: Frame with ``sector``, ``year``, ``log_index`` and optionally
            ``type_group``.

    Returns:
        ``test`` with the joined index columns and a ``predicted`` column.
    """

    def with_log_index(frame, source, keys, year_col, alias):
        # Left-join source.log_index (renamed to `alias`) on keys + year.
        lookup = source.select(*keys, "year", pl.col("log_index").alias(alias))
        return frame.join(
            lookup,
            left_on=[*keys, year_col],
            right_on=[*keys, "year"],
            how="left",
        )

    if "type_group" in index.columns:
        typed_rows = index.filter(pl.col("type_group") != "All")
        pooled_rows = index.filter(pl.col("type_group") == "All")
        typed_keys = ["sector", "type_group"]
        pooled_keys = ["sector"]

        # Input year first (typed, then "All"), then the actual year.
        test = with_log_index(test, typed_rows, typed_keys, "input_year", "li_in_typed")
        test = with_log_index(test, pooled_rows, pooled_keys, "input_year", "li_in_all")
        test = with_log_index(
            test, typed_rows, typed_keys, "actual_year", "li_act_typed"
        )
        test = with_log_index(test, pooled_rows, pooled_keys, "actual_year", "li_act_all")

        # Prefer the type-specific value; fall back to the "All" value.
        test = test.with_columns(
            pl.col("li_in_typed")
            .fill_null(pl.col("li_in_all"))
            .alias("log_index_input"),
            pl.col("li_act_typed")
            .fill_null(pl.col("li_act_all"))
            .alias("log_index_actual"),
        )
    else:
        # Unstratified index: one lookup per year column.
        test = with_log_index(test, index, ["sector"], "input_year", "log_index_input")
        test = with_log_index(
            test, index, ["sector"], "actual_year", "log_index_actual"
        )

    base_price = pl.col("input_price").cast(pl.Float64)
    growth = (pl.col("log_index_actual") - pl.col("log_index_input")).exp()
    return test.with_columns(
        (base_price * growth).fill_null(base_price).alias("predicted"),
    )
|
||||
|
||||
|
||||
def compute_metrics(actual: np.ndarray, predicted: np.ndarray) -> dict:
    """Summarise prediction error for one model stage.

    Rows are used only when both values are finite and the actual price is
    positive (the percentage error divides by the actual price).

    Args:
        actual: Ground-truth prices.
        predicted: Predicted prices, aligned element-wise with ``actual``.

    Returns:
        Dict with the median absolute percentage error, the share of rows
        within 10/20/30 %, the mean absolute error, the mean signed error
        (predicted - actual) and the number of rows used (``"n"``). When no
        row is usable every metric is NaN and ``"n"`` is 0.
    """
    actual = np.asarray(actual, dtype=np.float64)
    predicted = np.asarray(predicted, dtype=np.float64)

    usable = np.isfinite(predicted) & np.isfinite(actual) & (actual > 0)
    actual = actual[usable]
    predicted = predicted[usable]

    if actual.size == 0:
        # np.median / np.mean on empty arrays emit RuntimeWarnings; return the
        # same NaN result explicitly instead.
        nan = float("nan")
        return {
            "MdAPE (%)": nan,
            "% within 10%": nan,
            "% within 20%": nan,
            "% within 30%": nan,
            "MAE (£)": nan,
            "Mean signed error (£)": nan,
            "n": 0,
        }

    signed_err = predicted - actual
    ape = np.abs(signed_err) / actual

    return {
        "MdAPE (%)": float(np.median(ape) * 100),
        "% within 10%": float(np.mean(ape <= 0.10) * 100),
        "% within 20%": float(np.mean(ape <= 0.20) * 100),
        "% within 30%": float(np.mean(ape <= 0.30) * 100),
        "MAE (£)": float(np.mean(np.abs(signed_err))),
        "Mean signed error (£)": float(np.mean(signed_err)),
        "n": int(len(actual)),
    }
|
||||
|
||||
|
||||
def print_metrics_table(metrics_by_stage: dict):
    """Print a fixed-width comparison table of backtest metrics.

    Args:
        metrics_by_stage: Maps a stage label (e.g. "Naive", "Index") to the
            dict returned by compute_metrics(). Each stage becomes one column,
            in the dict's iteration order.
    """
    metric_names = [
        "MdAPE (%)",
        "% within 10%",
        "% within 20%",
        "% within 30%",
        "MAE (£)",
        "Mean signed error (£)",
        "n",
    ]
    stages = list(metrics_by_stage.keys())

    # The label column is 25 wide and every stage column is 15 wide (one
    # separating space plus 14 characters). The rules grow with the number of
    # stages but never shrink below the historical 55 characters.
    rule_width = max(55, 25 + 15 * len(stages))

    print("\n" + "=" * rule_width)
    print("BACKTEST RESULTS")
    print("=" * rule_width)

    header = f"{'Metric':<25s}"
    for stage in stages:
        header += f" {stage:>14s}"
    print(header)
    print("-" * rule_width)

    for metric in metric_names:
        row = f"{metric:<25s}"
        for stage in stages:
            val = metrics_by_stage[stage][metric]
            if metric == "n":
                row += f" {val:>14,d}"
            elif "£" in metric:
                # Same 14-character field as the other columns so that the
                # currency rows line up with the header.
                row += f" {val:>14,.0f}"
            else:
                # 13 digits plus the trailing percent sign = 14 characters.
                row += f" {val:>13.1f}%"
        print(row)

    print("=" * rule_width)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the backtest.

    Loads the price index and the test pairs, predicts the final sale price of
    every pair with the index, optionally blends in a hedonic estimate (trying
    several tau values and keeping the one with the lowest MdAPE), prints a
    metrics table and writes the per-pair results to the --output parquet.
    """
    parser = argparse.ArgumentParser(description="Backtest price estimation model")
    required_paths = (
        ("--input", "Path to wide.parquet"),
        ("--index", "Path to price_index.parquet"),
        ("--output", "Output backtest_results.parquet"),
    )
    for flag, description in required_paths:
        parser.add_argument(flag, type=Path, required=True, help=description)
    parser.add_argument(
        "--hedonic-model",
        type=Path,
        default=None,
        help="Path to hedonic_model.json (optional)",
    )
    args = parser.parse_args()

    index = pl.read_parquet(args.index)
    if "type_group" in index.columns:
        print(
            f"Price index: {len(index):,} rows, {index['sector'].n_unique():,} sectors, "
            f"{index['type_group'].n_unique()} type groups"
        )
    else:
        print(
            f"Price index: {len(index):,} rows, {index['sector'].n_unique():,} sectors"
        )

    use_hedonic = args.hedonic_model is not None
    test = extract_test_set(args.input, include_hedonic_cols=use_hedonic)

    print("\nPredicting with price index...")
    test = predict(test, index)

    # Baseline metrics: unchanged input price vs. index-adjusted price.
    actual = test["actual_price"].to_numpy().astype(np.float64)
    naive_pred = test["input_price"].to_numpy().astype(np.float64)
    index_pred = test["predicted"].to_numpy().astype(np.float64)
    metrics = {
        "Naive": compute_metrics(actual, naive_pred),
        "Index": compute_metrics(actual, index_pred),
    }

    if use_hedonic:
        print("\nApplying hedonic blending...")
        with open(args.hedonic_model) as f:
            model = json.load(f)
        type_models = model["type_models"]

        # A row can receive a hedonic estimate only with a positive floor
        # area and a known type group.
        floor_area = pl.col("Total floor area (sqm)")
        eligible_mask = test.select(
            floor_area.is_not_null()
            & (floor_area > 0)
            & pl.col("type_group").is_not_null()
        ).to_series()
        eligible = test.filter(eligible_mask)

        if len(eligible) > 0:
            log_fa = np.log(
                np.maximum(
                    eligible["Total floor area (sqm)"].to_numpy().astype(np.float64),
                    1.0,
                )
            )

            def hedonic_log_price(type_name, sector, log_area):
                # NaN marks rows whose type group has no fitted model.
                tm = type_models.get(type_name)
                if tm is None:
                    return np.nan
                alpha = tm["sector_intercepts"].get(sector, tm["national_intercept"])
                return tm["beta_fa"] * log_area + alpha

            log_hedonic = np.array(
                [
                    hedonic_log_price(type_name, sector, log_area)
                    for type_name, sector, log_area in zip(
                        eligible["type_group"].to_list(),
                        eligible["sector"].to_list(),
                        log_fa,
                    )
                ],
                dtype=np.float64,
            )
            valid = np.isfinite(log_hedonic)

            # Hold period runs from the input sale to the actual sale,
            # simulating a real prediction made at the input year.
            hold_years = np.maximum(
                eligible["actual_year"].to_numpy().astype(np.float64)
                - eligible["input_year"].to_numpy().astype(np.float64),
                0.0,
            )
            log_index_pred = np.log(
                np.maximum(eligible["predicted"].to_numpy().astype(np.float64), 1.0)
            )

            def blend(tau):
                # Log-space blend; rows without a hedonic value keep the
                # pure index prediction.
                blend_w = hold_years / (hold_years + tau)
                return np.exp(
                    np.where(
                        valid,
                        (1 - blend_w) * log_index_pred + blend_w * log_hedonic,
                        log_index_pred,
                    )
                )

            actual_eligible = eligible["actual_price"].to_numpy().astype(np.float64)
            best_tau = 15.0
            best_mdape = float("inf")

            print(f"\n tau sweep ({valid.sum():,} eligible properties):")
            for tau in [5.0, 10.0, 15.0, 20.0, 30.0]:
                m = compute_metrics(actual_eligible, blend(tau))
                marker = ""
                if m["MdAPE (%)"] < best_mdape:
                    best_mdape = m["MdAPE (%)"]
                    best_tau = tau
                    marker = " <-- best"
                print(
                    f" tau={tau:>4.0f}: MdAPE={m['MdAPE (%)']:>5.1f}%, "
                    f"within 10%={m['% within 10%']:>5.1f}%{marker}"
                )

            print(f"\n Best tau = {best_tau}")

            # Full test set: blended value for eligible rows, index
            # prediction everywhere else.
            blended_all = test["predicted"].to_numpy().astype(np.float64).copy()
            blended_all[eligible_mask.arg_true().to_numpy()] = blend(best_tau)

            test = test.with_columns(
                pl.Series("blended", blended_all, dtype=pl.Float64),
            )
            metrics["Blended"] = compute_metrics(actual, blended_all)

    print_metrics_table(metrics)

    # Persist the per-pair results.
    result_cols = [
        "Postcode",
        "sector",
        "input_year",
        "input_price",
        "actual_year",
        "actual_price",
        "predicted",
    ]
    if "blended" in test.columns:
        result_cols.append("blended")
    result = test.select(result_cols)

    result.write_parquet(args.output)
    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"\nWrote {args.output} ({size_mb:.1f} MB)")
    print(f" {len(result):,} rows")
|
||||
|
||||
|
||||
# Run the backtest CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,414 +0,0 @@
|
|||
"""Augment wide.parquet with an estimated current price column.
|
||||
|
||||
Joins the precomputed repeat-sales price index (from price_index.py) with each
|
||||
property's last known sale to produce an inflation-adjusted current price estimate.
|
||||
Uses type-stratified index when available, falling back to "All" type.
|
||||
|
||||
Optionally applies renovation premiums from renovation_premium.py: for properties
|
||||
with post-sale renovation events, the estimated price is adjusted upward based on
|
||||
data-driven per-area premiums with time decay.
|
||||
|
||||
Modifies wide.parquet in-place, adding the "Estimated current price" column.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
|
||||
from pipeline.transform._price_utils import (
|
||||
CURRENT_YEAR,
|
||||
sector_expr,
|
||||
type_group_expr,
|
||||
)
|
||||
|
||||
# Half-life of the renovation premium; the decay below is applied to
# (CURRENT_YEAR - event year), so the unit is years.
HALF_LIFE = 10.0
# Exponential decay constant chosen so that exp(-DECAY_RATE * HALF_LIFE) == 0.5.
DECAY_RATE = math.log(2) / HALF_LIFE
|
||||
|
||||
|
||||
def main():
    """Add "Estimated current price" and "Est. price per sqm" to wide.parquet.

    Steps:
      1. Roll each property's last known price forward to CURRENT_YEAR with
         the repeat-sales index (type-specific rows first, "All" rows as the
         fallback when the index is stratified).
      2. Optionally blend the result with a hedonic estimate in log space
         (--hedonic-model).
      3. Optionally multiply by time-decayed renovation premiums
         (--renovation-premium).
      4. Derive the price per square metre, drop every helper column and
         overwrite the input file.

    Rows without an estimate stay null (never NaN) through every step.
    """
    parser = argparse.ArgumentParser(
        description="Augment wide.parquet with estimated current prices"
    )
    parser.add_argument(
        "--input",
        type=Path,
        required=True,
        help="Path to wide.parquet (modified in-place)",
    )
    parser.add_argument(
        "--index", type=Path, required=True, help="Path to price_index.parquet"
    )
    parser.add_argument(
        "--renovation-premium",
        type=Path,
        default=None,
        help="Path to renovation_premium.parquet (optional)",
    )
    parser.add_argument(
        "--hedonic-model",
        type=Path,
        default=None,
        help="Path to hedonic_model.json (optional)",
    )
    args = parser.parse_args()

    print("Loading wide.parquet...")
    df = pl.read_parquet(args.input)
    print(f" {len(df):,} rows, {len(df.columns)} columns")

    # Drop existing estimated columns if re-running
    for col in ["Estimated current price", "Est. price per sqm"]:
        if col in df.columns:
            df = df.drop(col)

    # Derive helper columns for the join
    has_price = (
        pl.col("Last known price").is_not_null()
        & pl.col("Postcode").is_not_null()
        & pl.col("Date of last transaction").is_not_null()
    )

    df = df.with_columns(
        sector_expr().alias("_sector"),
        pl.col("Date of last transaction").dt.year().alias("_sale_year"),
        type_group_expr().alias("_type_group"),
    )

    index = pl.read_parquet(args.index)
    has_type_group = "type_group" in index.columns
    if has_type_group:
        print(
            f" Price index: {len(index):,} rows, {index['sector'].n_unique():,} sectors, "
            f"{index['type_group'].n_unique()} type groups"
        )
    else:
        print(
            f" Price index: {len(index):,} rows, {index['sector'].n_unique():,} sectors (unstratified)"
        )

    print("\nApplying repeat-sales index...")

    if has_type_group:
        idx_typed = index.filter(pl.col("type_group") != "All")
        idx_all = index.filter(pl.col("type_group") == "All")

        # Join type-specific index at sale year
        df = df.join(
            idx_typed.select(
                "sector",
                "type_group",
                "year",
                pl.col("log_index").alias("log_idx_sale_typed"),
            ),
            left_on=["_sector", "_type_group", "_sale_year"],
            right_on=["sector", "type_group", "year"],
            how="left",
        )
        # Join "All" index at sale year
        df = df.join(
            idx_all.select(
                "sector", "year", pl.col("log_index").alias("log_idx_sale_all")
            ),
            left_on=["_sector", "_sale_year"],
            right_on=["sector", "year"],
            how="left",
        )
        # Join type-specific index at current year
        df = df.join(
            idx_typed.filter(pl.col("year") == CURRENT_YEAR).select(
                "sector", "type_group", pl.col("log_index").alias("log_idx_cur_typed")
            ),
            left_on=["_sector", "_type_group"],
            right_on=["sector", "type_group"],
            how="left",
        )
        # Join "All" index at current year
        df = df.join(
            idx_all.filter(pl.col("year") == CURRENT_YEAR).select(
                "sector", pl.col("log_index").alias("log_idx_cur_all")
            ),
            left_on="_sector",
            right_on="sector",
            how="left",
        )

        # Prefer the type-specific value, fall back to the "All" value.
        df = df.with_columns(
            pl.col("log_idx_sale_typed")
            .fill_null(pl.col("log_idx_sale_all"))
            .alias("_log_index_sale"),
            pl.col("log_idx_cur_typed")
            .fill_null(pl.col("log_idx_cur_all"))
            .alias("_log_index_current"),
        )
    else:
        df = df.join(
            index.select(
                "sector", "year", pl.col("log_index").alias("_log_index_sale")
            ),
            left_on=["_sector", "_sale_year"],
            right_on=["sector", "year"],
            how="left",
        )
        index_current = index.filter(pl.col("year") == CURRENT_YEAR).select(
            "sector", pl.col("log_index").alias("_log_index_current")
        )
        df = df.join(index_current, left_on="_sector", right_on="sector", how="left")

    # Compute estimate — only for rows with a known price
    df = df.with_columns(
        pl.when(has_price)
        .then(
            pl.col("Last known price").cast(pl.Float64)
            * (pl.col("_log_index_current") - pl.col("_log_index_sale")).exp()
        )
        .otherwise(pl.lit(None))
        .alias("Estimated current price"),
    )

    n_adjusted = df.filter(has_price & pl.col("_log_index_sale").is_not_null()).height
    n_with_price = df.filter(has_price).height
    print(
        f" {n_adjusted:,} of {n_with_price:,} properties adjusted by index ({n_adjusted / max(n_with_price, 1) * 100:.1f}%)"
    )

    # Apply hedonic blending if model provided
    if args.hedonic_model is not None:
        print("\nApplying hedonic blending...")
        with open(args.hedonic_model) as f:
            model = json.load(f)
        type_models = model["type_models"]
        tau = model.get("tau", 15.0)
        print(f" tau = {tau}, {len(type_models)} type models")

        # Add type_group for per-type lookup
        df = df.with_columns(type_group_expr())
        hedonic_mask = (
            has_price
            & pl.col("Estimated current price").is_not_null()
            & pl.col("Total floor area (sqm)").is_not_null()
            & (pl.col("Total floor area (sqm)") > 0)
            & pl.col("type_group").is_not_null()
        )
        eligible = df.filter(hedonic_mask)

        if len(eligible) > 0:
            log_fa = np.log(
                np.maximum(
                    eligible["Total floor area (sqm)"].to_numpy().astype(np.float64),
                    1.0,
                )
            )
            sectors = eligible["_sector"].to_list()
            types = eligible["type_group"].to_list()

            # Per-type hedonic prediction; NaN marks types without a model.
            log_hedonic = np.empty(len(eligible))
            for i in range(len(eligible)):
                tm = type_models.get(types[i])
                if tm is None:
                    log_hedonic[i] = np.nan
                    continue
                alpha = tm["sector_intercepts"].get(
                    sectors[i], tm["national_intercept"]
                )
                log_hedonic[i] = tm["beta_fa"] * log_fa[i] + alpha

            valid = np.isfinite(log_hedonic)

            # Hold years and blend weight
            sale_years = eligible["_sale_year"].to_numpy().astype(np.float64)
            hold_years = np.maximum(CURRENT_YEAR - sale_years, 0.0)
            blend_w = hold_years / (hold_years + tau)

            # Blend in log space
            log_index_est = np.log(
                eligible["Estimated current price"].to_numpy().astype(np.float64)
            )
            log_blended = np.where(
                valid,
                (1 - blend_w) * log_index_est + blend_w * log_hedonic,
                log_index_est,
            )
            blended_prices = np.exp(log_blended)

            # Write back into df. The blended values are scattered into a
            # full-length helper column and picked only where hedonic_mask
            # holds. Round-tripping the whole price column through NumPy
            # would turn every null estimate into NaN.
            eligible_rows = df.select(hedonic_mask).to_series().arg_true().to_numpy()
            blended_full = np.full(len(df), np.nan, dtype=np.float64)
            blended_full[eligible_rows] = blended_prices
            df = (
                df.with_columns(
                    pl.Series("_blended_price", blended_full, dtype=pl.Float64),
                )
                .with_columns(
                    pl.when(hedonic_mask)
                    .then(pl.col("_blended_price"))
                    .otherwise(pl.col("Estimated current price"))
                    .alias("Estimated current price"),
                )
                .drop("_blended_price")
            )

            n_blended = int(valid.sum())
            # Avoid a mean over an empty selection when no type had a model.
            avg_w = float(np.mean(blend_w[valid])) if n_blended else float("nan")
            print(
                f" {n_blended:,} properties with hedonic blending (avg blend weight: {avg_w:.3f})"
            )
        else:
            print(" No eligible properties for hedonic blending")

    # Apply renovation premiums if provided
    if args.renovation_premium is not None:
        print("\nApplying renovation premiums...")
        reno_prem = pl.read_parquet(args.renovation_premium)
        print(f" Loaded {len(reno_prem):,} premium rows")

        # Find properties with post-sale renovation events
        has_reno = (
            pl.col("renovation_history").is_not_null()
            & (pl.col("renovation_history").list.len() > 0)
            & pl.col("Estimated current price").is_not_null()
        )

        # Explode renovation events, filter to post-sale only
        reno_rows = (
            df.lazy()
            .filter(has_reno)
            .select("_sector", "_type_group", "_sale_year", "renovation_history")
            .with_row_index("_row_idx")
            .explode("renovation_history")
            .with_columns(
                pl.col("renovation_history").struct.field("year").alias("_event_year"),
                pl.col("renovation_history").struct.field("event").alias("_event_type"),
            )
            .filter(pl.col("_event_year") > pl.col("_sale_year"))
            .collect()
        )

        if len(reno_rows) > 0:
            # Take most recent event per (row, event_type)
            latest = (
                reno_rows.lazy()
                .group_by("_row_idx", "_event_type", "_sector", "_type_group")
                .agg(pl.col("_event_year").max().alias("_event_year"))
                .collect()
            )

            # Compute time-decayed premium
            latest = latest.with_columns(
                (-DECAY_RATE * (CURRENT_YEAR - pl.col("_event_year")).cast(pl.Float64))
                .exp()
                .alias("_decay"),
            )

            # Join with renovation_premium.parquet — try typed first, fall back to "All"
            rp_typed = reno_prem.filter(pl.col("type_group") != "All")
            rp_all = reno_prem.filter(pl.col("type_group") == "All")

            latest = (
                latest.join(
                    rp_typed.select(
                        "sector",
                        "type_group",
                        "event_type",
                        pl.col("log_premium").alias("_lp_typed"),
                    ),
                    left_on=["_sector", "_type_group", "_event_type"],
                    right_on=["sector", "type_group", "event_type"],
                    how="left",
                )
                .join(
                    rp_all.select(
                        "sector", "event_type", pl.col("log_premium").alias("_lp_all")
                    ),
                    left_on=["_sector", "_event_type"],
                    right_on=["sector", "event_type"],
                    how="left",
                )
                .with_columns(
                    pl.col("_lp_typed")
                    .fill_null(pl.col("_lp_all"))
                    .fill_null(0.0)
                    .alias("_log_premium"),
                )
            )

            # Compute total decayed log premium per property
            per_property = (
                latest.lazy()
                .with_columns(
                    (pl.col("_log_premium") * pl.col("_decay")).alias("_decayed_lp"),
                )
                .group_by("_row_idx")
                .agg(pl.col("_decayed_lp").sum().alias("_reno_log_premium"))
                .collect()
            )

            # _row_idx counts rows of the has_reno-filtered frame; translate
            # it to positions in the full df through the positions where the
            # mask is true.
            df_rows_with_reno = df.select(has_reno).to_series().arg_true().to_numpy()
            target_rows = df_rows_with_reno[per_property["_row_idx"].to_numpy()]

            # Full-length column of zeros with the premiums scattered in.
            reno_log_prem = np.zeros(len(df), dtype=np.float64)
            reno_log_prem[target_rows] = (
                per_property["_reno_log_premium"].fill_null(0.0).to_numpy()
            )

            df = df.with_columns(
                pl.Series("_reno_log_premium", reno_log_prem, dtype=pl.Float64),
            )

            # Apply: multiply estimated price by exp(reno_log_premium)
            # wherever the premium is non-zero.
            df = df.with_columns(
                pl.when(pl.col("_reno_log_premium") != 0.0)
                .then(
                    pl.col("Estimated current price")
                    * pl.col("_reno_log_premium").exp()
                )
                .otherwise(pl.col("Estimated current price"))
                .alias("Estimated current price"),
            )

            n_with_premium = per_property.height
            nonzero_premiums = per_property["_reno_log_premium"].filter(
                per_property["_reno_log_premium"] != 0.0
            )
            print(f" {n_with_premium:,} properties with renovation premium applied")
            if len(nonzero_premiums) > 0:
                avg_multiplier = math.exp(nonzero_premiums.mean())
                print(
                    f" Average premium multiplier: {avg_multiplier:.3f} ({avg_multiplier - 1:.1%} uplift)"
                )
            else:
                # .mean() of an empty series is None, which math.exp rejects.
                print(" No non-zero renovation premiums matched; estimates unchanged")
        else:
            print(" No properties with post-sale renovation events")

    # Derive estimated price per sqm where both estimated price and floor area
    # exist. Zero floor area or a non-finite estimate yields null instead of
    # an inf/NaN value that cannot be cast to an integer.
    floor_area = pl.col("Total floor area (sqm)")
    price_per_sqm = pl.col("Estimated current price") / floor_area
    df = df.with_columns(
        pl.when((floor_area > 0) & price_per_sqm.is_finite())
        .then(price_per_sqm.round(0))
        .otherwise(None)
        .cast(pl.Int32)
        .alias("Est. price per sqm"),
    )

    # Drop all temporary columns
    temp_cols = [c for c in df.columns if c.startswith("_") or c.startswith("log_idx_")]
    # Also drop hedonic-derived column if it was added
    if "type_group" in df.columns:
        temp_cols.append("type_group")
    df = df.drop(temp_cols)

    df.write_parquet(args.input)
    size_mb = args.input.stat().st_size / (1024 * 1024)
    print(f"\nWrote {args.input} ({size_mb:.1f} MB)")
    print(
        f" {len(df):,} rows, {len(df.columns)} columns (including 'Estimated current price')"
    )
|
||||
|
||||
|
||||
# Run the price-estimation CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,523 +0,0 @@
|
|||
"""Repeat-Sales Price Index (improved)
|
||||
|
||||
Builds a hierarchical repeat-sales price index with:
|
||||
1. Stratification by property type (Detached/Semi-Detached/Terraced/Flats)
|
||||
2. Robust regression (IRLS with Huber weights) instead of hard outlier cutoff
|
||||
3. National hedonic time-dummy model as ultimate shrinkage fallback
|
||||
4. Spatial smoothing for sparse sectors via KD-tree nearest neighbors
|
||||
|
||||
Output: price_index.parquet — sector × type_group × year → log_index
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from scipy.sparse import csc_matrix
|
||||
from scipy.sparse.linalg import lsqr
|
||||
from scipy.spatial import KDTree
|
||||
from tqdm import tqdm
|
||||
|
||||
from pipeline.transform._price_utils import (
|
||||
CURRENT_YEAR,
|
||||
SHRINKAGE_K,
|
||||
TYPE_GROUPS,
|
||||
build_hedonic_features,
|
||||
extract_centroids,
|
||||
hierarchy_keys,
|
||||
sector_expr,
|
||||
type_group_expr,
|
||||
)
|
||||
|
||||
# --- Constants ---
# Groups with fewer repeat-sale pairs than this get no index of their own.
MIN_PAIRS = 5
# Pairs whose |log price ratio| exceeds this are dropped during extraction.
OUTLIER_THRESHOLD = 3.0  # hard pre-filter; Huber handles the rest
# Absolute residual beyond which the IRLS solver down-weights a pair.
HUBER_K = 1.345
# Number of solve/reweight passes run by the IRLS solver.
IRLS_ITERATIONS = 5
# NOTE(review): presumably the neighbour count and blend strength used by the
# spatial smoothing step — their consumer is not visible here; confirm.
SPATIAL_NEIGHBORS = 5
SPATIAL_BLEND_K = 30
|
||||
|
||||
|
||||
# --- Pair extraction ---
|
||||
|
||||
|
||||
def extract_pairs(input_path: Path) -> pl.DataFrame:
    """Turn each property's sale history into consecutive repeat-sale pairs.

    Every adjacent pair of entries in ``historical_prices`` becomes one row
    (sale i -> sale i+1). Pairs are kept when both prices are positive, the
    second sale is in a later year and the absolute log price ratio does not
    exceed OUTLIER_THRESHOLD.

    Args:
        input_path: Parquet file to scan.

    Returns:
        A DataFrame with ``sector``, ``type_group``, ``year1``, ``price1``,
        ``year2``, ``price2``, ``log_ratio``, ``weight``
        (1 / sqrt(years between the sales)), ``district`` and ``area``.
    """
    print("Extracting repeat-sale pairs...")

    history = pl.col("historical_prices")
    properties = (
        pl.scan_parquet(input_path)
        .select("Postcode", "historical_prices", "Property type")
        .filter(
            pl.col("Postcode").is_not_null(),
            history.list.len() >= 2,
        )
        .with_columns(sector_expr(), type_group_expr())
        .collect()
    )
    print(f" {len(properties):,} properties with 2+ transactions")

    earlier_sale = pl.col("from_txn")
    later_sale = pl.col("to_txn")
    price_ratio = pl.col("price2").cast(pl.Float64) / pl.col("price1").cast(pl.Float64)
    years_between = (pl.col("year2") - pl.col("year1")).cast(pl.Float64)

    pairs = (
        properties.lazy()
        # Two aligned lists: all sales but the last, and all sales but the first.
        .with_columns(
            history.list.slice(0, history.list.len() - 1).alias("from_txn"),
            history.list.slice(1).alias("to_txn"),
        )
        .explode("from_txn", "to_txn")
        .with_columns(
            earlier_sale.struct.field("year").alias("year1"),
            earlier_sale.struct.field("price").alias("price1"),
            later_sale.struct.field("year").alias("year2"),
            later_sale.struct.field("price").alias("price2"),
        )
        .select("sector", "type_group", "year1", "price1", "year2", "price2")
        .filter(
            pl.col("price1") > 0,
            pl.col("price2") > 0,
            pl.col("year2") > pl.col("year1"),
        )
        .with_columns(
            price_ratio.log().alias("log_ratio"),
            (1.0 / years_between.sqrt()).alias("weight"),
        )
        .filter(pl.col("log_ratio").abs() <= OUTLIER_THRESHOLD)
        .collect()
    )

    # Hierarchy columns: strip the trailing sector number to get the district,
    # then everything from the first digit on to get the area.
    with_district = pairs.with_columns(
        pl.col("sector").str.replace(r"\s+\d+$", "").alias("district"),
    )
    pairs = with_district.with_columns(
        pl.col("district").str.replace(r"\d.*$", "").alias("area"),
    )

    print(f" {len(pairs):,} pairs extracted")
    return pairs
|
||||
|
||||
|
||||
# --- Robust IRLS solver ---
|
||||
|
||||
|
||||
def solve_robust_index(
    years1: np.ndarray,
    years2: np.ndarray,
    log_ratios: np.ndarray,
    base_weights: np.ndarray,
) -> dict[int, float]:
    """Fit a repeat-sales log index by iteratively reweighted least squares.

    Each pair contributes one equation ``beta[year2] - beta[year1] =
    log_ratio``; the earliest year is the base and is fixed at 0. After every
    solve, pairs whose absolute residual exceeds HUBER_K are down-weighted by
    ``HUBER_K / |residual|`` (the residual is compared with HUBER_K directly,
    without rescaling).

    Args:
        years1: Year of the first sale of each pair.
        years2: Year of the second sale of each pair.
        log_ratios: log(price2 / price1) per pair.
        base_weights: Starting weight per pair.

    Returns:
        ``{year: log_index}`` including the base year at 0.0, or an empty
        dict when there are fewer than MIN_PAIRS pairs or only one distinct
        year.
    """
    n_obs = len(years1)
    if n_obs < MIN_PAIRS:
        return {}

    distinct_years = np.union1d(years1, years2)
    base_year = int(distinct_years.min())

    # One unknown per year other than the base year.
    free_years = [int(y) for y in distinct_years if int(y) != base_year]
    year_to_col = {year: position for position, year in enumerate(free_years)}
    n_cols = len(year_to_col)
    if n_cols == 0:
        return {}

    # Column of each pair's second / first sale; -1 marks the base year.
    sale_col = np.full(n_obs, -1, dtype=np.int32)
    buy_col = np.full(n_obs, -1, dtype=np.int32)
    for year, column in year_to_col.items():
        sale_col[years2 == year] = column
        buy_col[years1 == year] = column

    # Sparsity pattern of the design matrix; only the values change between
    # iterations.
    has_sale = sale_col >= 0
    has_buy = buy_col >= 0
    entry_rows = np.concatenate([np.flatnonzero(has_sale), np.flatnonzero(has_buy)])
    entry_cols = np.concatenate([sale_col[has_sale], buy_col[has_buy]])
    entry_signs = np.concatenate([np.ones(has_sale.sum()), -np.ones(has_buy.sum())])

    weights = base_weights.copy()
    for _ in range(IRLS_ITERATIONS):
        design = csc_matrix(
            (entry_signs * weights[entry_rows], (entry_rows, entry_cols)),
            shape=(n_obs, n_cols),
        )
        betas = lsqr(design, log_ratios * weights, atol=1e-10, btol=1e-10)[0]

        # Unweighted fitted value of each pair.
        fitted = np.zeros(n_obs)
        fitted[has_sale] += betas[sale_col[has_sale]]
        fitted[has_buy] -= betas[buy_col[has_buy]]

        # Huber reweighting for the next pass.
        abs_resid = np.abs(log_ratios - fitted)
        huber_w = np.where(
            abs_resid <= HUBER_K, 1.0, HUBER_K / np.maximum(abs_resid, 1e-10)
        )
        weights = base_weights * huber_w

    return {
        base_year: 0.0,
        **{year: float(betas[column]) for year, column in year_to_col.items()},
    }
|
||||
|
||||
|
||||
def compute_indices_for_level(pairs: pl.DataFrame, group_col: str):
    """Solve one robust index per distinct value of ``group_col``.

    Args:
        pairs: Repeat-sale pairs with ``year1``, ``year2``, ``log_ratio``,
            ``weight`` and the grouping column.
        group_col: Column to group by.

    Returns:
        ``(indices, n_pairs)``: two dicts keyed by group value, holding the
        solved index and the number of pairs it was solved from. Groups for
        which the solver returns an empty index appear in neither dict.
    """
    grouped = pairs.group_by(group_col).agg(
        pl.col("year1"),
        pl.col("year2"),
        pl.col("log_ratio"),
        pl.col("weight"),
    )

    indices, n_pairs = {}, {}
    progress = tqdm(
        grouped.iter_rows(named=True), total=len(grouped), desc=f" {group_col}"
    )
    for record in progress:
        first_years = np.array(record["year1"], dtype=np.int32)
        second_years = np.array(record["year2"], dtype=np.int32)
        ratios = np.array(record["log_ratio"], dtype=np.float64)
        pair_weights = np.array(record["weight"], dtype=np.float64)

        solved = solve_robust_index(first_years, second_years, ratios, pair_weights)
        if not solved:
            continue

        group_key = record[group_col]
        indices[group_key] = solved
        n_pairs[group_key] = len(first_years)

    return indices, n_pairs
|
||||
|
||||
|
||||
# --- Hedonic model ---
|
||||
|
||||
|
||||
def compute_hedonic_index(
    input_path: Path, min_year: int, max_year: int
) -> dict[int, float]:
    """Two-step hedonic index: regress log(price) on features, average residual by year.

    Args:
        input_path: Parquet file to scan.
        min_year: First sale year to include; also the normalisation base.
        max_year: Last sale year to include.

    Returns:
        ``{year: mean residual}`` for every year in the range that has at
        least one sale, shifted so that ``min_year`` is 0 when it is present
        (no shift otherwise). Empty when no usable row exists.
    """
    print("Computing hedonic index...")
    df = (
        pl.scan_parquet(input_path)
        .select(
            "Last known price",
            "Date of last transaction",
            "Property type",
            "Total floor area (sqm)",
        )
        .filter(
            pl.col("Last known price").is_not_null(),
            # log() of a zero or negative price yields -inf/NaN, which would
            # poison the least-squares fit below.
            pl.col("Last known price") > 0,
            pl.col("Total floor area (sqm)").is_not_null(),
            pl.col("Total floor area (sqm)") > 0,
        )
        .with_columns(
            pl.col("Date of last transaction").dt.year().alias("sale_year"),
            type_group_expr(),
        )
        .filter(
            pl.col("type_group").is_not_null(),
            pl.col("sale_year").is_not_null(),
            pl.col("sale_year") >= min_year,
            pl.col("sale_year") <= max_year,
        )
        .collect()
    )
    print(f" {len(df):,} complete cases for hedonic model")

    if len(df) == 0:
        # Nothing to regress on; an empty index lets callers fall back to
        # their other sources.
        return {}

    # Target
    log_price = np.log(df["Last known price"].to_numpy().astype(np.float64))
    sale_years = df["sale_year"].to_numpy()

    # Build feature matrix (hedonic features + intercept column)
    X = build_hedonic_features(df)
    F = np.hstack([X, np.ones((len(df), 1), dtype=np.float32)])
    print(f" Feature matrix: {F.shape[0]:,} × {F.shape[1]}")

    # Step 1: regress log(price) on features → quality score
    F64 = F.astype(np.float64)  # converted once, used for fit and prediction
    betas = np.linalg.lstsq(F64, log_price, rcond=None)[0]
    quality_score = F64 @ betas
    residuals = log_price - quality_score

    # Step 2: average residual by year = hedonic index
    hedonic = {}
    for y in range(min_year, max_year + 1):
        mask = sale_years == y
        if mask.sum() > 0:
            hedonic[y] = float(np.mean(residuals[mask]))

    # Normalize: min_year = 0
    base = hedonic.get(min_year, 0.0)
    for y in hedonic:
        hedonic[y] -= base

    if hedonic:
        print(
            f" Hedonic index: {len(hedonic)} years, range {min(hedonic.values()):.3f} to {max(hedonic.values()):.3f}"
        )
    return hedonic
|
||||
|
||||
|
||||
# --- Shrinkage ---
|
||||
|
||||
|
||||
def shrink_index(raw: dict, parent: dict, n_pairs: int, k: int = SHRINKAGE_K) -> dict:
    """Blend a raw per-year index with its parent index.

    The raw index receives weight ``n_pairs / (n_pairs + k)`` and the parent
    receives the remainder. A year present in only one of the two inputs
    takes that input's value on both sides of the blend, so it passes
    through unchanged.

    Args:
        raw: {year: value} index estimated for this group.
        parent: {year: value} index of the group one level up.
        n_pairs: number of observations behind ``raw``.
        k: shrinkage strength; larger values pull harder toward ``parent``.

    Returns:
        {year: blended value} for every year found in either input.
    """
    own_weight = n_pairs / (n_pairs + k)
    blended = {}
    for year in set(raw) | set(parent):
        if year in raw:
            own_value = raw[year]
            parent_value = parent.get(year, own_value)
        else:
            # The year comes from the union, so it must be in the parent.
            parent_value = parent[year]
            own_value = parent_value
        blended[year] = own_weight * own_value + (1 - own_weight) * parent_value
    return blended
|
||||
|
||||
|
||||
def apply_shrinkage(
    sector_idx,
    sector_n,
    district_idx,
    district_n,
    area_idx,
    area_n,
    national_idx,
    national_n,
    hedonic_idx,
    all_sectors,
    sector_to_dist,
    dist_to_area,
):
    """Top-down hierarchical shrinkage: national→hedonic, area→national, etc.

    Each level is shrunk toward the already-shrunk level above it using
    ``shrink_index``. When the direct parent has no index of its own, the
    nearest available ancestor is used instead (district, then area, then
    national).

    Args:
        sector_idx, district_idx, area_idx: {key: {year: value}} raw indices.
        sector_n, district_n, area_n: {key: count} used as shrinkage weights;
            each must contain every key of the matching ``*_idx`` mapping.
        national_idx: {year: value} raw national index.
        national_n: count behind ``national_idx``.
        hedonic_idx: {year: value} index the national level is shrunk toward.
        all_sectors: every sector that must appear in the result.
        sector_to_dist: {sector: district}.
        dist_to_area: {district: area}.

    Returns:
        {sector: {year: value}} covering every entry of ``all_sectors`` plus
        every key of ``sector_idx``. Sectors without their own index share
        the dict object of the ancestor they inherit from.
    """
    # National → hedonic
    national_shrunk = shrink_index(national_idx, hedonic_idx, national_n)

    # Area → national
    area_shrunk = {
        area: shrink_index(idx, national_shrunk, area_n[area])
        for area, idx in area_idx.items()
    }

    # District → area
    district_shrunk = {}
    for dist, idx in district_idx.items():
        parent = area_shrunk.get(dist_to_area.get(dist, ""), national_shrunk)
        district_shrunk[dist] = shrink_index(idx, parent, district_n[dist])

    def nearest_ancestor(sec):
        """Return the shrunk district index for ``sec``, else area, else national."""
        d = sector_to_dist.get(sec, "")
        if d in district_shrunk:
            return district_shrunk[d]
        return area_shrunk.get(dist_to_area.get(d, ""), national_shrunk)

    # Sector → district. Previously a sector whose district had no index was
    # shrunk straight to national, skipping the area level that the fill step
    # below already honoured; both paths now use the same fallback chain.
    sector_shrunk = {
        sec: shrink_index(idx, nearest_ancestor(sec), sector_n[sec])
        for sec, idx in sector_idx.items()
    }

    # Fill sectors without their own index
    for sec in all_sectors:
        if sec not in sector_shrunk:
            sector_shrunk[sec] = nearest_ancestor(sec)

    return sector_shrunk
|
||||
|
||||
|
||||
# --- Spatial smoothing ---
|
||||
|
||||
|
||||
def spatial_smooth(
    sector_indices: dict,
    centroids: dict,
    n_pairs_map: dict,
) -> dict:
    """Blend sparse sector indices with K nearest neighbors.

    Sectors that have a centroid are placed in a KD-tree. Each sector keeps
    weight ``n / (n + SPATIAL_BLEND_K)`` for its own index, where ``n`` comes
    from ``n_pairs_map``; the rest is split between its nearest neighbours in
    proportion to inverse distance. Sectors whose own weight exceeds 0.95
    are left untouched. Neighbours always contribute their unsmoothed input
    index, so the processing order does not affect the result.

    Args:
        sector_indices: {sector: {year: value}}.
        centroids: {sector: (c0, c1)}; the code treats c0 as latitude and
            scales c1 by cos(mean c0) — presumably (lat, lon) in degrees,
            confirm against ``extract_centroids``.
        n_pairs_map: {sector: count}; missing sectors count as 0.

    Returns:
        A new {sector: {year: value}} mapping, or ``sector_indices`` itself
        when fewer than ``SPATIAL_NEIGHBORS + 1`` sectors have a centroid.
    """
    k_query = SPATIAL_NEIGHBORS + 1  # +1 because the query point is returned too

    # Build coordinate arrays for sectors with centroids
    located = [s for s in sector_indices if s in centroids]
    if len(located) < k_query:
        return sector_indices

    latlon = np.array([centroids[s] for s in located])
    # Scale the second coordinate by cos(mean of the first) so plain
    # Euclidean distance is approximately isotropic.
    lon_scale = np.cos(np.radians(np.mean(latlon[:, 0])))
    points = np.column_stack([latlon[:, 0], latlon[:, 1] * lon_scale])
    tree = KDTree(points)

    smoothed = dict(sector_indices)
    for pos, sector in enumerate(located):
        n_own = n_pairs_map.get(sector, 0)
        own_weight = n_own / (n_own + SPATIAL_BLEND_K)
        if own_weight > 0.95:
            continue  # enough data, skip smoothing

        dists, idxs = tree.query(points[pos], k=k_query)
        # Drop the first hit (the query point itself, distance ~0) and any
        # other zero-distance hit, which cannot be inverse-weighted.
        usable = [
            (1.0 / dist, sector_indices[located[j]])
            for dist, j in zip(dists[1:], idxs[1:])
            if dist > 0 and located[j] in sector_indices
        ]
        if not usable:
            continue

        inv_total = sum(inv for inv, _ in usable)
        neighbour_share = 1.0 - own_weight
        weighted = [
            (inv / inv_total * neighbour_share, idx) for inv, idx in usable
        ]

        own_index = sector_indices[sector]
        years = set(own_index)
        for _, idx in weighted:
            years |= set(idx)

        blended = {}
        for year in years:
            # A year missing from any contributor counts as 0.0 for it.
            value = own_weight * own_index.get(year, 0.0)
            for weight, idx in weighted:
                value += weight * idx.get(year, 0.0)
            blended[year] = value
        smoothed[sector] = blended

    return smoothed
|
||||
|
||||
|
||||
# --- Forward fill ---
|
||||
|
||||
|
||||
def forward_fill(index: dict, min_year: int, max_year: int) -> dict:
    """Return a dense {year: value} mapping over ``min_year..max_year``.

    Years missing from ``index`` inherit the most recent earlier value inside
    the range. Years before the first known value are filled with 0.0.
    Entries of ``index`` outside the range are ignored.
    """
    carried = 0.0
    dense = {}
    year = min_year
    while year <= max_year:
        carried = index.get(year, carried)
        dense[year] = carried
        year += 1
    return dense
|
||||
|
||||
|
||||
# --- Main ---
|
||||
|
||||
|
||||
def main():
    """Build the sector-level price index and write it to parquet.

    Reads repeat-sale pairs and sector centroids from ``--input``, then for
    "All" and each entry of ``TYPE_GROUPS`` solves a national index,
    computes area/district/sector indices, applies hierarchical shrinkage
    and spatial smoothing, and forward-fills every sector over the full year
    range. Type groups with fewer than ``MIN_PAIRS`` pairs fall back to the
    forward-filled hedonic index for every sector.

    Output columns: sector, type_group, year, log_index, n_pairs.

    Raises:
        SystemExit: if the input yields no repeat-sale pairs.
    """
    parser = argparse.ArgumentParser(
        description="Build improved repeat-sales price index"
    )
    parser.add_argument("--input", type=Path, required=True)
    parser.add_argument("--output", type=Path, required=True)
    args = parser.parse_args()

    pairs = extract_pairs(args.input)
    centroids = extract_centroids(args.input)

    # With no pairs, min()/max() below return None and int(None) would raise
    # an opaque TypeError; fail with a readable message instead.
    if len(pairs) == 0:
        raise SystemExit("No repeat-sale pairs found; cannot build a price index")

    min_year = int(pairs["year1"].min())
    max_year = max(int(pairs["year2"].max()), CURRENT_YEAR)

    hedonic_idx = compute_hedonic_index(args.input, min_year, max_year)

    # Precompute hierarchy
    all_sectors = pairs["sector"].unique().to_list()
    sector_to_dist = {}
    dist_to_area = {}
    for s in all_sectors:
        d, a = hierarchy_keys(s)
        sector_to_dist[s] = d
        dist_to_area[d] = a

    # Process each type group + "All"
    all_type_groups = ["All"] + TYPE_GROUPS
    final = {}  # {type_group: {sector: {year: log_index}}}
    final_n = {}  # {type_group: {sector: n_pairs}}

    for tg in all_type_groups:
        print(f"\n--- {tg} ---")
        typed = pairs if tg == "All" else pairs.filter(pl.col("type_group") == tg)
        if len(typed) < MIN_PAIRS:
            print(f" Skipping (only {len(typed)} pairs)")
            # Forward-fill the fallback as well: the hedonic index only has
            # years with observations, and every sector must cover the full
            # min_year..max_year range like the non-skipped groups do.
            fallback = forward_fill(hedonic_idx, min_year, max_year)
            final[tg] = {s: dict(fallback) for s in all_sectors}
            final_n[tg] = {s: 0 for s in all_sectors}
            continue

        print(f" {len(typed):,} pairs")

        # National
        np_arrs = typed.select("year1", "year2", "log_ratio", "weight")
        national_idx = solve_robust_index(
            np_arrs["year1"].to_numpy(),
            np_arrs["year2"].to_numpy(),
            np_arrs["log_ratio"].to_numpy(),
            np_arrs["weight"].to_numpy(),
        )
        national_n = len(typed)
        print(f" National: {len(national_idx)} years")

        # Area, district, sector
        print(" Computing per-level indices:")
        area_idx, area_n = compute_indices_for_level(typed, "area")
        district_idx, district_n = compute_indices_for_level(typed, "district")
        sector_idx, sector_n = compute_indices_for_level(typed, "sector")
        print(
            f" {len(area_idx)} areas, {len(district_idx)} districts, {len(sector_idx)} sectors"
        )

        # Shrinkage
        print(" Applying shrinkage...")
        sector_shrunk = apply_shrinkage(
            sector_idx,
            sector_n,
            district_idx,
            district_n,
            area_idx,
            area_n,
            national_idx,
            national_n,
            hedonic_idx,
            all_sectors,
            sector_to_dist,
            dist_to_area,
        )

        # Spatial smoothing
        print(" Spatial smoothing...")
        sector_smoothed = spatial_smooth(sector_shrunk, centroids, sector_n)

        # Forward fill
        for sec in all_sectors:
            sector_smoothed[sec] = forward_fill(
                sector_smoothed.get(sec, hedonic_idx), min_year, max_year
            )

        final[tg] = sector_smoothed
        final_n[tg] = sector_n

    # Assemble output
    print("\nAssembling output...")
    rows = []
    for tg in all_type_groups:
        for sec in all_sectors:
            n = final_n[tg].get(sec, 0)
            for year, log_idx in final[tg][sec].items():
                rows.append((sec, tg, year, log_idx, n))

    result = pl.DataFrame(
        rows,
        schema={
            "sector": pl.String,
            "type_group": pl.String,
            "year": pl.Int32,
            "log_index": pl.Float64,
            "n_pairs": pl.Int64,
        },
        orient="row",
    ).sort("type_group", "sector", "year")

    result.write_parquet(args.output)
    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"\nWrote {args.output} ({size_mb:.1f} MB)")
    print(
        f" {result['sector'].n_unique():,} sectors × {len(all_type_groups)} types × {max_year - min_year + 1} years = {len(result):,} rows"
    )


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,572 +0,0 @@
|
|||
"""Estimate per-area renovation premiums from repeat-sale residuals.
|
||||
|
||||
For each repeat-sale pair, computes the residual after removing the price-index
|
||||
predicted return. Pairs where renovation events occurred between sales should have
|
||||
systematically higher residuals. A WLS regression estimates the log-premium per
|
||||
event type, with hierarchical shrinkage and spatial smoothing.
|
||||
|
||||
Output: renovation_premium.parquet — sector × type_group × event_type → log_premium
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from scipy.spatial import KDTree
|
||||
|
||||
from pipeline.transform._price_utils import (
|
||||
SHRINKAGE_K,
|
||||
TYPE_GROUPS,
|
||||
extract_centroids,
|
||||
hierarchy_keys,
|
||||
sector_expr,
|
||||
type_group_expr,
|
||||
)
|
||||
|
||||
# --- Time decay of renovation indicators ---
# An event's indicator is exp(-DECAY_RATE * (year2 - event_year)), so it
# halves every HALF_LIFE years between the event and the later sale.
HALF_LIFE = 10.0
DECAY_RATE = math.log(2) / HALF_LIFE

# --- Pair filtering and minimum sample size ---
# Pairs with |log(price2 / price1)| above this threshold are dropped.
OUTLIER_THRESHOLD = 3.0
# A group with fewer pairs than this gets no regression.
MIN_PAIRS = 10

# --- Spatial smoothing ---
# Number of nearest neighbours blended into a sparse sector.
SPATIAL_NEIGHBORS = 5
# A sector keeps weight n / (n + SPATIAL_BLEND_K) for its own estimate.
SPATIAL_BLEND_K = 30

# Renovation event categories; each maps to a ``has_<name lowercased>`` column.
EVENT_TYPES = ["Extension", "Renovation", "Remodeling"]
|
||||
|
||||
|
||||
def extract_pairs_with_events(input_path: Path, index_path: Path) -> pl.DataFrame:
    """Extract repeat-sale pairs with renovation events and index residuals.

    Steps:
      1. Load properties with a postcode and at least two historical prices.
      2. Turn consecutive transactions into (year1, price1, year2, price2)
         pairs, keeping positive prices, ``year2 > year1`` and
         ``|log_ratio| <= OUTLIER_THRESHOLD``.
      3. Look up the price index at both years (the type-specific index
         first, "All" as fallback when the index file has a ``type_group``
         column) and compute ``residual = log_ratio - (index2 - index1)``.
         A missing index value is treated as 0.0.
      4. For each event type, set ``has_<event>`` to
         ``exp(-DECAY_RATE * (year2 - latest_event_year))`` using the latest
         event with ``year1 < event_year <= year2``, or 0.0 if there is none.
      5. Add ``district`` and ``area`` columns derived from ``sector``.

    Args:
        input_path: parquet file with the property table.
        index_path: parquet file with the price index (sector, year,
            log_index and optionally type_group).

    Returns:
        One row per pair with columns sector, type_group, year1, price1,
        year2, price2, log_ratio, residual, weight, has_extension,
        has_renovation, has_remodeling, district, area.
    """
    print("Extracting repeat-sale pairs with renovation events...")

    df = (
        pl.scan_parquet(input_path)
        .select("Postcode", "historical_prices", "Property type", "renovation_history")
        .filter(
            pl.col("Postcode").is_not_null(),
            pl.col("historical_prices").list.len() >= 2,
        )
        .with_columns(sector_expr(), type_group_expr())
        .collect()
    )
    print(f" {len(df):,} properties with 2+ transactions")

    # Build consecutive pairs: transaction i paired with transaction i + 1.
    pairs = (
        df.lazy()
        .with_columns(
            pl.col("historical_prices")
            .list.slice(0, pl.col("historical_prices").list.len() - 1)
            .alias("from_txn"),
            pl.col("historical_prices").list.slice(1).alias("to_txn"),
        )
        .explode("from_txn", "to_txn")
        .with_columns(
            pl.col("from_txn").struct.field("year").alias("year1"),
            pl.col("from_txn").struct.field("price").alias("price1"),
            pl.col("to_txn").struct.field("year").alias("year2"),
            pl.col("to_txn").struct.field("price").alias("price2"),
        )
        .select(
            "sector",
            "type_group",
            "year1",
            "price1",
            "year2",
            "price2",
            "renovation_history",
        )
        .filter(
            pl.col("price1") > 0,
            pl.col("price2") > 0,
            pl.col("year2") > pl.col("year1"),
        )
        .with_columns(
            (pl.col("price2").cast(pl.Float64) / pl.col("price1").cast(pl.Float64))
            .log()
            .alias("log_ratio"),
        )
        .filter(pl.col("log_ratio").abs() <= OUTLIER_THRESHOLD)
        .collect()
    )
    print(f" {len(pairs):,} repeat-sale pairs")

    # Join price index to compute residuals
    index = pl.read_parquet(index_path)

    if "type_group" in index.columns:
        idx_typed = index.filter(pl.col("type_group") != "All")
        idx_all = index.filter(pl.col("type_group") == "All")

        # Same pair of joins for each end of the pair (year1, then year2).
        for suffix, year_col in (("1", "year1"), ("2", "year2")):
            pairs = pairs.join(
                idx_typed.select(
                    "sector",
                    "type_group",
                    "year",
                    pl.col("log_index").alias(f"li{suffix}_typed"),
                ),
                left_on=["sector", "type_group", year_col],
                right_on=["sector", "type_group", "year"],
                how="left",
            ).join(
                idx_all.select(
                    "sector", "year", pl.col("log_index").alias(f"li{suffix}_all")
                ),
                left_on=["sector", year_col],
                right_on=["sector", "year"],
                how="left",
            )

        # Prefer the type-specific index, fall back to the "All" index.
        pairs = pairs.with_columns(
            (pl.col("li1_typed").fill_null(pl.col("li1_all"))).alias("_li1"),
            (pl.col("li2_typed").fill_null(pl.col("li2_all"))).alias("_li2"),
        )
    else:
        for alias, year_col in (("_li1", "year1"), ("_li2", "year2")):
            pairs = pairs.join(
                index.select("sector", "year", pl.col("log_index").alias(alias)),
                left_on=["sector", year_col],
                right_on=["sector", "year"],
                how="left",
            )

    # Compute residual = log_ratio - (index2 - index1)
    pairs = pairs.with_columns(
        (
            pl.col("log_ratio")
            - (pl.col("_li2").fill_null(0.0) - pl.col("_li1").fill_null(0.0))
        ).alias("residual"),
        # Down-weight pairs held for longer: weight = 1 / sqrt(years held).
        (1.0 / (pl.col("year2") - pl.col("year1")).cast(pl.Float64).sqrt()).alias(
            "weight"
        ),
    )

    # For each pair, compute time-decayed renovation indicators
    # Use row index for unique identification (composite keys aren't unique per pair)
    pairs = pairs.with_row_index("_pair_idx")

    # Start every indicator at 0.0; pairs with matching events are overwritten below.
    for et in EVENT_TYPES:
        col_name = f"has_{et.lower()}"
        pairs = pairs.with_columns(pl.lit(0.0).alias(col_name))

    # Process properties that have renovation history
    has_reno = pairs.filter(
        pl.col("renovation_history").is_not_null()
        & (pl.col("renovation_history").list.len() > 0)
    )

    if len(has_reno) > 0:
        reno_exploded = (
            has_reno.select("_pair_idx", "year1", "year2", "renovation_history")
            .explode("renovation_history")
            .with_columns(
                pl.col("renovation_history").struct.field("year").alias("event_year"),
                pl.col("renovation_history").struct.field("event").alias("event_type"),
            )
            # Only events between the two sales
            .filter(
                (pl.col("event_year") > pl.col("year1"))
                & (pl.col("event_year") <= pl.col("year2"))
            )
        )

        if len(reno_exploded) > 0:
            # For each pair + event type, take the most recent event
            latest_events = reno_exploded.group_by(
                "_pair_idx", "event_type", "year2"
            ).agg(pl.col("event_year").max().alias("latest_event_year"))

            # Compute time-decayed indicator: exp(-decay_rate * (year2 - event_year))
            latest_events = latest_events.with_columns(
                (
                    -DECAY_RATE
                    * (pl.col("year2") - pl.col("latest_event_year")).cast(pl.Float64)
                )
                .exp()
                .alias("decayed_indicator"),
            )

            # Pivot to wide format using _pair_idx for unique join
            for et in EVENT_TYPES:
                et_data = latest_events.filter(pl.col("event_type") == et)
                if len(et_data) > 0:
                    col_name = f"has_{et.lower()}"
                    pairs = (
                        pairs.join(
                            et_data.select(
                                "_pair_idx",
                                pl.col("decayed_indicator").alias(f"_{col_name}"),
                            ),
                            on="_pair_idx",
                            how="left",
                        )
                        .with_columns(
                            pl.col(f"_{col_name}").fill_null(0.0).alias(col_name),
                        )
                        .drop(f"_{col_name}")
                    )

    pairs = pairs.drop("_pair_idx")

    # Add hierarchy columns: strip the trailing sector number to get the
    # district, then everything from the first digit on to get the area.
    pairs = pairs.with_columns(
        pl.col("sector").str.replace(r"\s+\d+$", "").alias("district"),
    ).with_columns(
        pl.col("district").str.replace(r"\d.*$", "").alias("area"),
    )

    # Count reno pairs
    reno_mask = (
        (pl.col("has_extension") > 0)
        | (pl.col("has_renovation") > 0)
        | (pl.col("has_remodeling") > 0)
    )
    n_reno = pairs.filter(reno_mask).height
    # Guard the percentage: every pair may have been filtered out above.
    reno_share = n_reno / len(pairs) * 100 if len(pairs) else 0.0
    print(f" {n_reno:,} pairs with renovation events ({reno_share:.1f}%)")

    # Drop temporary columns from index join + renovation_history (no longer needed)
    temp_cols = [
        c
        for c in pairs.columns
        if c.startswith("_li") or c.startswith("li1_") or c.startswith("li2_")
    ]
    pairs = pairs.drop(temp_cols + ["renovation_history"])

    return pairs
|
||||
|
||||
|
||||
def wls_regression(
    residuals: np.ndarray,
    weights: np.ndarray,
    X: np.ndarray,
) -> np.ndarray:
    """Fit ``residuals ~ X`` by weighted least squares.

    Rows of ``X`` and entries of ``residuals`` are scaled by
    ``sqrt(weights)`` and the result is passed to ordinary least squares,
    which avoids building an N×N diagonal weight matrix. ``X`` must already
    contain any intercept column.

    Returns:
        Coefficient vector of length ``X.shape[1]``; all zeros if the
        least-squares solver raises ``LinAlgError``.
    """
    root_w = np.sqrt(weights)
    design = X * root_w[:, np.newaxis]
    target = residuals * root_w
    try:
        solution = np.linalg.lstsq(design, target, rcond=None)[0]
    except np.linalg.LinAlgError:
        return np.zeros(X.shape[1])
    return solution
|
||||
|
||||
|
||||
def compute_premiums_for_group(df: pl.DataFrame) -> dict[str, float]:
    """Estimate log-premiums per event type for one group of pairs.

    Regresses ``residual`` on an intercept plus the ``has_extension``,
    ``has_renovation`` and ``has_remodeling`` columns, weighted by
    ``weight``.

    Returns:
        {"Extension": ..., "Renovation": ..., "Remodeling": ...}, or an empty
        dict when the group has fewer than ``MIN_PAIRS`` rows or the three
        indicator columns sum to less than 1.0 in total.
    """
    n_rows = len(df)
    if n_rows < MIN_PAIRS:
        return {}

    def as_f64(column: str) -> np.ndarray:
        """Return a frame column as a float64 numpy array."""
        return df[column].to_numpy().astype(np.float64)

    target = as_f64("residual")
    pair_weights = as_f64("weight")

    # Design matrix: intercept + 3 event indicators.
    design = np.column_stack(
        [
            np.ones(n_rows),
            as_f64("has_extension"),
            as_f64("has_renovation"),
            as_f64("has_remodeling"),
        ]
    )

    # Too little renovation signal to estimate anything. The threshold is on
    # the summed indicator values, not on a count of rows.
    if design[:, 1:].sum() < 1.0:
        return {}

    coefs = wls_regression(target, pair_weights, design)
    # coefs[0] is the intercept; the premiums follow in column order.
    return {
        "Extension": float(coefs[1]),
        "Renovation": float(coefs[2]),
        "Remodeling": float(coefs[3]),
    }
|
||||
|
||||
|
||||
def compute_premiums_for_level(
    pairs: pl.DataFrame, group_col: str
) -> tuple[dict, dict]:
    """Estimate premiums for every group at one hierarchy level.

    Args:
        pairs: repeat-sale pairs with residual, weight and ``has_*`` columns.
        group_col: column to group by (e.g. "area", "district", "sector").

    Returns:
        ``(premiums, n_reno_pairs)``, both keyed by group value and both
        containing only groups for which a regression was produced.
        ``premiums[key]`` is {event_type: log_premium}; ``n_reno_pairs[key]``
        is the number of pairs with any positive event indicator.
    """
    indicator_columns = ("has_extension", "has_renovation", "has_remodeling")
    premiums = {}
    n_reno_pairs = {}
    for key, group_df in pairs.group_by(group_col):
        label = key[0]  # first element of the group key
        estimates = compute_premiums_for_group(group_df)
        if not estimates:
            continue
        premiums[label] = estimates

        # Count pairs with any reno indicator
        flagged = np.zeros(len(group_df), dtype=bool)
        for column in indicator_columns:
            flagged |= group_df[column].to_numpy() > 0
        n_reno_pairs[label] = int(flagged.sum())
    return premiums, n_reno_pairs
|
||||
|
||||
|
||||
def shrink_premium(
    raw: dict[str, float], parent: dict[str, float], n: int
) -> dict[str, float]:
    """Shrink raw premiums toward the parent level.

    The raw estimate receives weight ``n / (n + SHRINKAGE_K)`` and the
    parent receives the remainder. An event type missing from one side takes
    the other side's value; one missing from both yields 0.0.

    Returns:
        {event_type: shrunk log_premium} for every entry of ``EVENT_TYPES``.
    """
    own_weight = n / (n + SHRINKAGE_K)
    shrunk = {}
    for event in EVENT_TYPES:
        if event in raw:
            own = raw[event]
            anchor = parent.get(event, own)
        elif event in parent:
            anchor = parent[event]
            own = anchor
        else:
            own = anchor = 0.0
        shrunk[event] = own_weight * own + (1 - own_weight) * anchor
    return shrunk
|
||||
|
||||
|
||||
def apply_shrinkage(
    sector_prem,
    sector_n,
    district_prem,
    district_n,
    area_prem,
    area_n,
    national_prem,
    national_n,
    all_sectors,
    sector_to_dist,
    dist_to_area,
):
    """Top-down hierarchical shrinkage for premiums.

    Areas are shrunk toward the national premiums, districts toward their
    shrunk area, and sectors toward their shrunk district. When the direct
    parent has no estimate, the nearest available ancestor is used instead
    (district, then area, then national).

    Args:
        sector_prem, district_prem, area_prem: {key: {event_type: premium}}.
        sector_n, district_n, area_n: {key: count} used as shrinkage weights;
            a missing key counts as 0.
        national_prem: {event_type: premium} at national level (used as is).
        national_n: accepted for interface compatibility; not used.
        all_sectors: every sector that must appear in the result.
        sector_to_dist: {sector: district}.
        dist_to_area: {district: area}.

    Returns:
        {sector: {event_type: premium}} covering every entry of
        ``all_sectors`` plus every key of ``sector_prem``. Sectors without
        their own estimate share the dict object of the ancestor they
        inherit from.
    """
    # Area -> national
    area_shrunk = {
        area: shrink_premium(prem, national_prem, area_n.get(area, 0))
        for area, prem in area_prem.items()
    }

    # District -> area
    district_shrunk = {}
    for dist, prem in district_prem.items():
        parent = area_shrunk.get(dist_to_area.get(dist, ""), national_prem)
        district_shrunk[dist] = shrink_premium(prem, parent, district_n.get(dist, 0))

    def nearest_ancestor(sec):
        """Return the shrunk district premiums for ``sec``, else area, else national."""
        d = sector_to_dist.get(sec, "")
        if d in district_shrunk:
            return district_shrunk[d]
        return area_shrunk.get(dist_to_area.get(d, ""), national_prem)

    # Sector -> district. Previously a sector whose district had no estimate
    # was shrunk straight to national, skipping the area level that the fill
    # step below already honoured; both paths now use the same fallback chain.
    sector_shrunk = {
        sec: shrink_premium(prem, nearest_ancestor(sec), sector_n.get(sec, 0))
        for sec, prem in sector_prem.items()
    }

    # Fill missing sectors
    for sec in all_sectors:
        if sec not in sector_shrunk:
            sector_shrunk[sec] = nearest_ancestor(sec)

    return sector_shrunk
|
||||
|
||||
|
||||
def spatial_smooth(
    sector_premiums: dict[str, dict[str, float]],
    centroids: dict[str, tuple[float, float]],
    n_reno_map: dict[str, int],
) -> dict[str, dict[str, float]]:
    """Blend sparse sector premiums with K nearest neighbors.

    Each sector with a centroid keeps weight ``n / (n + SPATIAL_BLEND_K)``
    for its own premiums, where ``n`` comes from ``n_reno_map``; the rest is
    split between its nearest neighbours in proportion to inverse distance.
    Sectors whose own weight exceeds 0.95 are left untouched. Neighbours
    always contribute their unsmoothed input premiums.

    The first centroid component is treated as latitude and the second is
    scaled by cos(mean latitude) — presumably (lat, lon) in degrees, confirm
    against ``extract_centroids``.

    Returns:
        A new {sector: {event_type: premium}} mapping, or ``sector_premiums``
        itself when fewer than ``SPATIAL_NEIGHBORS + 1`` sectors have a
        centroid.
    """
    k_query = SPATIAL_NEIGHBORS + 1  # +1 because the query point is returned too

    located = [s for s in sector_premiums if s in centroids]
    if len(located) < k_query:
        return sector_premiums

    latlon = np.array([centroids[s] for s in located])
    lon_scale = np.cos(np.radians(np.mean(latlon[:, 0])))
    points = np.column_stack([latlon[:, 0], latlon[:, 1] * lon_scale])
    tree = KDTree(points)

    smoothed = dict(sector_premiums)
    for pos, sector in enumerate(located):
        n_own = n_reno_map.get(sector, 0)
        own_weight = n_own / (n_own + SPATIAL_BLEND_K)
        if own_weight > 0.95:
            continue

        dists, idxs = tree.query(points[pos], k=k_query)
        # Drop the first hit (the query point itself) and any other
        # zero-distance hit, which cannot be inverse-weighted.
        usable = [
            (1.0 / dist, sector_premiums[located[j]])
            for dist, j in zip(dists[1:], idxs[1:])
            if dist > 0 and located[j] in sector_premiums
        ]
        if not usable:
            continue

        inv_total = sum(inv for inv, _ in usable)
        neighbour_share = 1.0 - own_weight
        weighted = [
            (inv / inv_total * neighbour_share, prem) for inv, prem in usable
        ]

        own_premium = sector_premiums[sector]
        blended = {}
        for event in EVENT_TYPES:
            # An event type missing from any contributor counts as 0.0 for it.
            value = own_weight * own_premium.get(event, 0.0)
            for weight, prem in weighted:
                value += weight * prem.get(event, 0.0)
            blended[event] = value
        smoothed[sector] = blended

    return smoothed
|
||||
|
||||
|
||||
def main():
    """Estimate renovation premiums per sector and write them to parquet.

    For "All" and each entry of ``TYPE_GROUPS``, estimates national, area,
    district and sector premiums, applies hierarchical shrinkage and spatial
    smoothing, and emits one row per (sector, type_group, event_type). Type
    groups with fewer than ``MIN_PAIRS`` pairs, or with no national premium
    estimate, are skipped and contribute no rows.

    Output columns: sector, type_group, event_type, log_premium, n_reno_pairs.
    """
    parser = argparse.ArgumentParser(
        description="Estimate renovation premiums from repeat-sale residuals"
    )
    parser.add_argument(
        "--input", type=Path, required=True, help="Path to wide.parquet"
    )
    parser.add_argument(
        "--index", type=Path, required=True, help="Path to price_index.parquet"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output renovation_premium.parquet"
    )
    args = parser.parse_args()

    pairs = extract_pairs_with_events(args.input, args.index)
    centroids = extract_centroids(args.input)

    # Precompute hierarchy
    all_sectors = pairs["sector"].unique().to_list()
    sector_to_dist = {}
    dist_to_area = {}
    for s in all_sectors:
        d, a = hierarchy_keys(s)
        sector_to_dist[s] = d
        dist_to_area[d] = a

    all_type_groups = ["All"] + TYPE_GROUPS
    rows = []

    for tg in all_type_groups:
        print(f"\n--- {tg} ---")
        typed = pairs if tg == "All" else pairs.filter(pl.col("type_group") == tg)
        if len(typed) < MIN_PAIRS:
            print(f" Skipping (only {len(typed)} pairs)")
            continue

        print(f" {len(typed):,} pairs")

        # National
        national_prem = compute_premiums_for_group(typed)
        national_reno = typed.filter(
            (pl.col("has_extension") > 0)
            | (pl.col("has_renovation") > 0)
            | (pl.col("has_remodeling") > 0)
        ).height
        if not national_prem:
            print(" No renovation pairs at national level, skipping")
            continue

        print(
            " National premiums: "
            + ", ".join(
                f"{et}: {v:.4f} ({math.exp(v) - 1:.1%})"
                for et, v in national_prem.items()
            )
        )

        # Per-level
        print(" Computing per-level premiums:")
        area_prem, area_n = compute_premiums_for_level(typed, "area")
        district_prem, district_n = compute_premiums_for_level(typed, "district")
        sector_prem, sector_n = compute_premiums_for_level(typed, "sector")
        print(
            f" {len(area_prem)} areas, {len(district_prem)} districts, {len(sector_prem)} sectors with data"
        )

        # Shrinkage
        print(" Applying shrinkage...")
        sector_shrunk = apply_shrinkage(
            sector_prem,
            sector_n,
            district_prem,
            district_n,
            area_prem,
            area_n,
            national_prem,
            national_reno,
            all_sectors,
            sector_to_dist,
            dist_to_area,
        )

        # Spatial smoothing
        print(" Spatial smoothing...")
        sector_smoothed = spatial_smooth(sector_shrunk, centroids, sector_n)

        # Collect rows
        for sec in all_sectors:
            prem = sector_smoothed.get(sec, national_prem)
            n = sector_n.get(sec, 0)
            for et in EVENT_TYPES:
                rows.append((sec, tg, et, prem.get(et, 0.0), n))

    result = pl.DataFrame(
        rows,
        schema={
            "sector": pl.String,
            "type_group": pl.String,
            "event_type": pl.String,
            "log_premium": pl.Float64,
            "n_reno_pairs": pl.Int64,
        },
        orient="row",
    ).sort("type_group", "sector", "event_type")

    result.write_parquet(args.output)
    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"\nWrote {args.output} ({size_mb:.1f} MB)")
    # Report the type groups actually written: skipped groups contribute no
    # rows, so len(all_type_groups) would make the product disagree with the
    # row count.
    n_types_written = result["type_group"].n_unique()
    print(
        f" {result['sector'].n_unique():,} sectors x {n_types_written} types x {len(EVENT_TYPES)} events = {len(result):,} rows"
    )

    # Print summary statistics
    print("\nNational premium summary:")
    national = (
        result.filter(pl.col("type_group") == "All")
        .group_by("event_type")
        .agg(
            pl.col("log_premium").mean().alias("mean_log_premium"),
        )
        # group_by does not guarantee row order; sort for a stable printout.
        .sort("event_type")
    )
    for row in national.iter_rows(named=True):
        et = row["event_type"]
        lp = row["mean_log_premium"]
        print(f" {et}: log_premium={lp:.4f} ({math.exp(lp) - 1:.1%} price uplift)")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -26,6 +26,10 @@ dependencies = [
|
|||
"pyproj>=3.7.2",
|
||||
"pyshp>=2.3.0",
|
||||
"folium>=0.20.0",
|
||||
"flask",
|
||||
"httpx",
|
||||
"polars",
|
||||
"fake-useragent>=2.2.0",
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
|
|
|
|||
3
r5-java/.gitignore
vendored
Normal file
3
r5-java/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
jdk/
|
||||
lib/
|
||||
out/
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
# --- Build stage: fetch the JARs and compile the application ---
FROM eclipse-temurin:21-jdk AS build
WORKDIR /app

# Download pre-built R5 fat JAR from GitHub Releases (includes all R5 deps)
ADD https://github.com/conveyal/r5/releases/download/v7.5/r5-v7.5-all.jar /app/lib/r5.jar

# Gson for JSON (HTTP server is built into JDK)
ADD https://repo1.maven.org/maven2/com/google/code/gson/gson/2.11.0/gson-2.11.0.jar /app/lib/gson.jar

COPY src/ src/
RUN javac -cp "lib/*" -d out src/main/java/propertymap/App.java

# --- Runtime stage: JRE only, plus the JARs and compiled classes ---
FROM eclipse-temurin:21-jre
WORKDIR /app

# --no-install-recommends keeps the image to curl and its hard dependencies.
RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*

COPY --from=build /app/lib/ /app/lib/
COPY --from=build /app/out/ /app/out/
COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh
ENTRYPOINT ["/app/entrypoint.sh"]
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
#!/bin/bash
# Container entrypoint: prepare a writable build directory when no cached
# network exists yet, then start the application.
#
# Environment:
#   DATA_DIR           directory holding the transit *.zip files
#   OSM_DIR            directory holding the *.osm.pbf files
#   NETWORK_CACHE_DIR  directory checked for a cached network.dat
#   JAVA_HEAP          optional JVM max heap (default: 16g)
set -e

TRANSIT_DIR=$DATA_DIR
NETWORK_DIR=$NETWORK_CACHE_DIR
BUILD_DIR="$NETWORK_DIR/build"

# If no cached network yet, copy transit data to a writable location for the build.
# R5 writes temp files (.mapdb) next to the OSM/GTFS files during network construction.
if [ ! -f "$NETWORK_DIR/network.dat" ]; then
  echo "No cached network — copying transit data to writable build dir..."
  mkdir -p "$BUILD_DIR"
  # Best effort: either kind of input may be absent, so a failed copy is ignored.
  cp "$OSM_DIR"/*.osm.pbf "$BUILD_DIR/" 2>/dev/null || true
  cp "$TRANSIT_DIR"/*.zip "$BUILD_DIR/" 2>/dev/null || true
  # Point the application at the writable copy instead of the original data dir.
  export DATA_DIR="$BUILD_DIR"
fi

# The heap size was hard-coded to 16g; it can now be overridden via JAVA_HEAP.
exec java "-Xmx${JAVA_HEAP:-16g}" -cp "out:lib/*" propertymap.App
|
||||
129
r5-java/run.sh
Executable file
129
r5-java/run.sh
Executable file
|
|
@ -0,0 +1,129 @@
|
|||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
# Batch-compute travel times from all places to all England postcodes
|
||||
# for all transport modes (car, bicycle, walking, transit).
|
||||
#
|
||||
# Uses each place as origin with all postcodes as destinations — R5 does one
|
||||
# routing computation per place, then reads off travel times to all postcodes.
|
||||
# For car/bicycle/walking this is symmetric (place->postcode = postcode->place).
|
||||
#
|
||||
# Output: property-data/travel-times/{mode}/
|
||||
# - {index}.parquet files: (pcds VARCHAR, travel_minutes SMALLINT), one per place
|
||||
# - postcodes_ref.parquet: postcode order reference
|
||||
# - places_ref.parquet: place order reference
|
||||
#
|
||||
# Usage:
|
||||
# ./r5-java/run.sh # defaults: 28 threads, 40g heap
|
||||
# ./r5-java/run.sh --threads 8
|
||||
# ./r5-java/run.sh --heap 24g
|
||||
|
||||
# --- Defaults ---
|
||||
THREADS=28
|
||||
HEAP=40g
|
||||
NETWORK_DIR=property-data/r5-network
|
||||
OUTPUT_BASE=property-data/travel-times
|
||||
R5_DIR=r5-java
|
||||
|
||||
# --- Parse args ---
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
--threads) THREADS="$2"; shift 2 ;;
|
||||
--heap) HEAP="$2"; shift 2 ;;
|
||||
--network-dir) NETWORK_DIR="$2"; shift 2 ;;
|
||||
*) echo "Unknown: $1"; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
# --- Verify we're in project root ---
|
||||
if [ ! -f property-data/places.parquet ] || [ ! -f property-data/arcgis_data.parquet ]; then
|
||||
echo "Error: run from the property-map project root"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "=== R5 Batch Travel Times ==="
|
||||
echo "Threads: $THREADS | Heap: $HEAP"
|
||||
echo ""
|
||||
|
||||
# --- Step 1: Download JDK if needed ---
|
||||
JDK_DIR="$R5_DIR/jdk"
|
||||
if [ ! -d "$JDK_DIR" ]; then
|
||||
echo "--- Downloading JDK 21 ---"
|
||||
ARCH=$(uname -m)
|
||||
case "$ARCH" in
|
||||
x86_64|amd64) JDK_ARCH="x64" ;;
|
||||
aarch64|arm64) JDK_ARCH="aarch64" ;;
|
||||
*) echo "Unsupported architecture: $ARCH"; exit 1 ;;
|
||||
esac
|
||||
JDK_URL="https://api.adoptium.net/v3/binary/latest/21/ga/linux/${JDK_ARCH}/jdk/hotspot/normal/eclipse"
|
||||
mkdir -p "$JDK_DIR"
|
||||
curl -fL "$JDK_URL" | tar xz --strip-components=1 -C "$JDK_DIR"
|
||||
fi
|
||||
export JAVA_HOME="$JDK_DIR"
|
||||
export PATH="$JAVA_HOME/bin:$PATH"
|
||||
|
||||
# --- Step 2: Download library JARs ---
|
||||
LIB_DIR="$R5_DIR/lib"
|
||||
mkdir -p "$LIB_DIR"
|
||||
|
||||
R5_JAR="$LIB_DIR/r5.jar"
|
||||
DUCKDB_JAR="$LIB_DIR/duckdb.jar"
|
||||
|
||||
if [ ! -f "$R5_JAR" ]; then
|
||||
echo "--- Downloading R5 v7.5 fat JAR ---"
|
||||
curl -fL -o "$R5_JAR" https://github.com/conveyal/r5/releases/download/v7.5/r5-v7.5-all.jar
|
||||
fi
|
||||
|
||||
if [ ! -f "$DUCKDB_JAR" ]; then
|
||||
echo "--- Downloading DuckDB JDBC ---"
|
||||
curl -fL -o "$DUCKDB_JAR" https://repo1.maven.org/maven2/org/duckdb/duckdb_jdbc/1.0.0/duckdb_jdbc-1.0.0.jar
|
||||
fi
|
||||
|
||||
# --- Step 3: Compile Java source ---
|
||||
OUT_DIR="$R5_DIR/out"
|
||||
SRC_DIR="$R5_DIR/src/main/java/propertymap"
|
||||
|
||||
NEEDS_COMPILE=false
|
||||
for src in "$SRC_DIR"/*.java; do
|
||||
class="$OUT_DIR/propertymap/$(basename "${src%.java}").class"
|
||||
if [ ! -f "$class" ] || [ "$src" -nt "$class" ]; then
|
||||
NEEDS_COMPILE=true
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if $NEEDS_COMPILE; then
|
||||
echo "--- Compiling Java source ---"
|
||||
mkdir -p "$OUT_DIR"
|
||||
javac -cp "$LIB_DIR/*" -d "$OUT_DIR" "$SRC_DIR"/*.java
|
||||
fi
|
||||
|
||||
# --- Step 4: Prepare network build directory ---
|
||||
# R5 writes .mapdb temp files next to OSM/GTFS files during network construction.
|
||||
# Copy source data to a writable build dir to avoid polluting the originals.
|
||||
mkdir -p "$NETWORK_DIR"
|
||||
DATA_DIR="property-data/transit"
|
||||
|
||||
if [ ! -f "$NETWORK_DIR/network.dat" ]; then
|
||||
BUILD_DIR="$NETWORK_DIR/build"
|
||||
echo "--- No cached network — copying transit data to build dir ---"
|
||||
mkdir -p "$BUILD_DIR"
|
||||
cp property-data/transit/raw/*.osm.pbf "$BUILD_DIR/" 2>/dev/null || true
|
||||
cp property-data/transit/*.zip "$BUILD_DIR/" 2>/dev/null || true
|
||||
DATA_DIR="$BUILD_DIR"
|
||||
fi
|
||||
|
||||
# --- Step 5: Run batch ---
|
||||
echo ""
|
||||
echo "--- Starting batch computation ---"
|
||||
DATA_DIR="$DATA_DIR" NETWORK_CACHE_DIR="$NETWORK_DIR" \
|
||||
java -Xmx"$HEAP" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App \
|
||||
--postcodes property-data/arcgis_data.parquet \
|
||||
--places property-data/places.parquet \
|
||||
--output-dir "$OUTPUT_BASE" \
|
||||
--threads "$THREADS"
|
||||
|
||||
echo ""
|
||||
echo "=== Complete ==="
|
||||
echo "Output: $OUTPUT_BASE/{car,bicycle,walking,transit}/"
|
||||
echo "Reference: $OUTPUT_BASE/postcodes_ref.parquet, $OUTPUT_BASE/places_ref.parquet"
|
||||
|
|
@ -1,223 +1,208 @@
|
|||
package propertymap;
|
||||
|
||||
import com.conveyal.r5.OneOriginResult;
|
||||
import com.conveyal.r5.analyst.FreeFormPointSet;
|
||||
import com.conveyal.r5.analyst.PointSet;
|
||||
import com.conveyal.r5.analyst.TravelTimeComputer;
|
||||
import com.conveyal.r5.analyst.WebMercatorExtents;
|
||||
import com.conveyal.r5.analyst.cluster.RegionalTask;
|
||||
import com.conveyal.r5.analyst.cluster.TravelTimeResult;
|
||||
import com.conveyal.r5.api.util.LegMode;
|
||||
import com.conveyal.r5.api.util.TransitModes;
|
||||
import com.conveyal.r5.kryo.KryoNetworkSerializer;
|
||||
import com.conveyal.r5.transit.TransportNetwork;
|
||||
import com.google.gson.Gson;
|
||||
import com.sun.net.httpserver.HttpExchange;
|
||||
import com.sun.net.httpserver.HttpServer;
|
||||
import org.locationtech.jts.geom.Coordinate;
|
||||
import org.duckdb.DuckDBConnection;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDate;
|
||||
import java.util.EnumSet;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
/**
|
||||
* Batch-compute travel times from each origin (place) to all destinations (postcodes)
|
||||
* for all transport modes (car, bicycle, walking, transit).
|
||||
*
|
||||
* Output per mode: one parquet file per origin in {output-dir}/{mode}/{index}.parquet
|
||||
* with columns (pcds VARCHAR, travel_minutes SMALLINT). -1 = unreachable within 120 min.
|
||||
*/
|
||||
public class App {
|
||||
private static TransportNetwork network;
|
||||
private static final Gson gson = new Gson();
|
||||
|
||||
static class TravelTimeRequest {
|
||||
double[] origin; // [lat, lon]
|
||||
double[][] destinations; // [[lat, lon], ...]
|
||||
String mode; // "transit", "car", "bicycle", "walking"
|
||||
}
|
||||
|
||||
static class TravelTimeResponse {
|
||||
double[] travel_times; // minutes, -1 = unreachable
|
||||
}
|
||||
private static final String[] MODES = {"car", "bicycle", "walking", "transit"};
|
||||
private static final int MAX_RETRIES = 2;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String dataDir = System.getenv("DATA_DIR");
|
||||
String postcodesPath = requiredArg(args, "--postcodes");
|
||||
String placesPath = requiredArg(args, "--places");
|
||||
String outputDirStr = requiredArg(args, "--output-dir");
|
||||
int threads = Integer.parseInt(optionalArg(args, "--threads", "4"));
|
||||
|
||||
if (dataDir == null) {
|
||||
System.err.println("Error: DATA_DIR environment variable not set");
|
||||
System.exit(1);
|
||||
}
|
||||
Path outDir = Paths.get(outputDirStr);
|
||||
Files.createDirectories(outDir);
|
||||
|
||||
String networkCacheDir = System.getenv("NETWORK_CACHE_DIR");
|
||||
if (networkCacheDir == null) {
|
||||
System.err.println("Error: NETWORK_CACHE_DIR environment variable not set");
|
||||
System.exit(1);
|
||||
}
|
||||
LocalDate today = LocalDate.now();
|
||||
TransportNetwork network = Router.loadNetwork(requiredEnv("DATA_DIR"), requiredEnv("NETWORK_CACHE_DIR"));
|
||||
|
||||
System.out.println("Loading transport network from " + dataDir);
|
||||
System.out.println("Network cache dir: " + networkCacheDir);
|
||||
System.err.println("Loading postcodes (England only)...");
|
||||
Parquet.Postcodes postcodes = Parquet.loadEnglandPostcodes(
|
||||
postcodesPath, outDir.resolve("postcodes_ref.parquet"));
|
||||
int nDest = postcodes.lats().length;
|
||||
System.err.printf(" %,d postcodes%n", nDest);
|
||||
|
||||
File cacheFile = new File(networkCacheDir, "network.dat");
|
||||
if (cacheFile.exists()) {
|
||||
System.out.println("Loading cached network from " + cacheFile);
|
||||
network = KryoNetworkSerializer.read(cacheFile);
|
||||
} else {
|
||||
System.out.println("Building network (first run, this takes a few minutes)...");
|
||||
network = TransportNetwork.fromDirectory(new File(dataDir));
|
||||
new File(networkCacheDir).mkdirs();
|
||||
KryoNetworkSerializer.write(network, cacheFile);
|
||||
System.out.println("Network cached to " + cacheFile);
|
||||
}
|
||||
List<Router.DestinationChunk> chunks = Router.buildDestinationChunks(postcodes.lats(), postcodes.lons());
|
||||
|
||||
// Build stop-to-vertex distance tables (needed for egress routing in transit mode).
|
||||
// Not built by fromDirectory() and too large to fit in the Kryo cache with 4GB heap.
|
||||
System.out.println("Building stop-to-vertex distance tables...");
|
||||
network.transitLayer.buildDistanceTables(null);
|
||||
System.out.println("Distance tables built");
|
||||
System.err.println("Loading places (deduplicated)...");
|
||||
double[][] placesLatLon = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet"));
|
||||
double[] originLats = placesLatLon[0], originLons = placesLatLon[1];
|
||||
int nOrigins = originLats.length;
|
||||
System.err.printf(" %,d places%n", nOrigins);
|
||||
System.err.printf(" Estimated output: %.1f GB (%,d x %,d x 2B)%n",
|
||||
(double) nOrigins * nDest * 2 / 1e9, nOrigins, nDest);
|
||||
|
||||
System.out.println("Transport network loaded successfully");
|
||||
|
||||
HttpServer server = HttpServer.create(new InetSocketAddress(8003), 0);
|
||||
|
||||
server.createContext("/health", exchange -> {
|
||||
sendResponse(exchange, 200, "ok");
|
||||
// One thread pool shared across all modes
|
||||
ExecutorService pool = Executors.newFixedThreadPool(threads);
|
||||
// One DuckDB connection per thread, reused across all writes
|
||||
ThreadLocal<DuckDBConnection> threadConn = ThreadLocal.withInitial(() -> {
|
||||
try { return Parquet.connect(); }
|
||||
catch (Exception e) { throw new RuntimeException(e); }
|
||||
});
|
||||
|
||||
server.createContext("/travel-times", exchange -> {
|
||||
if (!"POST".equals(exchange.getRequestMethod())) {
|
||||
sendResponse(exchange, 405, "Method not allowed");
|
||||
return;
|
||||
try {
|
||||
for (String mode : MODES) {
|
||||
processMode(network, chunks, postcodes.codes(), originLats, originLons,
|
||||
nDest, outDir, mode, today, pool, threadConn);
|
||||
}
|
||||
} finally {
|
||||
pool.shutdown();
|
||||
pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
}
|
||||
|
||||
private static void processMode(
|
||||
TransportNetwork network, List<Router.DestinationChunk> chunks,
|
||||
String[] postcodes, double[] originLats, double[] originLons, int nDest,
|
||||
Path outDir, String mode, LocalDate date,
|
||||
ExecutorService pool, ThreadLocal<DuckDBConnection> threadConn) throws Exception {
|
||||
|
||||
int nOrigins = originLats.length;
|
||||
System.err.printf("%n=== %s ===%n", mode.toUpperCase());
|
||||
Path modeDir = outDir.resolve(mode);
|
||||
Files.createDirectories(modeDir);
|
||||
|
||||
List<Integer> remaining = findRemaining(modeDir, nOrigins);
|
||||
int alreadyDone = nOrigins - remaining.size();
|
||||
System.err.printf(" %,d done, %,d remaining%n", alreadyDone, remaining.size());
|
||||
|
||||
if (remaining.isEmpty()) {
|
||||
System.err.println(" All origins completed for this mode!");
|
||||
return;
|
||||
}
|
||||
|
||||
long startMs = System.currentTimeMillis();
|
||||
int total = remaining.size();
|
||||
AtomicInteger completed = new AtomicInteger(0);
|
||||
AtomicInteger failed = new AtomicInteger(0);
|
||||
|
||||
// Progress reporter on a timer instead of per-task stderr writes
|
||||
ScheduledExecutorService reporter = Executors.newSingleThreadScheduledExecutor(r -> {
|
||||
Thread t = new Thread(r, "progress");
|
||||
t.setDaemon(true);
|
||||
return t;
|
||||
});
|
||||
reporter.scheduleAtFixedRate(() -> {
|
||||
int c = completed.get();
|
||||
if (c == 0) return;
|
||||
double secs = (System.currentTimeMillis() - startMs) / 1000.0;
|
||||
double rate = c / secs;
|
||||
double etaH = (total - c) / rate / 3600;
|
||||
System.err.printf("\r [%,d/%,d] %.1f/s | ETA %.1fh | fail %d",
|
||||
c, total, rate, etaH, failed.get());
|
||||
}, 2, 2, TimeUnit.SECONDS);
|
||||
|
||||
// Submit all work, wait for completion via CountDownLatch-like pattern
|
||||
java.util.concurrent.CountDownLatch latch = new java.util.concurrent.CountDownLatch(remaining.size());
|
||||
|
||||
for (int idx : remaining) {
|
||||
pool.submit(() -> {
|
||||
try {
|
||||
processOrigin(network, chunks, postcodes, originLats[idx], originLons[idx],
|
||||
nDest, modeDir, mode, date, idx, threadConn.get());
|
||||
completed.incrementAndGet();
|
||||
} catch (Exception e) {
|
||||
failed.incrementAndGet();
|
||||
System.err.printf("%n [FAIL] origin %d: %s%n", idx, e.getMessage());
|
||||
} finally {
|
||||
latch.countDown();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
latch.await();
|
||||
reporter.shutdown();
|
||||
|
||||
double elapsedH = (System.currentTimeMillis() - startMs) / 3_600_000.0;
|
||||
int n = completed.get();
|
||||
System.err.printf("\r [%,d/%,d] %.1f/s | %.1fh | fail %d%n",
|
||||
n, total, n / Math.max(elapsedH * 3600, 1), elapsedH, failed.get());
|
||||
}
|
||||
|
||||
/** Compute and write travel times for a single origin, with retry on failure. */
|
||||
private static void processOrigin(
|
||||
TransportNetwork network, List<Router.DestinationChunk> chunks,
|
||||
String[] postcodes, double lat, double lon, int nDest,
|
||||
Path modeDir, String mode, LocalDate date, int idx,
|
||||
DuckDBConnection conn) throws Exception {
|
||||
|
||||
Path outPath = modeDir.resolve(String.format("%06d.parquet", idx));
|
||||
Exception lastError = null;
|
||||
|
||||
for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
||||
try {
|
||||
handleTravelTimes(exchange);
|
||||
short[] times = Router.computeTravelTimes(network, chunks, lat, lon, mode, nDest, date);
|
||||
Parquet.writeTravelTimes(conn, outPath, postcodes, times);
|
||||
return;
|
||||
} catch (Exception e) {
|
||||
System.err.println("Error handling travel-times: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
sendResponse(exchange, 500, "Internal server error: " + e.getMessage());
|
||||
}
|
||||
});
|
||||
|
||||
server.setExecutor(java.util.concurrent.Executors.newFixedThreadPool(4));
|
||||
server.start();
|
||||
System.out.println("R5 service listening on port 8003");
|
||||
}
|
||||
|
||||
private static void sendResponse(HttpExchange exchange, int status, String body) throws IOException {
|
||||
byte[] bytes = body.getBytes(StandardCharsets.UTF_8);
|
||||
exchange.getResponseHeaders().set("Content-Type", "application/json");
|
||||
exchange.sendResponseHeaders(status, bytes.length);
|
||||
try (OutputStream os = exchange.getResponseBody()) {
|
||||
os.write(bytes);
|
||||
}
|
||||
}
|
||||
|
||||
private static void handleTravelTimes(HttpExchange exchange) throws IOException {
|
||||
long t0 = System.currentTimeMillis();
|
||||
|
||||
String body = new String(exchange.getRequestBody().readAllBytes(), StandardCharsets.UTF_8);
|
||||
TravelTimeRequest req = gson.fromJson(body, TravelTimeRequest.class);
|
||||
|
||||
if (req.origin == null || req.origin.length != 2) {
|
||||
sendResponse(exchange, 400, "{\"error\":\"origin must be [lat, lon]\"}");
|
||||
return;
|
||||
}
|
||||
if (req.destinations == null || req.destinations.length == 0) {
|
||||
sendResponse(exchange, 400, "{\"error\":\"destinations must be non-empty\"}");
|
||||
return;
|
||||
}
|
||||
|
||||
String mode = req.mode != null ? req.mode : "transit";
|
||||
|
||||
// Build destination point set (Coordinate takes x=lon, y=lat)
|
||||
Coordinate[] coords = new Coordinate[req.destinations.length];
|
||||
for (int i = 0; i < req.destinations.length; i++) {
|
||||
coords[i] = new Coordinate(req.destinations[i][1], req.destinations[i][0]); // lon, lat
|
||||
}
|
||||
FreeFormPointSet destinations = new FreeFormPointSet(coords);
|
||||
|
||||
// Build the regional task
|
||||
RegionalTask task = new RegionalTask();
|
||||
task.fromLat = req.origin[0];
|
||||
task.fromLon = req.origin[1];
|
||||
task.date = LocalDate.now();
|
||||
task.percentiles = new int[]{50};
|
||||
task.recordTimes = true;
|
||||
task.destinationPointSets = new PointSet[]{ destinations };
|
||||
|
||||
// Set grid extents from destination point set (required by TravelTimeComputer)
|
||||
WebMercatorExtents extents = destinations.getWebMercatorExtents();
|
||||
task.zoom = extents.zoom;
|
||||
task.west = extents.west;
|
||||
task.north = extents.north;
|
||||
task.width = extents.width;
|
||||
task.height = extents.height;
|
||||
|
||||
switch (mode) {
|
||||
case "car":
|
||||
task.fromTime = 8 * 3600;
|
||||
task.toTime = 8 * 3600 + 60;
|
||||
task.maxTripDurationMinutes = 120;
|
||||
task.accessModes = EnumSet.of(LegMode.CAR);
|
||||
task.egressModes = EnumSet.of(LegMode.CAR);
|
||||
task.directModes = EnumSet.of(LegMode.CAR);
|
||||
task.transitModes = EnumSet.noneOf(TransitModes.class);
|
||||
break;
|
||||
case "bicycle":
|
||||
task.fromTime = 8 * 3600;
|
||||
task.toTime = 8 * 3600 + 60;
|
||||
task.maxTripDurationMinutes = 120;
|
||||
task.accessModes = EnumSet.of(LegMode.BICYCLE);
|
||||
task.egressModes = EnumSet.of(LegMode.BICYCLE);
|
||||
task.directModes = EnumSet.of(LegMode.BICYCLE);
|
||||
task.transitModes = EnumSet.noneOf(TransitModes.class);
|
||||
break;
|
||||
case "walking":
|
||||
task.fromTime = 8 * 3600;
|
||||
task.toTime = 8 * 3600 + 60;
|
||||
task.maxTripDurationMinutes = 120;
|
||||
task.accessModes = EnumSet.of(LegMode.WALK);
|
||||
task.egressModes = EnumSet.of(LegMode.WALK);
|
||||
task.directModes = EnumSet.of(LegMode.WALK);
|
||||
task.transitModes = EnumSet.noneOf(TransitModes.class);
|
||||
break;
|
||||
default: // transit
|
||||
task.fromTime = 8 * 3600;
|
||||
task.toTime = 8 * 3600 + 60; // single RAPTOR sweep
|
||||
task.maxTripDurationMinutes = 120;
|
||||
task.maxRides = 4;
|
||||
task.accessModes = EnumSet.of(LegMode.WALK);
|
||||
task.egressModes = EnumSet.of(LegMode.WALK);
|
||||
task.directModes = EnumSet.of(LegMode.WALK);
|
||||
task.transitModes = EnumSet.of(TransitModes.TRANSIT);
|
||||
break;
|
||||
}
|
||||
|
||||
// Compute travel times
|
||||
TravelTimeComputer computer = new TravelTimeComputer(task, network);
|
||||
OneOriginResult result = computer.computeTravelTimes();
|
||||
|
||||
TravelTimeResponse response = new TravelTimeResponse();
|
||||
response.travel_times = new double[req.destinations.length];
|
||||
|
||||
TravelTimeResult tt = result.travelTimes;
|
||||
if (tt != null) {
|
||||
int[][] values = tt.getValues();
|
||||
// values[percentileIndex][destinationIndex]
|
||||
for (int i = 0; i < req.destinations.length; i++) {
|
||||
if (i < values[0].length && values[0][i] != Integer.MAX_VALUE) {
|
||||
response.travel_times[i] = values[0][i]; // already in minutes
|
||||
} else {
|
||||
response.travel_times[i] = -1; // unreachable
|
||||
lastError = e;
|
||||
if (attempt < MAX_RETRIES) {
|
||||
System.err.printf("%n [RETRY %d/%d] origin %d: %s%n",
|
||||
attempt + 1, MAX_RETRIES, idx, e.getMessage());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < req.destinations.length; i++) {
|
||||
response.travel_times[i] = -1;
|
||||
}
|
||||
throw lastError;
|
||||
}
|
||||
|
||||
/** Find origin indices that don't yet have output parquet files. */
|
||||
private static List<Integer> findRemaining(Path modeDir, int nOrigins) throws Exception {
|
||||
List<Integer> remaining = new ArrayList<>();
|
||||
for (int i = 0; i < nOrigins; i++) {
|
||||
Path f = modeDir.resolve(String.format("%06d.parquet", i));
|
||||
if (!Files.exists(f) || Files.size(f) == 0) {
|
||||
remaining.add(i);
|
||||
}
|
||||
}
|
||||
return remaining;
|
||||
}
|
||||
|
||||
long elapsed = System.currentTimeMillis() - t0;
|
||||
System.out.println("Travel times (" + mode + ") computed for " + req.destinations.length +
|
||||
" destinations in " + elapsed + "ms");
|
||||
private static String requiredArg(String[] args, String name) {
|
||||
for (int i = 0; i < args.length - 1; i++) {
|
||||
if (args[i].equals(name)) return args[i + 1];
|
||||
}
|
||||
System.err.println("Missing required argument: " + name);
|
||||
System.err.println("Usage: App --postcodes FILE --places FILE --output-dir DIR [--threads N]");
|
||||
System.exit(1);
|
||||
return null; // unreachable
|
||||
}
|
||||
|
||||
sendResponse(exchange, 200, gson.toJson(response));
|
||||
private static String optionalArg(String[] args, String name, String defaultValue) {
|
||||
for (int i = 0; i < args.length - 1; i++) {
|
||||
if (args[i].equals(name)) return args[i + 1];
|
||||
}
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
private static String requiredEnv(String name) {
|
||||
String val = System.getenv(name);
|
||||
if (val == null) {
|
||||
System.err.println("Missing required environment variable: " + name);
|
||||
System.exit(1);
|
||||
}
|
||||
return val;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
112
r5-java/src/main/java/propertymap/Parquet.java
Normal file
112
r5-java/src/main/java/propertymap/Parquet.java
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
package propertymap;
|
||||
|
||||
import org.duckdb.DuckDBAppender;
|
||||
import org.duckdb.DuckDBConnection;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.Statement;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/** DuckDB-based parquet I/O. */
|
||||
public class Parquet {
|
||||
|
||||
record Postcodes(String[] codes, double[] lats, double[] lons) {}
|
||||
|
||||
static {
|
||||
try { Class.forName("org.duckdb.DuckDBDriver"); }
|
||||
catch (ClassNotFoundException e) { throw new RuntimeException(e); }
|
||||
}
|
||||
|
||||
/** Load England postcodes, write reference parquet, return codes + flat lat/lon arrays. */
|
||||
static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception {
|
||||
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet('"
|
||||
+ parquetPath + "') WHERE ctry = 'E92000001'");
|
||||
copyToParquet(stmt, "SELECT * FROM postcodes", refOut);
|
||||
|
||||
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM postcodes")) {
|
||||
rs.next();
|
||||
int n = rs.getInt(1);
|
||||
String[] codes = new String[n];
|
||||
double[] lats = new double[n];
|
||||
double[] lons = new double[n];
|
||||
|
||||
try (ResultSet data = stmt.executeQuery("SELECT pcds, lat, \"long\" FROM postcodes")) {
|
||||
int i = 0;
|
||||
while (data.next()) {
|
||||
codes[i] = data.getString(1);
|
||||
lats[i] = data.getDouble(2);
|
||||
lons[i] = data.getDouble(3);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return new Postcodes(codes, lats, lons);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Load places deduplicated by lat/lon, write reference parquet, return flat lat/lon arrays. */
|
||||
static double[][] loadPlaces(String parquetPath, Path refOut) throws Exception {
|
||||
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM ("
|
||||
+ "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn "
|
||||
+ "FROM read_parquet('" + parquetPath + "')) WHERE rn = 1");
|
||||
copyToParquet(stmt, "SELECT * FROM places", refOut);
|
||||
|
||||
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM places")) {
|
||||
rs.next();
|
||||
int n = rs.getInt(1);
|
||||
// Return as [lats, lons] flat arrays
|
||||
double[] lats = new double[n];
|
||||
double[] lons = new double[n];
|
||||
|
||||
try (ResultSet data = stmt.executeQuery("SELECT lat, lon FROM places")) {
|
||||
int i = 0;
|
||||
while (data.next()) {
|
||||
lats[i] = data.getDouble(1);
|
||||
lons[i] = data.getDouble(2);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return new double[][]{lats, lons};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Write postcode travel times as a ZSTD-compressed parquet (atomic via tmp + rename). */
|
||||
static void writeTravelTimes(DuckDBConnection conn, Path outPath, String[] postcodes, short[] times)
|
||||
throws Exception {
|
||||
Path tmp = outPath.resolveSibling(outPath.getFileName() + ".tmp");
|
||||
try (Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("DROP TABLE IF EXISTS t");
|
||||
stmt.execute("CREATE TABLE t (pcds VARCHAR, travel_minutes SMALLINT)");
|
||||
}
|
||||
try (DuckDBAppender appender = conn.createAppender("main", "t")) {
|
||||
for (int i = 0; i < postcodes.length; i++) {
|
||||
appender.beginRow();
|
||||
appender.append(postcodes[i]);
|
||||
appender.append(times[i]);
|
||||
appender.endRow();
|
||||
}
|
||||
}
|
||||
try (Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("COPY t TO '" + tmp.toAbsolutePath() + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
|
||||
}
|
||||
Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
|
||||
}
|
||||
|
||||
/** Create a new in-memory DuckDB connection (for use as a per-thread reusable connection). */
|
||||
static DuckDBConnection connect() throws Exception {
|
||||
return (DuckDBConnection) DriverManager.getConnection("jdbc:duckdb:");
|
||||
}
|
||||
|
||||
private static void copyToParquet(Statement stmt, String query, Path outPath) throws Exception {
|
||||
stmt.execute("COPY (" + query + ") TO '" + outPath.toAbsolutePath()
|
||||
+ "' (FORMAT PARQUET, COMPRESSION ZSTD)");
|
||||
}
|
||||
}
|
||||
211
r5-java/src/main/java/propertymap/Router.java
Normal file
211
r5-java/src/main/java/propertymap/Router.java
Normal file
|
|
@ -0,0 +1,211 @@
|
|||
package propertymap;
|
||||
|
||||
import com.conveyal.r5.OneOriginResult;
|
||||
import com.conveyal.r5.analyst.FreeFormPointSet;
|
||||
import com.conveyal.r5.analyst.PointSet;
|
||||
import com.conveyal.r5.analyst.TravelTimeComputer;
|
||||
import com.conveyal.r5.analyst.WebMercatorExtents;
|
||||
import com.conveyal.r5.analyst.cluster.RegionalTask;
|
||||
import com.conveyal.r5.analyst.cluster.TravelTimeResult;
|
||||
import com.conveyal.r5.api.util.LegMode;
|
||||
import com.conveyal.r5.api.util.TransitModes;
|
||||
import com.conveyal.r5.kryo.KryoNetworkSerializer;
|
||||
import com.conveyal.r5.transit.TransportNetwork;
|
||||
import org.locationtech.jts.geom.Coordinate;
|
||||
|
||||
import java.io.File;
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
|
||||
/** R5 routing: network loading, point set construction, travel time computation. */
|
||||
public class Router {
|
||||
|
||||
private static final int ZOOM = 9;
|
||||
private static final int MAX_GRID_CELLS = 4_900_000; // under R5's 5M limit
|
||||
|
||||
/**
|
||||
* A chunk of destinations that fits within R5's grid cell limit at zoom 9.
|
||||
* originalIndices maps each position in this chunk back to the full destinations array.
|
||||
*/
|
||||
record DestinationChunk(FreeFormPointSet pointSet, WebMercatorExtents extents, int[] originalIndices) {}
|
||||
|
||||
/** Load or build the transport network with Kryo caching. */
|
||||
static TransportNetwork loadNetwork(String dataDir, String cacheDir) throws Exception {
|
||||
System.err.println("Loading transport network...");
|
||||
File cacheFile = new File(cacheDir, "network.dat");
|
||||
TransportNetwork network;
|
||||
|
||||
if (cacheFile.exists()) {
|
||||
System.err.println(" Loading cached network from " + cacheFile);
|
||||
network = KryoNetworkSerializer.read(cacheFile);
|
||||
} else {
|
||||
System.err.println(" Building network (first run, takes a few minutes)...");
|
||||
network = TransportNetwork.fromDirectory(new File(dataDir));
|
||||
new File(cacheDir).mkdirs();
|
||||
KryoNetworkSerializer.write(network, cacheFile);
|
||||
System.err.println(" Cached to " + cacheFile);
|
||||
}
|
||||
|
||||
System.err.println(" Building distance tables...");
|
||||
network.transitLayer.buildDistanceTables(null);
|
||||
System.err.println(" Network ready");
|
||||
return network;
|
||||
}
|
||||
|
||||
/**
|
||||
* Split destinations into geographic chunks that each fit within R5's grid cell limit.
|
||||
* Sorts by latitude and splits into bands so each band's bounding box at zoom 9 is under 5M cells.
|
||||
*/
|
||||
static List<DestinationChunk> buildDestinationChunks(double[] lats, double[] lons) {
|
||||
int n = lats.length;
|
||||
|
||||
// Sort indices by latitude for geographic chunking
|
||||
Integer[] sorted = new Integer[n];
|
||||
for (int i = 0; i < n; i++) sorted[i] = i;
|
||||
Arrays.sort(sorted, (a, b) -> Double.compare(lats[a], lats[b]));
|
||||
|
||||
// Determine grid width (longitude span is the same for all chunks)
|
||||
double minLon = Double.MAX_VALUE, maxLon = -Double.MAX_VALUE;
|
||||
for (double lon : lons) {
|
||||
minLon = Math.min(minLon, lon);
|
||||
maxLon = Math.max(maxLon, lon);
|
||||
}
|
||||
int totalPixels = 256 << ZOOM;
|
||||
int gridWidth = lonToPixel(maxLon, totalPixels) - lonToPixel(minLon, totalPixels) + 1;
|
||||
int maxHeight = MAX_GRID_CELLS / gridWidth;
|
||||
|
||||
// Greedily build chunks: extend each band until it would exceed maxHeight
|
||||
List<DestinationChunk> chunks = new ArrayList<>();
|
||||
int start = 0;
|
||||
while (start < n) {
|
||||
int end = start + 1;
|
||||
int topPixel = latToPixel(lats[sorted[start]], totalPixels);
|
||||
|
||||
while (end < n) {
|
||||
int bottomPixel = latToPixel(lats[sorted[end]], totalPixels);
|
||||
if (Math.abs(bottomPixel - topPixel) + 1 > maxHeight) break;
|
||||
end++;
|
||||
}
|
||||
|
||||
chunks.add(buildChunk(lats, lons, sorted, start, end));
|
||||
start = end;
|
||||
}
|
||||
|
||||
System.err.printf(" Split into %d chunks at zoom %d (grid width %d, max height %d)%n",
|
||||
chunks.size(), ZOOM, gridWidth, maxHeight);
|
||||
return chunks;
|
||||
}
|
||||
|
||||
/** Compute travel times from one origin to all destinations across all chunks. */
|
||||
static short[] computeTravelTimes(
|
||||
TransportNetwork network, List<DestinationChunk> chunks,
|
||||
double originLat, double originLon, String mode, int nDest, LocalDate date) {
|
||||
|
||||
short[] times = new short[nDest];
|
||||
Arrays.fill(times, (short) -1);
|
||||
|
||||
for (DestinationChunk chunk : chunks) {
|
||||
RegionalTask task = buildTask(chunk, originLat, originLon, mode, date);
|
||||
TravelTimeComputer computer = new TravelTimeComputer(task, network);
|
||||
OneOriginResult result = computer.computeTravelTimes();
|
||||
|
||||
TravelTimeResult tt = result.travelTimes;
|
||||
if (tt != null) {
|
||||
int[][] values = tt.getValues();
|
||||
for (int i = 0; i < chunk.originalIndices.length && i < values[0].length; i++) {
|
||||
if (values[0][i] != Integer.MAX_VALUE) {
|
||||
times[chunk.originalIndices[i]] = (short) values[0][i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return times;
|
||||
}
|
||||
|
||||
private static DestinationChunk buildChunk(
|
||||
double[] lats, double[] lons, Integer[] sorted, int start, int end) {
|
||||
int size = end - start;
|
||||
int[] originalIndices = new int[size];
|
||||
Coordinate[] coords = new Coordinate[size];
|
||||
double minLat = Double.MAX_VALUE, maxLat = -Double.MAX_VALUE;
|
||||
double minLon = Double.MAX_VALUE, maxLon = -Double.MAX_VALUE;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
int idx = sorted[start + i];
|
||||
originalIndices[i] = idx;
|
||||
double lat = lats[idx], lon = lons[idx];
|
||||
coords[i] = new Coordinate(lon, lat); // x=lon, y=lat
|
||||
minLat = Math.min(minLat, lat);
|
||||
maxLat = Math.max(maxLat, lat);
|
||||
minLon = Math.min(minLon, lon);
|
||||
maxLon = Math.max(maxLon, lon);
|
||||
}
|
||||
|
||||
FreeFormPointSet pointSet = new FreeFormPointSet(coords);
|
||||
int totalPixels = 256 << ZOOM;
|
||||
int west = lonToPixel(minLon, totalPixels);
|
||||
int north = latToPixel(maxLat, totalPixels);
|
||||
int width = lonToPixel(maxLon, totalPixels) - west + 1;
|
||||
int height = latToPixel(minLat, totalPixels) - north + 1;
|
||||
WebMercatorExtents extents = new WebMercatorExtents(west, north, width, height, ZOOM);
|
||||
|
||||
return new DestinationChunk(pointSet, extents, originalIndices);
|
||||
}
|
||||
|
||||
private static RegionalTask buildTask(
|
||||
DestinationChunk chunk, double originLat, double originLon, String mode, LocalDate date) {
|
||||
RegionalTask task = new RegionalTask();
|
||||
task.fromLat = originLat;
|
||||
task.fromLon = originLon;
|
||||
task.date = date;
|
||||
task.percentiles = new int[]{50};
|
||||
task.recordTimes = true;
|
||||
task.destinationPointSets = new PointSet[]{chunk.pointSet};
|
||||
task.zoom = chunk.extents.zoom;
|
||||
task.west = chunk.extents.west;
|
||||
task.north = chunk.extents.north;
|
||||
task.width = chunk.extents.width;
|
||||
task.height = chunk.extents.height;
|
||||
task.fromTime = 8 * 3600;
|
||||
task.toTime = 8 * 3600 + 60;
|
||||
task.maxTripDurationMinutes = 120;
|
||||
|
||||
configureMode(task, mode);
|
||||
return task;
|
||||
}
|
||||
|
||||
private static void configureMode(RegionalTask task, String mode) {
|
||||
switch (mode) {
|
||||
case "car" -> setDirectMode(task, LegMode.CAR);
|
||||
case "bicycle" -> setDirectMode(task, LegMode.BICYCLE);
|
||||
case "walking" -> setDirectMode(task, LegMode.WALK);
|
||||
case "transit" -> {
|
||||
task.maxRides = 4;
|
||||
task.accessModes = EnumSet.of(LegMode.WALK);
|
||||
task.egressModes = EnumSet.of(LegMode.WALK);
|
||||
task.directModes = EnumSet.of(LegMode.WALK);
|
||||
task.transitModes = EnumSet.of(TransitModes.TRANSIT);
|
||||
}
|
||||
default -> throw new IllegalArgumentException("Unknown mode: " + mode);
|
||||
}
|
||||
}
|
||||
|
||||
private static void setDirectMode(RegionalTask task, LegMode legMode) {
|
||||
task.accessModes = EnumSet.of(legMode);
|
||||
task.egressModes = EnumSet.of(legMode);
|
||||
task.directModes = EnumSet.of(legMode);
|
||||
task.transitModes = EnumSet.noneOf(TransitModes.class);
|
||||
}
|
||||
|
||||
private static int lonToPixel(double lon, int totalPixels) {
|
||||
return (int) Math.floor(totalPixels * (lon + 180.0) / 360.0);
|
||||
}
|
||||
|
||||
private static int latToPixel(double lat, int totalPixels) {
|
||||
double latRad = Math.toRadians(lat);
|
||||
return (int) Math.floor(totalPixels * (1.0 - Math.log(Math.tan(latRad) + 1.0 / Math.cos(latRad)) / Math.PI) / 2.0);
|
||||
}
|
||||
}
|
||||
62
uv.lock
generated
62
uv.lock
generated
|
|
@ -140,6 +140,15 @@ css = [
|
|||
{ name = "tinycss2", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "blinker"
|
||||
version = "1.9.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460, upload-time = "2024-11-08T17:25:47.436Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "branca"
|
||||
version = "0.8.2"
|
||||
|
|
@ -379,6 +388,15 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017", size = 28317, upload-time = "2025-09-01T09:48:08.5Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fake-useragent"
|
||||
version = "2.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/41/43/948d10bf42735709edb5ae51e23297d034086f17fc7279fef385a7acb473/fake_useragent-2.2.0.tar.gz", hash = "sha256:4e6ab6571e40cc086d788523cf9e018f618d07f9050f822ff409a4dfe17c16b2", size = 158898, upload-time = "2025-04-14T15:32:19.238Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/51/37/b3ea9cd5558ff4cb51957caca2193981c6b0ff30bd0d2630ac62505d99d0/fake_useragent-2.2.0-py3-none-any.whl", hash = "sha256:67f35ca4d847b0d298187443aaf020413746e56acd985a611908c73dba2daa24", size = 161695, upload-time = "2025-04-14T15:32:17.732Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastexcel"
|
||||
version = "0.19.0"
|
||||
|
|
@ -400,6 +418,23 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/cb/a8/20d0723294217e47de6d9e2e40fd4a9d2f7c4b6ef974babd482a59743694/fastjsonschema-2.21.2-py3-none-any.whl", hash = "sha256:1c797122d0a86c5cace2e54bf4e819c36223b552017172f32c5c024a6b77e463", size = 24024, upload-time = "2025-08-14T18:49:34.776Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flask"
|
||||
version = "3.1.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "blinker", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
{ name = "click", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
{ name = "itsdangerous", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
{ name = "jinja2", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
{ name = "markupsafe", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
{ name = "werkzeug", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/dc/6d/cfe3c0fcc5e477df242b98bfe186a4c34357b4847e87ecaef04507332dab/flask-3.1.2.tar.gz", hash = "sha256:bf656c15c80190ed628ad08cdfd3aaa35beb087855e2f494910aa3774cc4fd87", size = 720160, upload-time = "2025-08-19T21:03:21.205Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ec/f9/7f9263c5695f4bd0023734af91bedb2ff8209e8de6ead162f35d8dc762fd/flask-3.1.2-py3-none-any.whl", hash = "sha256:ca1d8112ec8a6158cc29ea4858963350011b5c846a414cdb7a954aa9e967d03c", size = 103308, upload-time = "2025-08-19T21:03:19.499Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "folium"
|
||||
version = "0.20.0"
|
||||
|
|
@ -593,6 +628,15 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/7b/55/e5326141505c5d5e34c5e0935d2908a74e4561eca44108fbfb9c13d2911a/isoduration-20.11.0-py3-none-any.whl", hash = "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042", size = 11321, upload-time = "2020-11-01T10:59:58.02Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itsdangerous"
|
||||
version = "2.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/9c/cb/8ac0172223afbccb63986cc25049b154ecfb5e85932587206f42317be31d/itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173", size = 54410, upload-time = "2024-04-16T21:28:15.614Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/04/96/92447566d16df59b2a776c0fb82dbc4d9e07cd95062562af01e408583fc4/itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", size = 16234, upload-time = "2024-04-16T21:28:14.499Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jedi"
|
||||
version = "0.19.2"
|
||||
|
|
@ -1367,7 +1411,9 @@ name = "property-map"
|
|||
version = "0.1.0"
|
||||
source = { virtual = "." }
|
||||
dependencies = [
|
||||
{ name = "fake-useragent", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
{ name = "fastexcel", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
{ name = "flask", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
{ name = "folium", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
{ name = "httpx", extra = ["socks"], marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
{ name = "ipywidgets", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
|
|
@ -1397,8 +1443,11 @@ dev = [
|
|||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "fake-useragent", specifier = ">=2.2.0" },
|
||||
{ name = "fastexcel", specifier = ">=0.19.0" },
|
||||
{ name = "flask" },
|
||||
{ name = "folium", specifier = ">=0.20.0" },
|
||||
{ name = "httpx" },
|
||||
{ name = "httpx", extras = ["socks"], specifier = ">=0.28.1" },
|
||||
{ name = "ipywidgets", specifier = ">=8.0.0" },
|
||||
{ name = "jupyter", specifier = ">=1.0.0" },
|
||||
|
|
@ -1407,6 +1456,7 @@ requires-dist = [
|
|||
{ name = "osmium", specifier = ">=4.0.0" },
|
||||
{ name = "pandas", specifier = ">=2.0.0" },
|
||||
{ name = "plotly", specifier = ">=6.5.2" },
|
||||
{ name = "polars" },
|
||||
{ name = "polars", specifier = ">=1.37.1" },
|
||||
{ name = "pyarrow", specifier = ">=15.0.0" },
|
||||
{ name = "pyproj", specifier = ">=3.7.2" },
|
||||
|
|
@ -2127,6 +2177,18 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/34/db/b10e48aa8fff7407e67470363eac595018441cf32d5e1001567a7aeba5d2/websocket_client-1.9.0-py3-none-any.whl", hash = "sha256:af248a825037ef591efbf6ed20cc5faa03d3b47b9e5a2230a529eeee1c1fc3ef", size = 82616, upload-time = "2025-10-07T21:16:34.951Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "werkzeug"
|
||||
version = "3.1.5"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "markupsafe", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/5a/70/1469ef1d3542ae7c2c7b72bd5e3a4e6ee69d7978fa8a3af05a38eca5becf/werkzeug-3.1.5.tar.gz", hash = "sha256:6a548b0e88955dd07ccb25539d7d0cc97417ee9e179677d22c7041c8f078ce67", size = 864754, upload-time = "2026-01-08T17:49:23.247Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ad/e4/8d97cca767bcc1be76d16fb76951608305561c6e056811587f36cb1316a8/werkzeug-3.1.5-py3-none-any.whl", hash = "sha256:5111e36e91086ece91f93268bb39b4a35c1e6f1feac762c9c822ded0a4e322dc", size = 225025, upload-time = "2026-01-08T17:49:21.859Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "widgetsnbextension"
|
||||
version = "4.0.15"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue