Merge branch 'main' of https://github.com/rubyhrzhang/property-map
This commit is contained in:
commit
88377cc7f6
46 changed files with 16523 additions and 10 deletions
82
.github/workflows/ci.yml
vendored
Normal file
82
.github/workflows/ci.yml
vendored
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
lint-python:
|
||||
name: Lint Python
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
with:
|
||||
version: "latest"
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install 3.12
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv sync --dev
|
||||
|
||||
- name: Run ruff check
|
||||
run: uv run ruff check .
|
||||
|
||||
- name: Run ruff format check
|
||||
run: uv run ruff format --check .
|
||||
|
||||
lint-frontend:
|
||||
name: Lint Frontend
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: frontend
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: "20"
|
||||
cache: "npm"
|
||||
cache-dependency-path: frontend/package-lock.json
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Run ESLint
|
||||
run: npm run lint
|
||||
|
||||
- name: Run Prettier check
|
||||
run: npm run format:check
|
||||
|
||||
- name: Run TypeScript check
|
||||
run: npm run typecheck
|
||||
|
||||
build-frontend:
|
||||
name: Build Frontend
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint-frontend]
|
||||
defaults:
|
||||
run:
|
||||
working-directory: frontend
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: "20"
|
||||
cache: "npm"
|
||||
cache-dependency-path: frontend/package-lock.json
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Build
|
||||
run: npm run build
|
||||
28
.github/workflows/lint.yml
vendored
Normal file
28
.github/workflows/lint.yml
vendored
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
name: Lint
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv sync
|
||||
|
||||
- name: Check linting
|
||||
run: uv run ruff check .
|
||||
|
||||
- name: Check formatting
|
||||
run: uv run ruff format --check .
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
|
|
@ -1,3 +1,7 @@
|
|||
data_sources
|
||||
.venv
|
||||
.claude
|
||||
.claude
|
||||
tfl_journey_client
|
||||
**/node_modules
|
||||
**/__pycache__
|
||||
**/dist
|
||||
|
|
|
|||
7
.vscode/settings.json
vendored
Normal file
7
.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"files.exclude": {
|
||||
"*.venv": true,
|
||||
"**/__pycache__": true,
|
||||
"**/node_modules": true
|
||||
}
|
||||
}
|
||||
1170
Journey.yaml
Normal file
1170
Journey.yaml
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,5 +1,12 @@
|
|||
# Property Map
|
||||
|
||||
## Dev setup
|
||||
|
||||
```sh
|
||||
curl -1sLf 'https://dl.cloudsmith.io/public/task/task/setup.deb.sh' | sudo -E bash
sudo apt install task
|
||||
task prepare
|
||||
```
|
||||
|
||||
## Area
|
||||
|
||||
1. 45 min commute (perhaps near train station)
|
||||
|
|
@ -45,3 +52,5 @@
|
|||
- [Population by Ethnicity and Region 2021](https://www.ethnicity-facts-figures.service.gov.uk/uk-population-by-ethnicity/national-and-regional-populations/regional-ethnic-diversity/latest/#download-the-data)
|
||||
|
||||
- [Crime](https://data.police.uk/data/)
|
||||
|
||||
- [Postcode -> GPS](https://www.arcgis.com/sharing/rest/content/items/077631e063eb4e1ab43575d01381ec33/data)
|
||||
|
|
|
|||
101
Taskfile.yml
Normal file
101
Taskfile.yml
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
version: '3'
|
||||
|
||||
tasks:
|
||||
install:
|
||||
desc: Install Python and frontend dependencies
|
||||
cmds:
|
||||
- uv sync
|
||||
- cd frontend && npm install
|
||||
|
||||
download:
|
||||
desc: Download data
|
||||
deps:
|
||||
- install
|
||||
cmds:
|
||||
- uv run python generate_tfl_client.py
|
||||
- uv run python download_land_registry.py
|
||||
- uv run python download_arcgis_data.py
|
||||
|
||||
pipeline:
|
||||
desc: Run data processing pipeline
|
||||
deps:
|
||||
- download
|
||||
cmds:
|
||||
- uv run python -m pipeline.run
|
||||
|
||||
prepare:
|
||||
desc: Prepare the application (install, download data, run pipeline)
|
||||
deps:
|
||||
- pipeline
|
||||
|
||||
server:
|
||||
desc: Run FastAPI backend on port 8001
|
||||
cmds:
|
||||
- uv run fastapi dev server/main.py --port 8001
|
||||
|
||||
frontend:
|
||||
desc: Run frontend dev server on port 3030 (proxies /api to :8001)
|
||||
dir: frontend
|
||||
cmds:
|
||||
- npm run dev
|
||||
|
||||
build:
|
||||
desc: Build frontend for production
|
||||
dir: frontend
|
||||
cmds:
|
||||
- npm run build
|
||||
|
||||
prod:
|
||||
desc: Run production server (serves built frontend)
|
||||
cmds:
|
||||
- uv run fastapi run server/main.py --port 8001
|
||||
|
||||
lint:
|
||||
desc: Lint all code (Python and TypeScript)
|
||||
cmds:
|
||||
- task: lint:python
|
||||
- task: lint:frontend
|
||||
|
||||
lint:python:
|
||||
desc: Lint Python code with ruff
|
||||
cmds:
|
||||
- uv run ruff check .
|
||||
|
||||
lint:frontend:
|
||||
desc: Lint frontend TypeScript code
|
||||
dir: frontend
|
||||
cmds:
|
||||
- npm run lint
|
||||
- npm run format:check
|
||||
|
||||
format:
|
||||
desc: Format all code (Python and TypeScript)
|
||||
cmds:
|
||||
- task: format:python
|
||||
- task: format:frontend
|
||||
|
||||
format:python:
|
||||
desc: Format Python code with ruff
|
||||
cmds:
|
||||
- uv run ruff check --fix .
|
||||
- uv run ruff format .
|
||||
|
||||
format:frontend:
|
||||
desc: Format frontend TypeScript code
|
||||
dir: frontend
|
||||
cmds:
|
||||
- npm run lint:fix
|
||||
- npm run format
|
||||
|
||||
check:
|
||||
desc: Run all checks (lint, typecheck, build)
|
||||
cmds:
|
||||
- task: lint
|
||||
- task: typecheck
|
||||
- task: build
|
||||
|
||||
typecheck:
|
||||
desc: Type check frontend TypeScript code
|
||||
dir: frontend
|
||||
cmds:
|
||||
- npm run typecheck
|
||||
129
download_arcgis_data.py
Normal file
129
download_arcgis_data.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Download ArcGIS data and convert to Parquet."""
|
||||
|
||||
# Run it with:
|
||||
# uv run download_arcgis_data.py
|
||||
|
||||
import time
|
||||
import zipfile
|
||||
import httpx
|
||||
import polars as pl
|
||||
from pathlib import Path
|
||||
from tqdm import tqdm
|
||||
|
||||
URL = "https://www.arcgis.com/sharing/rest/content/items/077631e063eb4e1ab43575d01381ec33/data"
|
||||
|
||||
BASE_DATA_PATH = Path("./data_sources")
|
||||
BASE_DATA_PATH.mkdir(exist_ok=True)
|
||||
DOWNLOAD_PATH = BASE_DATA_PATH / "arcgis_data.zip"
|
||||
EXTRACT_PATH = BASE_DATA_PATH / "arcgis_extracted"
|
||||
PARQUET_PATH = BASE_DATA_PATH / "arcgis_data.parquet"
|
||||
|
||||
MAX_RETRIES = 3
|
||||
|
||||
|
||||
def download_with_progress(url: str, output_path: Path) -> None:
    """Download ``url`` to ``output_path`` with a progress bar and retry logic.

    The body is streamed into a sibling ``*.part`` file that is renamed onto
    ``output_path`` only once the transfer has completed, so an interrupted
    download never leaves a truncated file at ``output_path``.

    Args:
        url: Address to fetch; redirects are followed.
        output_path: Destination file; replaced if it already exists.

    Raises:
        httpx.HTTPStatusError: The server answered with an error status.
        httpx.TransportError: The transfer still failed on the last of
            ``MAX_RETRIES`` attempts.
    """
    partial_path = output_path.with_name(output_path.name + ".part")
    # Transient failures worth retrying: timeouts, connection/read errors and
    # connections dropped by the server mid-stream.
    retryable = (httpx.TimeoutException, httpx.NetworkError, httpx.RemoteProtocolError)

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            with httpx.stream(
                "GET",
                url,
                follow_redirects=True,
                timeout=httpx.Timeout(30.0, read=None),
            ) as response:
                response.raise_for_status()  # pyright: ignore[reportUnusedCallResult]
                # Without a Content-Length the size is unknown; tqdm expects
                # None for that case rather than a total of 0.
                total = int(response.headers.get("content-length", 0)) or None

                with (
                    open(partial_path, "wb") as f,
                    tqdm(
                        total=total,
                        unit="B",
                        unit_scale=True,
                        unit_divisor=1024,
                        desc="Downloading",
                    ) as pbar,
                ):
                    for chunk in response.iter_bytes(chunk_size=8192):
                        f.write(chunk)
                        pbar.update(len(chunk))
        except retryable as e:
            partial_path.unlink(missing_ok=True)
            if attempt == MAX_RETRIES:
                raise
            wait = 2**attempt
            print(f"Attempt {attempt} failed: {e}. Retrying in {wait}s...")
            time.sleep(wait)
        except BaseException:
            # Non-retryable failure (HTTP error status, Ctrl-C, disk full...):
            # remove the incomplete file before propagating.
            partial_path.unlink(missing_ok=True)
            raise
        else:
            partial_path.replace(output_path)
            return  # Success
|
||||
|
||||
|
||||
def extract_zip(zip_path: Path, extract_path: Path) -> list[Path]:
    """Extract a ZIP archive and return the paths of the extracted files.

    Args:
        zip_path: Archive to read.
        extract_path: Directory to extract into; created (including missing
            parents) when it does not exist.

    Returns:
        ``extract_path`` joined with each file member's name, in archive
        order. Directory entries are extracted but not listed.
    """
    print("Extracting ZIP file...")
    extract_path.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(extract_path)
        # Directory members are not files, so they must not inflate the result.
        return [extract_path / info.filename for info in zf.infolist() if not info.is_dir()]
|
||||
|
||||
|
||||
def find_data_file(extract_path: Path) -> Path:
    """Find the main data file (CSV, XLSX, or similar) below ``extract_path``.

    Extensions are tried in order of preference (CSV, XLSX, XLS, JSON,
    GeoJSON) and compared case-insensitively, so ``DATA.CSV`` is found too.
    When several files share the winning extension, the largest one is
    returned.

    Args:
        extract_path: Directory to search recursively.

    Returns:
        Path of the selected data file.

    Raises:
        FileNotFoundError: No file with a supported extension exists.
    """
    preferred_suffixes = (".csv", ".xlsx", ".xls", ".json", ".geojson")

    # One directory walk, bucketing regular files by lower-cased suffix.
    by_suffix: dict[str, list[Path]] = {}
    for candidate in extract_path.rglob("*"):
        if candidate.is_file():
            by_suffix.setdefault(candidate.suffix.lower(), []).append(candidate)

    for suffix in preferred_suffixes:
        files = by_suffix.get(suffix)
        if files:
            # Return the largest file if multiple found
            return max(files, key=lambda f: f.stat().st_size)

    raise FileNotFoundError(f"No data file found in {extract_path}")
|
||||
|
||||
|
||||
def convert_to_parquet(data_path: Path, parquet_path: Path) -> None:
|
||||
"""Convert data file to Parquet using Polars."""
|
||||
print(f"Converting {data_path.name} to Parquet...")
|
||||
|
||||
suffix = data_path.suffix.lower()
|
||||
|
||||
if suffix == ".csv":
|
||||
df = pl.read_csv(data_path, try_parse_dates=True)
|
||||
elif suffix in [".xlsx", ".xls"]:
|
||||
df = pl.read_excel(data_path)
|
||||
elif suffix in [".json", ".geojson"]:
|
||||
df = pl.read_json(data_path)
|
||||
else:
|
||||
raise ValueError(f"Unsupported file format: {suffix}")
|
||||
|
||||
df.write_parquet(parquet_path, compression="zstd")
|
||||
print(f"Saved to {parquet_path}")
|
||||
print(f"Rows: {df.height:,}")
|
||||
print(f"Columns: {df.columns}")
|
||||
print(f"Original size: {data_path.stat().st_size / 1024**2:.1f} MB")
|
||||
print(f"Parquet size: {parquet_path.stat().st_size / 1024**2:.1f} MB")
|
||||
|
||||
|
||||
def main() -> None:
    """Download the ArcGIS data set (if needed) and convert it to Parquet.

    Does nothing when the Parquet output already exists. An existing download
    is reused; a ZIP download is extracted and its main data file converted,
    anything else is converted directly.
    """
    if PARQUET_PATH.exists():
        print(f"Parquet already exists at {PARQUET_PATH}, skipping")
        return

    if DOWNLOAD_PATH.exists():
        print(f"File already exists at {DOWNLOAD_PATH}, skipping download")
    else:
        download_with_progress(URL, DOWNLOAD_PATH)

    # Not a ZIP: treat the download itself as the data file.
    data_file = DOWNLOAD_PATH
    if zipfile.is_zipfile(DOWNLOAD_PATH):
        extracted_files = extract_zip(DOWNLOAD_PATH, EXTRACT_PATH)
        print(f"Extracted {len(extracted_files)} files")
        data_file = find_data_file(EXTRACT_PATH)

    convert_to_parquet(data_file, PARQUET_PATH)


if __name__ == "__main__":
    main()
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
"""Download Land Registry price paid data and convert to Parquet."""
|
||||
|
||||
# Run it with:
|
||||
# uv run --with httpx --with polars --with tqdm python download_land_registry.py
|
||||
# uv run download_land_registry.py
|
||||
|
||||
# Note: the source CSV is ~5GB; it should compress to roughly ~1GB in Parquet format with ZSTD compression.
|
||||
|
||||
|
|
@ -32,16 +32,19 @@ def download_with_progress(url: str, output_path: Path) -> None:
|
|||
follow_redirects=True,
|
||||
timeout=httpx.Timeout(30.0, read=None),
|
||||
) as response:
|
||||
response.raise_for_status() # pyright: ignore[reportUnusedCallResult]
|
||||
response.raise_for_status() # pyright: ignore[reportUnusedCallResult]
|
||||
total = int(response.headers.get("content-length", 0))
|
||||
|
||||
with open(output_path, "wb") as f, tqdm(
|
||||
total=total,
|
||||
unit="B",
|
||||
unit_scale=True,
|
||||
unit_divisor=1024,
|
||||
desc="Downloading",
|
||||
) as pbar:
|
||||
with (
|
||||
open(output_path, "wb") as f,
|
||||
tqdm(
|
||||
total=total,
|
||||
unit="B",
|
||||
unit_scale=True,
|
||||
unit_divisor=1024,
|
||||
desc="Downloading",
|
||||
) as pbar,
|
||||
):
|
||||
for chunk in response.iter_bytes(chunk_size=8192):
|
||||
f.write(chunk)
|
||||
pbar.update(len(chunk))
|
||||
|
|
@ -95,6 +98,10 @@ def convert_to_parquet(csv_path: Path, parquet_path: Path) -> None:
|
|||
|
||||
|
||||
def main() -> None:
|
||||
if PARQUET_PATH.exists():
|
||||
print(f"Parquet already exists at {PARQUET_PATH}, skipping")
|
||||
return
|
||||
|
||||
if not CSV_PATH.exists():
|
||||
download_with_progress(URL, CSV_PATH)
|
||||
else:
|
||||
|
|
|
|||
3
frontend/.babelrc
Normal file
3
frontend/.babelrc
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"presets": ["@babel/preset-react"]
|
||||
}
|
||||
31
frontend/.eslintrc.json
Normal file
31
frontend/.eslintrc.json
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"env": {
|
||||
"browser": true,
|
||||
"es2021": true
|
||||
},
|
||||
"extends": [
|
||||
"eslint:recommended",
|
||||
"plugin:react/recommended",
|
||||
"plugin:react-hooks/recommended",
|
||||
"plugin:@typescript-eslint/recommended"
|
||||
],
|
||||
"parser": "@typescript-eslint/parser",
|
||||
"parserOptions": {
|
||||
"ecmaFeatures": {
|
||||
"jsx": true
|
||||
},
|
||||
"ecmaVersion": "latest",
|
||||
"sourceType": "module"
|
||||
},
|
||||
"plugins": ["react", "react-hooks", "@typescript-eslint"],
|
||||
"settings": {
|
||||
"react": {
|
||||
"version": "detect"
|
||||
}
|
||||
},
|
||||
"rules": {
|
||||
"react/react-in-jsx-scope": "off",
|
||||
"react/prop-types": "off",
|
||||
"@typescript-eslint/no-unused-vars": ["error", { "argsIgnorePattern": "^_" }]
|
||||
}
|
||||
}
|
||||
7
frontend/.prettierrc
Normal file
7
frontend/.prettierrc
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"semi": true,
|
||||
"singleQuote": true,
|
||||
"tabWidth": 2,
|
||||
"trailingComma": "es5",
|
||||
"printWidth": 100
|
||||
}
|
||||
11436
frontend/package-lock.json
generated
Normal file
11436
frontend/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load diff
51
frontend/package.json
Normal file
51
frontend/package.json
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
{
|
||||
"name": "property-map-frontend",
|
||||
"version": "1.0.0",
|
||||
"scripts": {
|
||||
"dev": "webpack serve --mode development --port 3030",
|
||||
"build": "webpack --mode production",
|
||||
"typecheck": "tsc --noEmit",
|
||||
"lint": "eslint src --ext .ts,.tsx",
|
||||
"lint:fix": "eslint src --ext .ts,.tsx --fix",
|
||||
"format": "prettier --write \"src/**/*.{ts,tsx,css}\"",
|
||||
"format:check": "prettier --check \"src/**/*.{ts,tsx,css}\""
|
||||
},
|
||||
"dependencies": {
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"@deck.gl/core": "^9.0.0",
|
||||
"@deck.gl/layers": "^9.0.0",
|
||||
"@deck.gl/geo-layers": "^9.0.0",
|
||||
"@deck.gl/react": "^9.0.0",
|
||||
"maplibre-gl": "^4.0.0",
|
||||
"react-map-gl": "^7.1.0",
|
||||
"@radix-ui/react-slider": "^1.1.0",
|
||||
"@radix-ui/react-select": "^2.0.0",
|
||||
"class-variance-authority": "^0.7.0",
|
||||
"clsx": "^2.1.0",
|
||||
"tailwind-merge": "^2.2.0",
|
||||
"tailwindcss-animate": "^1.0.7"
|
||||
},
|
||||
"devDependencies": {
|
||||
"webpack": "^5.90.0",
|
||||
"webpack-cli": "^5.1.0",
|
||||
"webpack-dev-server": "^5.0.0",
|
||||
"html-webpack-plugin": "^5.6.0",
|
||||
"css-loader": "^7.0.0",
|
||||
"style-loader": "^4.0.0",
|
||||
"postcss-loader": "^8.0.0",
|
||||
"ts-loader": "^9.5.0",
|
||||
"typescript": "^5.4.0",
|
||||
"@types/react": "^18.2.0",
|
||||
"@types/react-dom": "^18.2.0",
|
||||
"tailwindcss": "^3.4.0",
|
||||
"autoprefixer": "^10.4.0",
|
||||
"postcss": "^8.4.0",
|
||||
"eslint": "^8.57.0",
|
||||
"@typescript-eslint/eslint-plugin": "^7.0.0",
|
||||
"@typescript-eslint/parser": "^7.0.0",
|
||||
"eslint-plugin-react": "^7.34.0",
|
||||
"eslint-plugin-react-hooks": "^4.6.0",
|
||||
"prettier": "^3.2.0"
|
||||
}
|
||||
}
|
||||
6
frontend/postcss.config.js
Normal file
6
frontend/postcss.config.js
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
module.exports = {
|
||||
plugins: {
|
||||
tailwindcss: {},
|
||||
autoprefixer: {},
|
||||
},
|
||||
};
|
||||
93
frontend/src/App.tsx
Normal file
93
frontend/src/App.tsx
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
import { useState, useEffect, useCallback, useRef } from 'react';
|
||||
import Map from './components/Map';
|
||||
import Filters from './components/Filters';
|
||||
import { DEFAULT_FILTERS } from './lib/constants';
|
||||
import type {
|
||||
Filters as FiltersType,
|
||||
Bounds,
|
||||
HexagonData,
|
||||
ViewChangeParams,
|
||||
ApiResponse,
|
||||
} from './types';
|
||||
|
||||
const DEBOUNCE_MS = 150;
|
||||
|
||||
/**
 * Root component. Owns the filter and viewport state, fetches hexagon data
 * from `/api/hexagons` (debounced by DEBOUNCE_MS) whenever filters,
 * resolution or bounds change, and renders the filter panel next to the map.
 */
export default function App() {
  const [filters, setFilters] = useState<FiltersType>(DEFAULT_FILTERS);
  const [data, setData] = useState<HexagonData[]>([]);
  const [resolution, setResolution] = useState<number>(8);
  const [bounds, setBounds] = useState<Bounds | null>(null);
  const [loading, setLoading] = useState<boolean>(false);
  const [zoom, setZoom] = useState<number>(6);
  const debounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const abortControllerRef = useRef<AbortController | null>(null);

  // Debounced fetch when dependencies change
  useEffect(() => {
    if (!bounds) return;

    // Clear previous debounce timer
    if (debounceRef.current) {
      clearTimeout(debounceRef.current);
    }

    debounceRef.current = setTimeout(async () => {
      // Cancel any in-flight request
      if (abortControllerRef.current) {
        abortControllerRef.current.abort();
      }
      // Keep a local handle so this request can tell later whether it is
      // still the most recent one.
      const controller = new AbortController();
      abortControllerRef.current = controller;

      setLoading(true);
      try {
        const boundsStr = `${bounds.south},${bounds.west},${bounds.north},${bounds.east}`;
        const params = new URLSearchParams({
          resolution: resolution.toString(),
          min_year: filters.minYear.toString(),
          max_year: filters.maxYear.toString(),
          min_price: filters.minPrice.toString(),
          max_price: filters.maxPrice.toString(),
          bounds: boundsStr,
        });
        const res = await fetch(`/api/hexagons?${params}`, {
          signal: controller.signal,
        });
        // fetch() resolves on HTTP errors too; without this check an error
        // body would be parsed and the map cleared via `json.features || []`.
        if (!res.ok) {
          throw new Error(`Request failed with status ${res.status}`);
        }
        const json: ApiResponse = await res.json();
        setData(json.features || []);
      } catch (err) {
        if (err instanceof Error && err.name !== 'AbortError') {
          console.error('Failed to fetch data:', err);
        }
      } finally {
        // A superseded (aborted) request must not hide the indicator while
        // its replacement is still loading.
        if (abortControllerRef.current === controller) {
          setLoading(false);
        }
      }
    }, DEBOUNCE_MS);

    return () => {
      if (debounceRef.current) {
        clearTimeout(debounceRef.current);
      }
    };
  }, [filters, resolution, bounds]);

  // Stable callback so the map's notification effect does not re-run on every render.
  const handleViewChange = useCallback(
    ({ resolution: newRes, bounds: newBounds, zoom: newZoom }: ViewChangeParams) => {
      setResolution(newRes);
      setBounds(newBounds);
      setZoom(newZoom);
    },
    []
  );

  return (
    <div className="h-screen flex">
      <Filters filters={filters} onChange={setFilters} zoom={zoom} />
      <div className="flex-1 relative">
        <Map data={data} onViewChange={handleViewChange} />
        {loading && (
          <div className="absolute top-4 right-4 bg-white px-3 py-1 rounded shadow">Loading...</div>
        )}
      </div>
    </div>
  );
}
|
||||
74
frontend/src/components/Filters.tsx
Normal file
74
frontend/src/components/Filters.tsx
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
import { Slider } from './ui/slider';
|
||||
import { Label } from './ui/label';
|
||||
import { YEAR_MIN, YEAR_MAX, YEAR_STEP, PRICE_MIN, PRICE_MAX, PRICE_STEP } from '../lib/constants';
|
||||
import type { Filters as FiltersType } from '../types';
|
||||
|
||||
interface FiltersProps {
|
||||
filters: FiltersType;
|
||||
onChange: (filters: FiltersType) => void;
|
||||
zoom: number;
|
||||
}
|
||||
|
||||
/**
 * Sidebar with the year-range and price sliders plus the colour legend.
 *
 * Every change is reported through `onChange` as a complete filters object.
 * The two price sliders are clamped against each other so the reported range
 * is never inverted (minPrice > maxPrice).
 */
export default function Filters({ filters, onChange, zoom }: FiltersProps) {
  const update = (key: keyof FiltersType, value: number) => onChange({ ...filters, [key]: value });

  return (
    <div className="w-72 p-4 bg-white shadow-lg space-y-6">
      <h1 className="text-xl font-bold">UK Property Prices</h1>

      <div className="text-sm text-slate-500">Zoom: {zoom.toFixed(1)}</div>

      <div className="space-y-2">
        <Label>
          Year Range: {filters.minYear} - {filters.maxYear}
        </Label>
        <Slider
          min={YEAR_MIN}
          max={YEAR_MAX}
          step={YEAR_STEP}
          value={[filters.minYear, filters.maxYear]}
          onValueChange={([min, max]) => onChange({ ...filters, minYear: min, maxYear: max })}
        />
      </div>

      <div className="space-y-2">
        <Label>Min Price: £{filters.minPrice.toLocaleString()}</Label>
        <Slider
          min={PRICE_MIN}
          max={PRICE_MAX}
          step={PRICE_STEP}
          value={[filters.minPrice]}
          // Never let the minimum overtake the current maximum.
          onValueChange={([v]) => update('minPrice', Math.min(v, filters.maxPrice))}
        />
      </div>

      <div className="space-y-2">
        <Label>Max Price: £{filters.maxPrice.toLocaleString()}</Label>
        <Slider
          min={PRICE_MIN}
          max={PRICE_MAX}
          step={PRICE_STEP}
          value={[filters.maxPrice]}
          // Never let the maximum drop below the current minimum.
          onValueChange={([v]) => update('maxPrice', Math.max(v, filters.minPrice))}
        />
      </div>

      <div className="mt-6 p-3 bg-slate-100 rounded text-xs">
        <div className="mb-2 font-medium">Average Price</div>
        <div
          className="h-4 rounded"
          style={{
            background:
              'linear-gradient(to right, rgb(46, 204, 113), rgb(241, 196, 15), rgb(231, 76, 60), rgb(142, 68, 173))',
          }}
        ></div>
        <div className="flex justify-between mt-1">
          <span>£0</span>
          <span>£200k</span>
          <span>£400k</span>
          <span>£800k+</span>
        </div>
      </div>
    </div>
  );
}
|
||||
185
frontend/src/components/Map.tsx
Normal file
185
frontend/src/components/Map.tsx
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
import { useCallback, useRef, useEffect, useState, useMemo } from 'react';
|
||||
import { Map as MapGL } from 'react-map-gl/maplibre';
|
||||
import DeckGL from '@deck.gl/react';
|
||||
import { H3HexagonLayer } from '@deck.gl/geo-layers';
|
||||
import 'maplibre-gl/dist/maplibre-gl.css';
|
||||
import type { HexagonData, ViewState, ViewChangeParams, Bounds } from '../types';
|
||||
|
||||
interface MapProps {
|
||||
data: HexagonData[];
|
||||
onViewChange: (params: ViewChangeParams) => void;
|
||||
}
|
||||
|
||||
const INITIAL_VIEW: ViewState = {
|
||||
longitude: -1.5,
|
||||
latitude: 53.5,
|
||||
zoom: 6,
|
||||
pitch: 0,
|
||||
};
|
||||
|
||||
const MAP_STYLE = 'https://basemaps.cartocdn.com/gl/positron-gl-style/style.json';
|
||||
|
||||
interface ColorStop {
|
||||
price: number;
|
||||
color: [number, number, number];
|
||||
}
|
||||
|
||||
// Continuous color scale from green (low) -> yellow -> red -> purple (high)
|
||||
const COLOR_SCALE: ColorStop[] = [
|
||||
{ price: 0, color: [46, 204, 113] }, // Green
|
||||
{ price: 200000, color: [241, 196, 15] }, // Yellow
|
||||
{ price: 400000, color: [231, 76, 60] }, // Red
|
||||
{ price: 800000, color: [142, 68, 173] }, // Purple
|
||||
];
|
||||
|
||||
/**
 * Linearly blend two RGB triples.
 *
 * `t = 0` yields `c1`, `t = 1` yields `c2`; each channel is rounded to the
 * nearest integer.
 */
function interpolateColor(
  c1: [number, number, number],
  c2: [number, number, number],
  t: number
): [number, number, number] {
  const blend = (from: number, to: number): number => Math.round(from + (to - from) * t);
  return [blend(c1[0], c2[0]), blend(c1[1], c2[1]), blend(c1[2], c2[2])];
}
|
||||
|
||||
/**
 * Map a price onto COLOR_SCALE.
 *
 * Missing or NaN prices are drawn gray. Prices at or beyond either end of the
 * scale get the end colour; anything in between is interpolated between the
 * two neighbouring stops.
 */
function priceToColor(price: number | null | undefined): [number, number, number] {
  if (price == null || isNaN(price)) return [128, 128, 128]; // Gray for missing data
  const value: number = price;

  const firstStop = COLOR_SCALE[0];
  const lastStop = COLOR_SCALE[COLOR_SCALE.length - 1];

  // Clamp to scale range
  if (value <= firstStop.price) return firstStop.color;
  if (value >= lastStop.price) return lastStop.color;

  // Index of the first segment [stop i, stop i + 1] that contains the price
  const segment = COLOR_SCALE.findIndex(
    (stop, i) =>
      i < COLOR_SCALE.length - 1 && value >= stop.price && value <= COLOR_SCALE[i + 1].price
  );
  if (segment === -1) return lastStop.color;

  const lower = COLOR_SCALE[segment];
  const upper = COLOR_SCALE[segment + 1];
  const t = (value - lower.price) / (upper.price - lower.price);
  return interpolateColor(lower.color, upper.color, t);
}
|
||||
|
||||
/**
 * Choose the H3 resolution for a map zoom level: 6 below zoom 7, increasing
 * step by step to 11 from zoom 13 upwards.
 */
function zoomToResolution(zoom: number): number {
  // [exclusive upper zoom bound, resolution], in ascending order
  const steps: ReadonlyArray<readonly [number, number]> = [
    [7, 6],
    [8.5, 7],
    [9.5, 8],
    [11, 9],
    [13, 10],
  ];

  for (const [upperZoom, resolution] of steps) {
    if (zoom < upperZoom) return resolution;
  }
  return 11;
}
|
||||
|
||||
/**
 * Approximate the geographic bounds visible in a viewport of `width` x
 * `height` pixels centred on the view state's longitude/latitude.
 *
 * Only longitude, latitude and zoom are used; bearing and pitch are ignored,
 * so the result is exact only for a north-up, top-down view. The result is
 * clamped to [-180, 180] degrees longitude and [-85, 85] degrees latitude.
 */
function getBoundsFromViewState(viewState: ViewState, width: number, height: number): Bounds {
  const { longitude, latitude, zoom } = viewState;

  // Clamp latitude to valid Mercator range to avoid math errors
  const clampedLat = Math.max(-85, Math.min(85, latitude));

  // Web Mercator projection math. deck.gl and MapLibre define zoom 0 as a
  // world that is 512 px wide; a 256 px base doubles the computed extent in
  // each dimension.
  const TILE_SIZE = 512;
  const scale = Math.pow(2, zoom);
  const worldSize = TILE_SIZE * scale;

  // Longitude is linear
  const degreesPerPixelLng = 360 / worldSize;
  const halfWidthDeg = (width / 2) * degreesPerPixelLng;

  // Latitude uses Mercator projection (non-linear)
  // Convert center lat to pixel y, offset by half height, convert back to lat
  const latRad = (clampedLat * Math.PI) / 180;
  const mercatorY = (1 - Math.log(Math.tan(latRad) + 1 / Math.cos(latRad)) / Math.PI) / 2;
  const centerPixelY = mercatorY * worldSize;

  const topPixelY = centerPixelY - height / 2;
  const bottomPixelY = centerPixelY + height / 2;

  // Convert pixel Y back to latitude
  const pixelYToLat = (pixelY: number): number => {
    const mercY = Math.max(0.001, Math.min(0.999, pixelY / worldSize)); // Clamp to avoid edge cases
    const latRadians = Math.atan(Math.sinh(Math.PI * (1 - 2 * mercY)));
    return (latRadians * 180) / Math.PI;
  };

  const north = Math.min(85, pixelYToLat(topPixelY));
  const south = Math.max(-85, pixelYToLat(bottomPixelY));
  const west = Math.max(-180, longitude - halfWidthDeg);
  const east = Math.min(180, longitude + halfWidthDeg);

  return { south, west, north, east };
}
|
||||
|
||||
interface Dimensions {
|
||||
width: number;
|
||||
height: number;
|
||||
}
|
||||
|
||||
/**
 * Interactive map that draws the given H3 hexagons coloured by average price.
 *
 * Keeps the deck.gl view state and the container size in local state and
 * reports the derived H3 resolution, viewport bounds and zoom to the parent
 * through `onViewChange` whenever either of them changes.
 */
export default function Map({ data, onViewChange }: MapProps) {
  const containerRef = useRef<HTMLDivElement>(null);
  const [viewState, setViewState] = useState<ViewState>(INITIAL_VIEW);
  // Starts at 0x0; nothing is reported to the parent until a real size is measured.
  const [dimensions, setDimensions] = useState<Dimensions>({ width: 0, height: 0 });

  // Track container dimensions with ResizeObserver
  useEffect(() => {
    const container = containerRef.current;
    if (!container) return;

    const observer = new ResizeObserver((entries) => {
      const { width, height } = entries[0].contentRect;
      // Ignore zero-sized measurements.
      if (width > 0 && height > 0) {
        setDimensions({ width, height });
      }
    });

    observer.observe(container);
    return () => observer.disconnect();
  }, []);

  // Notify parent when view or dimensions change
  useEffect(() => {
    if (dimensions.width === 0 || dimensions.height === 0) return;

    const bounds = getBoundsFromViewState(viewState, dimensions.width, dimensions.height);
    const resolution = zoomToResolution(viewState.zoom);

    onViewChange({ resolution, bounds, zoom: viewState.zoom });
  }, [viewState, dimensions, onViewChange]);

  // The incoming view state is typed `unknown` here and cast to the local ViewState shape.
  const handleViewStateChange = useCallback((params: { viewState: unknown }) => {
    const newViewState = params.viewState as ViewState;
    setViewState(newViewState);
  }, []);

  // Rebuild the layer only when the data array changes.
  const layers = useMemo(
    () => [
      new H3HexagonLayer<HexagonData>({
        id: 'h3-hexagons',
        data,
        getHexagon: (d) => d.h3,
        getFillColor: (d) => priceToColor(d.avg_price),
        extruded: false,
        pickable: true,
        opacity: 0.7,
      }),
    ],
    [data]
  );

  return (
    <div className="flex-1 h-full" ref={containerRef}>
      <DeckGL
        viewState={viewState}
        controller
        layers={layers}
        onViewStateChange={handleViewStateChange as never}
      >
        <MapGL mapStyle={MAP_STYLE} />
      </DeckGL>
    </div>
  );
}
|
||||
12
frontend/src/components/ui/label.tsx
Normal file
12
frontend/src/components/ui/label.tsx
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
import type { ReactNode } from 'react';
|
||||
|
||||
interface LabelProps {
|
||||
children: ReactNode;
|
||||
className?: string;
|
||||
}
|
||||
|
||||
/** Small form label with the default text styling; `className` is appended to it. */
export function Label({ children, className }: LabelProps) {
  const classes = `text-sm font-medium text-slate-700 ${className || ''}`;
  return <label className={classes}>{children}</label>;
}
|
||||
25
frontend/src/components/ui/slider.tsx
Normal file
25
frontend/src/components/ui/slider.tsx
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import * as SliderPrimitive from '@radix-ui/react-slider';
|
||||
import { cn } from '../../lib/utils';
|
||||
|
||||
interface SliderProps extends React.ComponentPropsWithoutRef<typeof SliderPrimitive.Root> {
|
||||
className?: string;
|
||||
}
|
||||
|
||||
/**
 * Styled wrapper around the Radix slider primitive.
 *
 * Renders one thumb per slider value. The thumb count is taken from `value`
 * (controlled), then `defaultValue` (uncontrolled), and otherwise falls back
 * to a single thumb, so the slider always has a handle to drag.
 */
export function Slider({ className, ...props }: SliderProps) {
  const thumbValues = props.value ?? props.defaultValue ?? [props.min ?? 0];

  return (
    <SliderPrimitive.Root
      className={cn('relative flex w-full touch-none select-none items-center', className)}
      {...props}
    >
      <SliderPrimitive.Track className="relative h-2 w-full grow overflow-hidden rounded-full bg-slate-200">
        <SliderPrimitive.Range className="absolute h-full bg-slate-900" />
      </SliderPrimitive.Track>
      {thumbValues.map((_, i) => (
        <SliderPrimitive.Thumb
          key={i}
          className="block h-5 w-5 rounded-full border-2 border-slate-900 bg-white ring-offset-white transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-slate-950 focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50"
        />
      ))}
    </SliderPrimitive.Root>
  );
}
|
||||
11
frontend/src/index.css
Normal file
11
frontend/src/index.css
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
||||
|
||||
html,
|
||||
body,
|
||||
#root {
|
||||
height: 100%;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
11
frontend/src/index.html
Normal file
11
frontend/src/index.html
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>UK Property Prices Map</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
</body>
|
||||
</html>
|
||||
10
frontend/src/index.tsx
Normal file
10
frontend/src/index.tsx
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
import { createRoot } from 'react-dom/client';
|
||||
import App from './App';
|
||||
import './index.css';
|
||||
|
||||
// Locate the mount point declared in index.html and fail loudly if it is
// missing, rather than letting createRoot receive null.
const mountPoint = document.getElementById('root');
if (mountPoint === null) {
  throw new Error('Root element not found');
}

// Create the React root and render the application into it.
createRoot(mountPoint).render(<App />);
|
||||
19
frontend/src/lib/constants.ts
Normal file
19
frontend/src/lib/constants.ts
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
import type { Filters } from '../types';
|
||||
|
||||
// Filter configuration constants
|
||||
// Should match backend pipeline/config.py
|
||||
|
||||
// Year range; mirrors MIN_YEAR / MAX_YEAR in pipeline/config.py.
export const YEAR_MIN = 1995;
export const YEAR_MAX = 2024;
export const YEAR_STEP = 1;

// Price range for the slider. The £5M ceiling is a UI limit only; the
// backend does not cap prices at this value.
export const PRICE_MIN = 0;
export const PRICE_MAX = 5_000_000;
export const PRICE_STEP = 50_000;

// Initial filter state; the 2020 start year mirrors DEFAULT_MIN_YEAR in
// pipeline/config.py.
export const DEFAULT_FILTERS: Filters = {
  minYear: 2020,
  maxYear: YEAR_MAX,
  minPrice: PRICE_MIN,
  maxPrice: PRICE_MAX,
};
|
||||
4
frontend/src/lib/utils.ts
Normal file
4
frontend/src/lib/utils.ts
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
import { clsx, type ClassValue } from 'clsx';
|
||||
import { twMerge } from 'tailwind-merge';
|
||||
|
||||
/**
 * Combine class-name inputs with `clsx`, then pass the result through
 * `twMerge`.
 */
export function cn(...inputs: ClassValue[]): string {
  const combined = clsx(inputs);
  return twMerge(combined);
}
|
||||
41
frontend/src/types.ts
Normal file
41
frontend/src/types.ts
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
/** Year and price ranges used to filter the data. */
export interface Filters {
  minYear: number;
  maxYear: number;
  minPrice: number;
  maxPrice: number;
}

/**
 * Geographic bounding box. The `/api/hexagons` endpoint expects these values
 * in the order south, west, north, east.
 */
export interface Bounds {
  south: number;
  west: number;
  north: number;
  east: number;
}

/** One aggregated H3 cell, as listed in `features` by `/api/hexagons`. */
export interface HexagonData {
  /** H3 cell index. */
  h3: string;
  /** Number of records aggregated into the cell. */
  count: number;
  avg_price: number;
  median_price: number;
  min_price: number;
  max_price: number;
}

/** Map view parameters (presumably the map camera state; confirm against the map component). */
export interface ViewState {
  longitude: number;
  latitude: number;
  zoom: number;
  pitch: number;
  bearing?: number;
}

/** Values describing a view change: H3 resolution, visible bounds and zoom. */
export interface ViewChangeParams {
  resolution: number;
  bounds: Bounds;
  zoom: number;
}

/** JSON body returned by `/api/hexagons`. */
export interface ApiResponse {
  features: HexagonData[];
  /** True when the server limited the number of returned hexagons. */
  truncated: boolean;
}
|
||||
7
frontend/tailwind.config.js
Normal file
7
frontend/tailwind.config.js
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
// Tailwind configuration: scan every source and HTML file under src/ for
// class names, keep the default theme, and enable the animate plugin.
const tailwindConfig = {
  content: ['./src/**/*.{js,jsx,ts,tsx,html}'],
  theme: {
    extend: {},
  },
  plugins: [require('tailwindcss-animate')],
};

module.exports = tailwindConfig;
|
||||
22
frontend/tsconfig.json
Normal file
22
frontend/tsconfig.json
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2020",
|
||||
"lib": ["DOM", "DOM.Iterable", "ES2020"],
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"jsx": "react-jsx",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"baseUrl": ".",
|
||||
"paths": {
|
||||
"@/*": ["src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
41
frontend/webpack.config.js
Normal file
41
frontend/webpack.config.js
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
const path = require('path');
|
||||
const HtmlWebpackPlugin = require('html-webpack-plugin');
|
||||
|
||||
module.exports = {
|
||||
entry: './src/index.tsx',
|
||||
output: {
|
||||
path: path.resolve(__dirname, 'dist'),
|
||||
filename: 'bundle.js',
|
||||
clean: true,
|
||||
},
|
||||
resolve: {
|
||||
extensions: ['.ts', '.tsx', '.js', '.jsx'],
|
||||
},
|
||||
module: {
|
||||
rules: [
|
||||
{
|
||||
test: /\.tsx?$/,
|
||||
exclude: /node_modules/,
|
||||
use: 'ts-loader',
|
||||
},
|
||||
{
|
||||
test: /\.css$/,
|
||||
use: ['style-loader', 'css-loader', 'postcss-loader'],
|
||||
},
|
||||
],
|
||||
},
|
||||
plugins: [
|
||||
new HtmlWebpackPlugin({
|
||||
template: './src/index.html',
|
||||
}),
|
||||
],
|
||||
devServer: {
|
||||
port: 3000,
|
||||
proxy: [
|
||||
{
|
||||
context: ['/api'],
|
||||
target: 'http://localhost:8001',
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
49
generate_tfl_client.py
Normal file
49
generate_tfl_client.py
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env python3
|
||||
# /// script
|
||||
# requires-python = ">=3.12"
|
||||
# dependencies = ["openapi-python-client"]
|
||||
# ///
|
||||
"""Regenerate the TfL Journey API client from the OpenAPI specification."""
|
||||
|
||||
# Run it with:
|
||||
# uv run generate_tfl_client.py
|
||||
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
OPENAPI_SPEC = Path("Journey.yaml")
|
||||
OUTPUT_PATH = Path("tfl_journey_client")
|
||||
|
||||
|
||||
def main() -> None:
    """Generate the TfL Journey API client from the OpenAPI specification.

    Generation is skipped (with a printed notice) when ``OUTPUT_PATH``
    already exists.

    Raises:
        FileNotFoundError: If ``OPENAPI_SPEC`` does not exist.
        SystemExit: With exit code 1 when ``openapi-python-client`` exits
            with a non-zero status.
    """
    if not OPENAPI_SPEC.exists():
        raise FileNotFoundError(f"OpenAPI spec not found: {OPENAPI_SPEC}")

    # Skip if client already exists
    if OUTPUT_PATH.exists():
        print(f"TfL client already exists at {OUTPUT_PATH}, skipping")
        return

    # Generate the client
    print(f"Generating client from {OPENAPI_SPEC}")
    # check=False so the return code is inspected below. With check=True a
    # failing generator raised CalledProcessError first, which made the
    # "generation failed" branch unreachable.
    result = subprocess.run(
        [
            "openapi-python-client",
            "generate",
            "--path",
            str(OPENAPI_SPEC),
            "--output-path",
            str(OUTPUT_PATH),
        ],
        check=False,
    )

    if result.returncode != 0:
        print("Client generation failed")
        raise SystemExit(1)

    print(f"Client generated successfully at {OUTPUT_PATH}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
0
pipeline/__init__.py
Normal file
0
pipeline/__init__.py
Normal file
22
pipeline/base.py
Normal file
22
pipeline/base.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
from abc import ABC, abstractmethod
|
||||
import polars as pl
|
||||
|
||||
|
||||
class DataSource(ABC):
    """Abstract interface implemented by every pipeline data source.

    Subclasses must provide ``name``, ``load`` and ``process``.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the unique identifier of this data source."""

    @abstractmethod
    def load(self) -> pl.LazyFrame:
        """Return the raw data as a lazy frame."""

    @abstractmethod
    def process(self, postcodes: pl.LazyFrame) -> pl.LazyFrame:
        """Return the data processed and joined with postcode coordinates."""
|
||||
23
pipeline/config.py
Normal file
23
pipeline/config.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
"""Shared configuration for the pipeline and server."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# Data directories
|
||||
DATA_DIR = Path(__file__).parent.parent / "data_sources"
|
||||
PROCESSED_DIR = DATA_DIR / "processed"
|
||||
AGGREGATES_DIR = PROCESSED_DIR / "aggregates"
|
||||
|
||||
# H3 resolutions to generate and serve
|
||||
# https://h3geo.org/docs/core-library/restable/#average-area-in-m2
|
||||
H3_RESOLUTIONS = [6, 7, 8, 9, 10, 11]
|
||||
DEFAULT_H3_RESOLUTION = 8
|
||||
|
||||
# Year filters
|
||||
MIN_YEAR = 1995
|
||||
MAX_YEAR = 2024
|
||||
DEFAULT_MIN_YEAR = 2020
|
||||
DEFAULT_MAX_YEAR = 2024
|
||||
|
||||
# Price filters
|
||||
DEFAULT_MIN_PRICE = 0
|
||||
DEFAULT_MAX_PRICE = 100_000_000
|
||||
0
pipeline/processors/__init__.py
Normal file
0
pipeline/processors/__init__.py
Normal file
42
pipeline/processors/h3_aggregator.py
Normal file
42
pipeline/processors/h3_aggregator.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
from pathlib import Path
|
||||
import polars as pl
|
||||
|
||||
from pipeline.config import AGGREGATES_DIR, H3_RESOLUTIONS
|
||||
|
||||
|
||||
def aggregate(df: pl.LazyFrame, resolution: int) -> pl.LazyFrame:
    """Summarise prices per H3 cell and year at one resolution.

    Groups by the ``h3_res{resolution}`` column and ``year``, computes the
    row count and the mean, median, minimum and maximum of ``price``, and
    renames the cell column to ``h3``.
    """
    cell_column = f"h3_res{resolution}"
    price = pl.col("price")

    summaries = [
        pl.len().alias("count"),
        price.mean().alias("avg_price"),
        price.median().alias("median_price"),
        price.min().alias("min_price"),
        price.max().alias("max_price"),
    ]

    grouped = df.group_by(cell_column, "year").agg(summaries)
    return grouped.rename({cell_column: "h3"})
|
||||
|
||||
|
||||
def aggregate_all(df: pl.LazyFrame) -> dict[int, pl.LazyFrame]:
    """Build one lazy aggregate per resolution in ``H3_RESOLUTIONS``.

    Returns a mapping from resolution to the frame produced by ``aggregate``.
    """
    per_resolution: dict[int, pl.LazyFrame] = {}
    for level in H3_RESOLUTIONS:
        per_resolution[level] = aggregate(df, level)
    return per_resolution
|
||||
|
||||
|
||||
def save_aggregates(df: pl.LazyFrame, output_dir: Path | None = None) -> list[Path]:
    """Write the aggregate for every H3 resolution to a parquet file.

    Files are named ``res{resolution}.parquet`` and written to ``output_dir``
    (``AGGREGATES_DIR`` when omitted); the directory is created if needed.
    Returns the written paths in resolution order.
    """
    target_dir = output_dir or AGGREGATES_DIR
    target_dir.mkdir(parents=True, exist_ok=True)

    written: list[Path] = []
    for level, lazy_aggregate in aggregate_all(df).items():
        destination = target_dir / f"res{level}.parquet"
        # Each lazy aggregate is materialised only at write time.
        lazy_aggregate.collect().write_parquet(destination)
        written.append(destination)

    return written
|
||||
35
pipeline/run.py
Normal file
35
pipeline/run.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
"""Pipeline CLI to process property data with H3 spatial indexing."""
|
||||
|
||||
import polars as pl
|
||||
|
||||
from pipeline.sources.postcodes import save_postcodes
|
||||
from pipeline.sources.property_prices import PropertyPricesSource
|
||||
from pipeline.processors.h3_aggregator import save_aggregates
|
||||
|
||||
|
||||
def run_pipeline() -> None:
    """Run all three pipeline stages, printing progress as it goes.

    Stages: write postcodes with H3 indices, join property prices to those
    postcodes, then save the per-resolution aggregates.
    """
    banner = "=" * 60
    print(banner)
    print("Property Map Data Pipeline")
    print(banner)

    # Stage 1: postcodes enriched with H3 indices, persisted to parquet.
    print("\n[1/3] Processing postcodes with H3 indices...")
    postcodes_file = save_postcodes()
    print(f" Saved: {postcodes_file}")

    # Stage 2: lazily join property prices to the saved postcodes.
    print("\n[2/3] Processing property prices...")
    postcode_frame = pl.scan_parquet(postcodes_file)
    properties = PropertyPricesSource().process(postcode_frame)
    print(" Joined property prices with postcodes")

    # Stage 3: aggregate per H3 resolution and report each file's size.
    print("\n[3/3] Aggregating at H3 resolutions...")
    for written in save_aggregates(properties):
        size_mb = written.stat().st_size / (1024 * 1024)
        print(f" Saved: {written.name} ({size_mb:.1f} MB)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_pipeline()
|
||||
0
pipeline/sources/__init__.py
Normal file
0
pipeline/sources/__init__.py
Normal file
49
pipeline/sources/postcodes.py
Normal file
49
pipeline/sources/postcodes.py
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
from pathlib import Path
|
||||
import polars as pl
|
||||
import h3
|
||||
|
||||
from pipeline.config import DATA_DIR, H3_RESOLUTIONS, PROCESSED_DIR
|
||||
|
||||
|
||||
def lat_long_to_h3(lat: float, long: float, resolution: int) -> str:
    """Return the H3 cell index containing (lat, long) at ``resolution``."""
    cell = h3.latlng_to_cell(lat, long, resolution)
    return cell
|
||||
|
||||
|
||||
def load_postcodes() -> pl.LazyFrame:
    """Lazily read postcodes and coordinates from ``arcgis_data.parquet``.

    The ``pcds`` column is exposed as ``postcode``; ``lat`` and ``long`` are
    passed through unchanged.
    """
    source = DATA_DIR / "arcgis_data.parquet"
    wanted = [
        pl.col("pcds").alias("postcode"),
        pl.col("lat"),
        pl.col("long"),
    ]
    return pl.scan_parquet(source).select(wanted)
|
||||
|
||||
|
||||
def process_postcodes() -> pl.LazyFrame:
    """Add one ``h3_res{N}`` column per resolution in ``H3_RESOLUTIONS``.

    The postcode frame is collected eagerly, each row's H3 cell is computed
    in Python via ``lat_long_to_h3``, and the result is returned lazy again.
    """

    def _cell_expr(level: int) -> pl.Expr:
        # ``level`` is a parameter of this factory, so each lambda is bound
        # to its own resolution rather than to the loop variable.
        return (
            pl.struct(["lat", "long"])
            .map_elements(
                lambda row: lat_long_to_h3(row["lat"], row["long"], level),
                return_dtype=pl.Utf8,
            )
            .alias(f"h3_res{level}")
        )

    frame = load_postcodes().collect()
    for level in H3_RESOLUTIONS:
        frame = frame.with_columns(_cell_expr(level))

    return frame.lazy()
|
||||
|
||||
|
||||
def save_postcodes(output_path: Path | None = None) -> Path:
    """Write the H3-enriched postcodes to parquet and return the file path.

    Defaults to ``PROCESSED_DIR / "postcodes_h3.parquet"``; the parent
    directory is created if it does not exist.
    """
    destination = output_path or PROCESSED_DIR / "postcodes_h3.parquet"
    destination.parent.mkdir(parents=True, exist_ok=True)

    process_postcodes().collect().write_parquet(destination)
    return destination
|
||||
41
pipeline/sources/property_prices.py
Normal file
41
pipeline/sources/property_prices.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
import polars as pl
|
||||
|
||||
from pipeline.base import DataSource
|
||||
from pipeline.config import DATA_DIR, H3_RESOLUTIONS
|
||||
|
||||
|
||||
class PropertyPricesSource(DataSource):
    """Data source for Land Registry property prices."""

    @property
    def name(self) -> str:
        """Return the identifier of this source."""
        return "property_prices"

    def load(self) -> pl.LazyFrame:
        """Lazily scan the raw ``pp-complete.parquet`` file."""
        return pl.scan_parquet(DATA_DIR / "pp-complete.parquet")

    def process(self, postcodes: pl.LazyFrame) -> pl.LazyFrame:
        """Attach coordinates and H3 indices to each price record.

        Keeps price, transfer year, property type and postcode, inner-joins
        on ``postcode`` (records without a matching postcode are dropped),
        and returns price, year, property type, lat, long and every
        ``h3_res{N}`` column listed in ``H3_RESOLUTIONS``.
        """
        transfer_year = pl.col("date_of_transfer").dt.year().alias("year")
        prices = self.load().select(
            "price",
            transfer_year,
            "property_type",
            "postcode",
        )

        located = prices.join(postcodes, on="postcode", how="inner")

        h3_columns = [f"h3_res{res}" for res in H3_RESOLUTIONS]
        return located.select(
            "price",
            "year",
            "property_type",
            "lat",
            "long",
            *h3_columns,
        )
|
||||
|
|
@ -5,8 +5,26 @@ description = "Add your description here"
|
|||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
    "attrs>=22.2.0",
    "httpx>=0.28.1",
    "ipywidgets>=8.0.0",
    "journey-client",
    "jupyter>=1.0.0",
    "nest-asyncio>=1.6.0",
    "numpy>=1.26.0",
    "pandas>=2.0.0",
    "plotly>=6.5.2",
    "polars>=1.37.1",
    "pyarrow>=15.0.0",
    "python-dateutil>=2.8.0",
    "tqdm>=4.67.1",
    "fastapi[standard]>=0.115.0",
    "uvicorn>=0.34.0",
    # The pipeline and server call h3.latlng_to_cell / h3.cell_to_latlng,
    # which exist only in the h3 4.x API (3.x used geo_to_h3 / h3_to_geo).
    "h3>=4.0.0",
]
|
||||
|
||||
[dependency-groups]
|
||||
dev = ["ruff>=0.8.0"]
|
||||
|
||||
[tool.uv.sources]
|
||||
journey-client = { path = "./tfl_journey_client" }
|
||||
|
|
|
|||
0
server/__init__.py
Normal file
0
server/__init__.py
Normal file
25
server/config.py
Normal file
25
server/config.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
"""Server configuration - imports shared values from pipeline config."""
|
||||
|
||||
from pipeline.config import (
|
||||
AGGREGATES_DIR,
|
||||
H3_RESOLUTIONS as VALID_RESOLUTIONS,
|
||||
DEFAULT_H3_RESOLUTION as DEFAULT_RESOLUTION,
|
||||
MIN_YEAR,
|
||||
MAX_YEAR,
|
||||
DEFAULT_MIN_YEAR,
|
||||
DEFAULT_MAX_YEAR,
|
||||
DEFAULT_MIN_PRICE,
|
||||
DEFAULT_MAX_PRICE,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"AGGREGATES_DIR",
|
||||
"VALID_RESOLUTIONS",
|
||||
"DEFAULT_RESOLUTION",
|
||||
"MIN_YEAR",
|
||||
"MAX_YEAR",
|
||||
"DEFAULT_MIN_YEAR",
|
||||
"DEFAULT_MAX_YEAR",
|
||||
"DEFAULT_MIN_PRICE",
|
||||
"DEFAULT_MAX_PRICE",
|
||||
]
|
||||
23
server/main.py
Normal file
23
server/main.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
from pathlib import Path
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from server.routes import hexagons
|
||||
|
||||
app = FastAPI(title="Property Map API")

# CORS: allow any origin, method and header. Credentials stay disabled
# because they cannot be combined with a wildcard origin.
_cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": False,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)

# API routes live under /api.
app.include_router(hexagons.router, prefix="/api")

# Serve the built frontend at the root path, but only when a build exists
# (production).
frontend_dist = Path(__file__).parents[1] / "frontend" / "dist"
if frontend_dist.exists():
    app.mount("/", StaticFiles(directory=frontend_dist, html=True), name="static")
|
||||
0
server/routes/__init__.py
Normal file
0
server/routes/__init__.py
Normal file
156
server/routes/hexagons.py
Normal file
156
server/routes/hexagons.py
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
from functools import lru_cache
|
||||
from fastapi import APIRouter, Query, HTTPException
|
||||
import polars as pl
|
||||
import h3
|
||||
|
||||
from server.config import (
|
||||
AGGREGATES_DIR,
|
||||
VALID_RESOLUTIONS,
|
||||
DEFAULT_RESOLUTION,
|
||||
DEFAULT_MIN_YEAR,
|
||||
DEFAULT_MAX_YEAR,
|
||||
DEFAULT_MIN_PRICE,
|
||||
DEFAULT_MAX_PRICE,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Cache loaded dataframes in memory (one per resolution)
|
||||
_df_cache: dict[int, pl.DataFrame] = {}
|
||||
|
||||
|
||||
def get_cached_df(resolution: int) -> pl.DataFrame | None:
    """Return the aggregate frame for ``resolution``, loading it on first use.

    On a cache miss the ``res{resolution}.parquet`` file is read, ``lat`` and
    ``lng`` columns holding each row's H3 cell centroid are appended, and the
    frame is stored in ``_df_cache``. Returns ``None`` when the parquet file
    does not exist.
    """
    cached = _df_cache.get(resolution)
    if cached is not None:
        return cached

    source = AGGREGATES_DIR / f"res{resolution}.parquet"
    if not source.exists():
        return None

    frame = pl.read_parquet(source)

    # Centroids are computed once here so that bounding-box filtering later
    # is a plain numeric comparison on two columns.
    latitudes = []
    longitudes = []
    for cell in frame["h3"].to_list():
        centroid = h3.cell_to_latlng(cell)
        latitudes.append(centroid[0])
        longitudes.append(centroid[1])

    frame = frame.with_columns(
        [
            pl.Series("lat", latitudes),
            pl.Series("lng", longitudes),
        ]
    )
    _df_cache[resolution] = frame
    return frame
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
def query_hexagons_cached(
    resolution: int,
    min_year: int,
    max_year: int,
    min_price: int,
    max_price: int,
    bounds_tuple: tuple[float, float, float, float],
) -> tuple[list[dict], bool]:
    """Return the hexagons matching the filters, memoised per argument set.

    Args:
        resolution: H3 resolution whose aggregate file is queried.
        min_year: First year to include (inclusive).
        max_year: Last year to include (inclusive).
        min_price: Lower bound (inclusive) on a cell's weighted average price.
        max_price: Upper bound (inclusive) on a cell's weighted average price.
        bounds_tuple: Bounding box as ``(south, west, north, east)``; a cell
            is kept when its centroid lies inside the box.

    Returns:
        ``(features, truncated)``. ``features`` is a list of dicts with the
        keys ``h3``, ``count``, ``avg_price``, ``median_price``,
        ``min_price`` and ``max_price``. ``truncated`` is True only when
        more than ``MAX_HEXAGONS`` cells matched and some were dropped.

    Note:
        The returned list is the object held by ``lru_cache``; callers must
        not mutate it. A missing aggregate file yields ``([], False)``, and
        that result is cached like any other.
    """
    south, west, north, east = bounds_tuple

    df = get_cached_df(resolution)
    if df is None:
        return [], False

    # Bounding-box filter on the pre-computed cell centroids.
    df = df.filter(
        (pl.col("lat") >= south)
        & (pl.col("lat") <= north)
        & (pl.col("lng") >= west)
        & (pl.col("lng") <= east)
    )

    # Filter by year range
    df = df.filter((pl.col("year") >= min_year) & (pl.col("year") <= max_year))

    # Collapse the per-year rows into one row per cell. The average is
    # weighted by each year's count; the median is the median of the
    # per-year medians, which only approximates the true median.
    df = df.group_by("h3").agg(
        pl.col("count").sum().alias("count"),
        (pl.col("avg_price") * pl.col("count")).sum().alias("weighted_price_sum"),
        pl.col("median_price").median().alias("median_price"),
        pl.col("min_price").min().alias("min_price"),
        pl.col("max_price").max().alias("max_price"),
    )

    # Calculate weighted average price
    df = df.with_columns(
        (pl.col("weighted_price_sum") / pl.col("count")).alias("avg_price")
    ).drop("weighted_price_sum")

    # Filter by price range
    df = df.filter(
        (pl.col("avg_price") >= min_price) & (pl.col("avg_price") <= max_price)
    )

    # Limit results. Strictly greater-than: a result of exactly MAX_HEXAGONS
    # rows is complete and must not be reported as truncated.
    MAX_HEXAGONS = 50000
    truncated = len(df) > MAX_HEXAGONS
    if truncated:
        df = df.limit(MAX_HEXAGONS)

    # Select and round the response columns in Polars before converting.
    df = df.select(
        [
            pl.col("h3"),
            pl.col("count"),
            pl.col("avg_price").round(2),
            pl.col("median_price").round(2),
            pl.col("min_price"),
            pl.col("max_price"),
        ]
    )

    return df.to_dicts(), truncated
|
||||
|
||||
|
||||
@router.get("/hexagons")
async def get_hexagons(
    resolution: int = Query(
        DEFAULT_RESOLUTION,
        ge=min(VALID_RESOLUTIONS),
        le=max(VALID_RESOLUTIONS),
        description=f"H3 resolution ({min(VALID_RESOLUTIONS)}-{max(VALID_RESOLUTIONS)})",
    ),
    min_year: int = Query(DEFAULT_MIN_YEAR, description="Minimum year filter"),
    max_year: int = Query(DEFAULT_MAX_YEAR, description="Maximum year filter"),
    min_price: float = Query(DEFAULT_MIN_PRICE, description="Minimum average price"),
    max_price: float = Query(DEFAULT_MAX_PRICE, description="Maximum average price"),
    bounds: str | None = Query(None, description="Bounding box: south,west,north,east"),
) -> dict:
    """Return aggregated property data for the H3 cells within ``bounds``.

    The response is ``{"features": [...], "truncated": bool}``. Each feature
    is a plain dict keyed by H3 cell index with price statistics; it is not
    GeoJSON.

    Raises:
        HTTPException: 400 when ``bounds`` is missing, is not four
            comma-separated numbers, or contains a non-finite value.
    """
    import math  # local import keeps this handler self-contained

    if resolution not in VALID_RESOLUTIONS:
        resolution = DEFAULT_RESOLUTION

    if not bounds:
        raise HTTPException(status_code=400, detail="bounds parameter is required")

    try:
        south, west, north, east = map(float, bounds.split(","))
    except ValueError:
        # A wrong item count and a non-numeric item both raise ValueError.
        # "from None" hides that internal traceback from the HTTP error.
        raise HTTPException(
            status_code=400, detail="Invalid bounds format. Use: south,west,north,east"
        ) from None

    # float() accepts "nan" and "inf", which math.floor / math.ceil below
    # cannot handle, so reject them as malformed input.
    if not all(math.isfinite(value) for value in (south, west, north, east)):
        raise HTTPException(
            status_code=400, detail="Invalid bounds format. Use: south,west,north,east"
        )

    # Snap bounds to a 0.01 degree grid to reduce cache misses. Always snap
    # outward (floor south/west, ceil north/east): plain rounding could
    # shrink the box and drop cells at the viewport edge, or collapse it
    # entirely for views narrower than 0.01 degree.
    bounds_tuple = (
        math.floor(south * 100) / 100,
        math.floor(west * 100) / 100,
        math.ceil(north * 100) / 100,
        math.ceil(east * 100) / 100,
    )

    # Prices are passed as ints to the cached query.
    features, truncated = query_hexagons_cached(
        resolution,
        min_year,
        max_year,
        int(min_price),
        int(max_price),
        bounds_tuple,
    )

    return {"features": features, "truncated": truncated}
|
||||
Loading…
Add table
Add a link
Reference in a new issue