Use self-hosted tiles
This commit is contained in:
parent
1cee9c38ce
commit
69de6d75af
6 changed files with 342 additions and 21 deletions
58
CLAUDE.md
58
CLAUDE.md
|
|
@ -83,12 +83,11 @@ The server and frontend must handle these human-readable names. See the full ren
|
|||
|
||||
Rust + Axum. Loads parquet into memory at startup.
|
||||
|
||||
**Structure:**
|
||||
- `data/property.rs` — Loads `wide.parquet`, auto-discovers numeric + enum features, computes histograms, sorts rows by spatial locality, precomputes H3 cells (resolutions 4–12)
|
||||
- `data/poi.rs` — Loads `filtered_uk_pois.parquet`
|
||||
- `index.rs` — `GridIndex`: 0.01° spatial grid for O(1) cell lookup
|
||||
- `filter.rs` — Parses filter strings and checks rows. Format: `name:min:max` (numeric), `name:val1|val2` (enum)
|
||||
- `routes/` — One file per endpoint
|
||||
**Structure** (uses Rust 2018 module style — `foo.rs` + `foo/` directory, not `foo/mod.rs`):
|
||||
- `data.rs` + `data/` — Property and POI data loading
|
||||
- `parsing.rs` + `parsing/` — Filter parsing and bounds parsing
|
||||
- `routes.rs` + `routes/` — One file per endpoint
|
||||
- `utils.rs` + `utils/` — GridIndex, hashing, interned columns
|
||||
- `consts.rs` — Key constants (histogram bins, H3 range, max enum cardinality, excluded columns)
|
||||
|
||||
**API endpoints:**
|
||||
|
|
@ -100,10 +99,10 @@ Rust + Axum. Loads parquet into memory at startup.
|
|||
|
||||
Serves `frontend/dist/` as static fallback in production.
|
||||
|
||||
**Data representation:**
|
||||
- Numeric features: row-major flat `Vec<f64>`, NaN = null
|
||||
- Enum features: `Vec<u8>` indices into value list, 255 = null
|
||||
- String fields (address, postcode): `Vec<String>`, empty = null
|
||||
**Data representation (unified model):**
|
||||
- All features (numeric and enum): row-major flat `Vec<f32>`, NaN = null
|
||||
- Enum features: stored as f32 indices (0.0, 1.0, 2.0...) with `enum_values: FxHashMap<usize, Vec<String>>` mapping feature index → string values
|
||||
- String fields (address, postcode): interned/packed for memory efficiency
|
||||
- The server accepts the parquet path as a CLI argument (defaults to `data_sources/processed/wide.parquet`)
|
||||
|
||||
### Frontend (`frontend/`)
|
||||
|
|
@ -216,14 +215,49 @@ Every UI element must use the correct token from this table. Do not invent new p
|
|||
- [ ] Sidebars, dropdowns, and popups are readable in both modes
|
||||
- [ ] HomePage and DataSourcesPage adapt correctly
|
||||
|
||||
## Coding Preferences
|
||||
|
||||
- **Unified data models over special-casing**: Prefer storing different data types uniformly (e.g., enums as f32 indices alongside numeric features) rather than maintaining separate code paths
|
||||
- **Terse tests**: Test what matters in as few tests as possible — don't overcomplicate with excessive setup or edge cases that don't add value
|
||||
- **Extract and organize**: Group related utilities into proper modules (e.g., `utils/`, `parsing/`) rather than leaving helpers scattered
|
||||
- **Inline module tests**: Place `#[cfg(test)] mod tests { }` at the bottom of each module file rather than in separate test files
|
||||
|
||||
## Rust Code Style (server-rs)
|
||||
|
||||
Follow these conventions in all Rust code:
|
||||
|
||||
1. **Module style**: Use Rust 2018 module naming — `foo.rs` + `foo/` directory, NOT `foo/mod.rs`
|
||||
2. **Imports over inline paths**: Import items at the top of the file, don't use `crate::` inline in code
|
||||
```rust
|
||||
// Good
|
||||
use crate::utils::generate_priorities;
|
||||
let p = generate_priorities(n);
|
||||
|
||||
// Bad
|
||||
let p = crate::utils::generate_priorities(n);
|
||||
```
|
||||
3. **Tracing macros**: Import and use short form, not fully qualified
|
||||
```rust
|
||||
// Good
|
||||
use tracing::{info, warn};
|
||||
info!("message");
|
||||
|
||||
// Bad
|
||||
tracing::info!("message");
|
||||
```
|
||||
4. **JSON serialization**: Use `serde_json` with `#[derive(Serialize)]` structs, not manual string building
|
||||
5. **Precompute at startup**: For static/rarely-changing responses, compute once at startup and store in `AppState`
|
||||
6. **Unique placeholders**: When injecting content into HTML, use distinctive markers like `__NARROWIT_OG_TAGS__` that won't accidentally match other content
|
||||
|
||||
## Key Implementation Details
|
||||
|
||||
- **Spatial sort**: Rows sorted by 0.01° grid cell at load time for cache-friendly sequential access
|
||||
- **Row-major layout**: `feature_data[row * num_features + feat_idx]` — all features for one property are contiguous
|
||||
- **Row-major layout**: `feature_data[row * num_features + feat_idx]` — all features (numeric and enum) for one property are contiguous
|
||||
- **H3 precomputation**: Resolutions 4–12 computed in parallel (rayon) at startup
|
||||
- **Histogram percentiles without sorting**: O(n) two-pass algorithm — build histogram, interpolate percentiles
|
||||
- **Direct JSON writing**: Hexagon endpoint writes JSON via string buffer, avoids serde_json::Value allocations
|
||||
- **Startup precomputation**: Static responses (like `/api/features`) are computed once at startup and cached in `AppState`
|
||||
- **POI transform validation**: Fails if any OSM category is unmapped — guarantees exhaustive coverage
|
||||
- **Fuzzy join**: Groups by postcode, uses `thefuzz.token_sort_ratio` with numeric token compatibility, greedy assignment from highest score
|
||||
- **Filter bounds format**: `south,west,north,east` (not standard bbox order)
|
||||
- **POI proximity**: Uses 0.05° grid (~5km cells) to reduce candidates before haversine distance check
|
||||
- **OG tag injection**: Uses `<meta name="x-og-placeholder" content="__NARROWIT_OG_TAGS__"/>` placeholder in HTML, replaced at runtime by middleware
|
||||
|
|
|
|||
|
|
@ -22,4 +22,4 @@ COPY --from=frontend /app/frontend/dist ./dist/
|
|||
|
||||
EXPOSE 8001
|
||||
ENTRYPOINT ["./property-map-server"]
|
||||
CMD ["--data", "/data/wide.parquet", "--pois", "/data/filtered_uk_pois.parquet"]
|
||||
CMD ["--data", "/data/wide.parquet", "--pois", "/data/filtered_uk_pois.parquet", "--tiles", "/data/uk.pmtiles", "--postcodes", "/data/postcodes"]
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ version: '3'
|
|||
|
||||
vars:
|
||||
DATA_DIR: /bulk/property-data
|
||||
TILES_OUTPUT: "{{.DATA_DIR}}/uk.pmtiles"
|
||||
ARCGIS_OUTPUT: "{{.DATA_DIR}}/arcgis_data.parquet"
|
||||
PRICE_PAID_OUTPUT: "{{.DATA_DIR}}/price-paid-complete.parquet"
|
||||
IOD_OUTPUT: "{{.DATA_DIR}}/IoD2025_Scores.parquet"
|
||||
|
|
@ -21,8 +22,31 @@ vars:
|
|||
NAPTAN_OUTPUT: "{{.DATA_DIR}}/naptan.parquet"
|
||||
BROADBAND_OUTPUT: "{{.DATA_DIR}}/broadband.parquet"
|
||||
SCHOOL_PROXIMITY_OUTPUT: "{{.DATA_DIR}}/school_proximity.parquet"
|
||||
POSTCODES_OUTPUT: "{{.DATA_DIR}}/postcodes"
|
||||
|
||||
tasks:
|
||||
download:tiles:
|
||||
desc: Download UK map tiles (PMTiles format from Protomaps)
|
||||
status:
|
||||
- test -f {{.TILES_OUTPUT}}
|
||||
vars:
|
||||
PMTILES_VERSION: "1.22.3"
|
||||
PMTILES_BIN: "{{.DATA_DIR}}/pmtiles"
|
||||
cmds:
|
||||
- |
|
||||
echo "Downloading UK PMTiles (~1.5GB)..."
|
||||
echo "This extracts UK tiles from the Protomaps planet file."
|
||||
echo ""
|
||||
# Download pmtiles CLI if not present
|
||||
if [ ! -f "{{.PMTILES_BIN}}" ]; then
|
||||
echo "Downloading pmtiles CLI v{{.PMTILES_VERSION}}..."
|
||||
curl -sL "https://github.com/protomaps/go-pmtiles/releases/download/v{{.PMTILES_VERSION}}/go-pmtiles_{{.PMTILES_VERSION}}_Linux_x86_64.tar.gz" | tar -xz -C "{{.DATA_DIR}}" pmtiles
|
||||
chmod +x "{{.PMTILES_BIN}}"
|
||||
fi
|
||||
# Extract UK region (bbox: -10.5,49.5,2.5,61)
|
||||
# Using a recent daily build from Protomaps
|
||||
"{{.PMTILES_BIN}}" extract https://build.protomaps.com/20260201.pmtiles {{.TILES_OUTPUT}} --bbox=-10.5,49.5,2.5,61
|
||||
|
||||
prompt:epc:
|
||||
desc: Prompt user to download EPC dataset (requires registration)
|
||||
status:
|
||||
|
|
@ -110,6 +134,12 @@ tasks:
|
|||
cmds:
|
||||
- uv run python -m pipeline.download.broadband --output {{.BROADBAND_OUTPUT}}
|
||||
|
||||
download:postcodes:
|
||||
desc: Download GB postcodes data from MapIt
|
||||
status:
|
||||
- test -f {{.POSTCODES_OUTPUT}}
|
||||
cmds:
|
||||
- uv run python -m pipeline.download.postcodes --output {{.POSTCODES_OUTPUT}}
|
||||
|
||||
download:noise:
|
||||
desc: Download Defra noise data (road, rail, airport) sampled at postcode centroids
|
||||
|
|
|
|||
|
|
@ -5,10 +5,6 @@ includes:
|
|||
taskfile: ./Taskfile.data.yml
|
||||
flatten: true
|
||||
|
||||
vars:
|
||||
DATA_DIR: /bulk/property-data
|
||||
WIDE_OUTPUT: "{{.DATA_DIR}}/wide.parquet"
|
||||
POIS_FILTERED_OUTPUT: "{{.DATA_DIR}}/filtered_uk_pois.parquet"
|
||||
|
||||
tasks:
|
||||
install:
|
||||
|
|
@ -33,13 +29,13 @@ tasks:
|
|||
desc: Run Rust backend on port 8001 (debug build, fast compile)
|
||||
dir: server-rs
|
||||
cmds:
|
||||
- cargo run -- --data {{.WIDE_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}}
|
||||
- cargo run -- --data {{.WIDE_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}} --tiles {{.TILES_OUTPUT}} --postcodes {{.POSTCODES_OUTPUT}}
|
||||
|
||||
dev:server:release:
|
||||
desc: Run Rust backend on port 8001 (release build)
|
||||
dir: server-rs
|
||||
cmds:
|
||||
- cargo run --release -- --data {{.WIDE_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}}
|
||||
- cargo run --release -- --data {{.WIDE_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}} --tiles {{.TILES_OUTPUT}} --postcodes {{.POSTCODES_OUTPUT}}
|
||||
|
||||
dev:og:
|
||||
desc: Run OG screenshot sidecar on port 8002
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
import type { ViewState, Bounds } from '../types';
|
||||
|
||||
export const MAP_STYLE_LIGHT = 'https://basemaps.cartocdn.com/gl/voyager-gl-style/style.json';
|
||||
export const MAP_STYLE_DARK = 'https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json';
|
||||
// Self-hosted tile styles from server
|
||||
export const MAP_STYLE_LIGHT = '/api/tiles/style.json?theme=light';
|
||||
export const MAP_STYLE_DARK = '/api/tiles/style.json?theme=dark';
|
||||
|
||||
export const GRADIENT: { t: number; color: [number, number, number] }[] = [
|
||||
{ t: 0, color: [46, 204, 113] },
|
||||
|
|
@ -54,6 +55,9 @@ export function countToColor(t: number): [number, number, number] {
|
|||
return DENSITY_GRADIENT[DENSITY_GRADIENT.length - 1].color;
|
||||
}
|
||||
|
||||
/** Zoom threshold at which we switch from hexagons to postcode polygons */
|
||||
export const POSTCODE_ZOOM_THRESHOLD = 15;
|
||||
|
||||
export function zoomToResolution(zoom: number): number {
|
||||
if (zoom < 6) return 5;
|
||||
if (zoom < 7) return 6;
|
||||
|
|
|
|||
257
server-rs/src/routes/tiles.rs
Normal file
257
server-rs/src/routes/tiles.rs
Normal file
|
|
@ -0,0 +1,257 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::{Path, Query, State};
|
||||
use axum::http::{header, HeaderMap, StatusCode};
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use pmtiles::async_reader::AsyncPmTilesReader;
|
||||
use pmtiles::MmapBackend;
|
||||
use serde::Deserialize;
|
||||
use tracing::warn;
|
||||
|
||||
pub type TileReader = AsyncPmTilesReader<MmapBackend>;
|
||||
|
||||
pub async fn get_tile(
|
||||
State(reader): State<Arc<TileReader>>,
|
||||
Path((z, x, y)): Path<(u8, u32, u32)>,
|
||||
) -> Response {
|
||||
match reader.get_tile(z, x as u64, y as u64).await {
|
||||
Ok(Some(tile_bytes)) => (
|
||||
StatusCode::OK,
|
||||
[
|
||||
(header::CONTENT_TYPE, "application/x-protobuf"),
|
||||
(header::CONTENT_ENCODING, "gzip"),
|
||||
(header::CACHE_CONTROL, "public, max-age=86400"),
|
||||
],
|
||||
tile_bytes.to_vec(),
|
||||
)
|
||||
.into_response(),
|
||||
Ok(None) => StatusCode::NO_CONTENT.into_response(),
|
||||
Err(err) => {
|
||||
warn!(z, x, y, error = %err, "Failed to get tile");
|
||||
StatusCode::INTERNAL_SERVER_ERROR.into_response()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct StyleParams {
|
||||
#[serde(default)]
|
||||
theme: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn get_style(
|
||||
State(reader): State<Arc<TileReader>>,
|
||||
headers: HeaderMap,
|
||||
Query(params): Query<StyleParams>,
|
||||
) -> Response {
|
||||
let is_dark = params.theme.as_deref() == Some("dark");
|
||||
|
||||
// Metadata is returned as a JSON string
|
||||
let metadata_str = match reader.get_metadata().await {
|
||||
Ok(meta) => meta,
|
||||
Err(err) => {
|
||||
warn!(error = %err, "Failed to get PMTiles metadata");
|
||||
return StatusCode::INTERNAL_SERVER_ERROR.into_response();
|
||||
}
|
||||
};
|
||||
|
||||
// Parse the JSON string
|
||||
let metadata: serde_json::Value = match serde_json::from_str(&metadata_str) {
|
||||
Ok(v) => v,
|
||||
Err(err) => {
|
||||
warn!(error = %err, "Failed to parse PMTiles metadata JSON");
|
||||
serde_json::Value::Object(serde_json::Map::new())
|
||||
}
|
||||
};
|
||||
|
||||
// Extract tilestats for layer info if available
|
||||
let layers: Vec<serde_json::Value> = metadata
|
||||
.get("vector_layers")
|
||||
.and_then(|v| v.as_array())
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
// Build absolute tile URL using the request host
|
||||
let host = headers
|
||||
.get(header::HOST)
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.unwrap_or("localhost:8001");
|
||||
let tile_url = format!("http://{}/api/tiles/{{z}}/{{x}}/{{y}}", host);
|
||||
let style = build_style(is_dark, &layers, &tile_url);
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
[(header::CONTENT_TYPE, "application/json")],
|
||||
serde_json::to_string(&style).unwrap(),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
/// Assembles the style document (`"version": 8`) for the `protomaps` vector source.
///
/// * `is_dark` — selects the dark or the light colour palette.
/// * `layers` — entries of the archive's `vector_layers` metadata. A style layer is only
///   emitted when its source layer's `id` appears here, so an empty slice yields a style
///   containing just the background.
/// * `tile_url` — tile URL template placed in the source's `tiles` list.
///
/// Layers are emitted bottom-to-top: background, earth, landuse, water, roads (casing
/// then fill), buildings, waterway labels, place labels. Water is drawn *after* the
/// opaque earth and landuse fills so that inland water is not painted over by land.
fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> serde_json::Value {
    let (bg_color, water_color, land_color, road_color, text_color, text_halo) = if is_dark {
        ("#1a1a1a", "#193447", "#1a1a1a", "#2a2a2a", "#888888", "#000000")
    } else {
        ("#f8f4f0", "#aad3df", "#f8f4f0", "#ffffff", "#333333", "#ffffff")
    };

    // True when the tile metadata advertises a source layer with this id.
    let has_layer = |id: &str| {
        layers
            .iter()
            .any(|l| l.get("id").and_then(|v| v.as_str()) == Some(id))
    };

    // Expression yielding a feature's kind. NOTE(review): assumes newer Protomaps tile
    // builds expose it as `kind` and older ones as `pmap:kind` — confirm against the
    // build in use. Reading both keeps the filters below working with either name.
    let kind = serde_json::json!(["coalesce", ["get", "kind"], ["get", "pmap:kind"]]);

    let mut style_layers = vec![serde_json::json!({
        "id": "background",
        "type": "background",
        "paint": { "background-color": bg_color }
    })];

    // Land/earth layer
    if has_layer("earth") {
        style_layers.push(serde_json::json!({
            "id": "earth",
            "type": "fill",
            "source": "protomaps",
            "source-layer": "earth",
            "paint": { "fill-color": land_color }
        }));
    }

    // Landuse (parks, nature reserves, forests)
    if has_layer("landuse") {
        let landuse_color = if is_dark { "#1f2d1f" } else { "#d8e8c8" };
        style_layers.push(serde_json::json!({
            "id": "landuse-park",
            "type": "fill",
            "source": "protomaps",
            "source-layer": "landuse",
            "filter": ["any",
                ["==", kind, "park"],
                ["==", kind, "nature_reserve"],
                ["==", kind, "forest"]
            ],
            "paint": { "fill-color": landuse_color, "fill-opacity": 0.5 }
        }));
    }

    // Water layer — above earth/landuse so lakes and rivers stay visible
    if has_layer("water") {
        style_layers.push(serde_json::json!({
            "id": "water",
            "type": "fill",
            "source": "protomaps",
            "source-layer": "water",
            "paint": { "fill-color": water_color }
        }));
    }

    // Roads: a wider casing line underneath a narrower fill line
    if has_layer("roads") {
        let road_casing = if is_dark { "#111111" } else { "#cccccc" };
        style_layers.push(serde_json::json!({
            "id": "roads-casing",
            "type": "line",
            "source": "protomaps",
            "source-layer": "roads",
            "filter": ["!=", kind, "path"],
            "paint": {
                "line-color": road_casing,
                "line-width": ["interpolate", ["linear"], ["zoom"], 10, 1, 18, 12]
            }
        }));
        style_layers.push(serde_json::json!({
            "id": "roads",
            "type": "line",
            "source": "protomaps",
            "source-layer": "roads",
            "filter": ["!=", kind, "path"],
            "paint": {
                "line-color": road_color,
                "line-width": ["interpolate", ["linear"], ["zoom"], 10, 0.5, 18, 8]
            }
        }));
    }

    // Buildings
    if has_layer("buildings") {
        let building_color = if is_dark { "#252525" } else { "#e8e4e0" };
        style_layers.push(serde_json::json!({
            "id": "buildings",
            "type": "fill",
            "source": "protomaps",
            "source-layer": "buildings",
            "minzoom": 14,
            "paint": { "fill-color": building_color, "fill-opacity": 0.8 }
        }));
    }

    // Waterway labels - this layer ID is used by deck.gl as an insertion point
    // for interleaved layers (beforeId: 'waterway_label')
    if has_layer("water") {
        style_layers.push(serde_json::json!({
            "id": "waterway_label",
            "type": "symbol",
            "source": "protomaps",
            "source-layer": "water",
            "filter": ["all", ["has", "name"], ["==", ["geometry-type"], "LineString"]],
            "layout": {
                "text-field": ["get", "name"],
                "text-font": ["Noto Sans Regular"],
                "text-size": 10,
                "symbol-placement": "line"
            },
            "paint": {
                "text-color": water_color,
                "text-halo-color": text_halo,
                "text-halo-width": 1
            }
        }));
    }

    // Place labels, sized by kind (city > town > everything else) and zoom
    if has_layer("places") {
        style_layers.push(serde_json::json!({
            "id": "place-labels",
            "type": "symbol",
            "source": "protomaps",
            "source-layer": "places",
            "filter": ["has", "name"],
            "layout": {
                "text-field": ["get", "name"],
                "text-font": ["Noto Sans Regular"],
                "text-size": ["interpolate", ["linear"], ["zoom"],
                    6, ["match", kind, "city", 12, "town", 10, 8],
                    14, ["match", kind, "city", 24, "town", 18, 14]
                ],
                "text-max-width": 8
            },
            "paint": {
                "text-color": text_color,
                "text-halo-color": text_halo,
                "text-halo-width": 1.5
            }
        }));
    }

    serde_json::json!({
        "version": 8,
        "name": if is_dark { "Dark" } else { "Light" },
        "glyphs": "https://protomaps.github.io/basemaps-assets/fonts/{fontstack}/{range}.pbf",
        "sources": {
            "protomaps": {
                "type": "vector",
                "tiles": [tile_url],
                "maxzoom": 15
            }
        },
        "layers": style_layers
    })
}
|
||||
|
||||
pub async fn init_tile_reader(path: &std::path::Path) -> anyhow::Result<TileReader> {
|
||||
let backend = MmapBackend::try_from(path).await?;
|
||||
let reader = AsyncPmTilesReader::try_from_source(backend).await?;
|
||||
Ok(reader)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue