Compare commits

...

10 commits

Author SHA1 Message Date
e09aa574b0 No scroll story
Some checks failed
CI / Rust (lint + test) (push) Failing after 20s
CI / Python (lint + test) (push) Failing after 59s
CI / Frontend (lint + typecheck) (push) Successful in 3m50s
Build and publish Docker image / build-and-push (push) Failing after 5m33s
2026-03-20 07:52:31 +00:00
852bb3f3a7 Faster scraping 2026-03-20 07:52:22 +00:00
05b8ee06c1 Security 2026-03-20 07:52:06 +00:00
f32a552f46 Small fixes & fmt 2026-03-19 21:51:07 +00:00
6b12e21d50 More fixes 2026-03-18 22:46:08 +00:00
15fa09430b Shared state 2026-03-17 21:08:32 +00:00
53fff3efaa Morning improvements 2026-03-17 13:29:03 +00:00
3e9fba5303 Udpates 2026-03-15 21:54:48 +00:00
c38d654ac7 Fmt 2026-03-15 21:22:28 +00:00
479ef92236 Changes again 2026-03-15 21:15:26 +00:00
135 changed files with 7290 additions and 2721 deletions

View file

@ -1,5 +1,5 @@
# Stage 1: Build frontend
FROM node:20-slim AS frontend
FROM node:22-slim AS frontend
WORKDIR /app/frontend
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci
@ -7,7 +7,7 @@ COPY frontend/ ./
RUN npm run build:no-prerender
# Stage 2: Build Rust server
FROM rust:1.83-bookworm AS server
FROM rust:1.84-bookworm AS server
WORKDIR /app
COPY server-rs/ server-rs/
WORKDIR /app/server-rs

View file

@ -813,8 +813,14 @@
],
"source": [
"# Build area lookup from both sets\n",
"areas_before = {f[\"properties\"][\"postcode\"]: f[\"properties\"][\"area_sqm\"] for f in no_green[\"features\"]}\n",
"areas_after = {f[\"properties\"][\"postcode\"]: f[\"properties\"][\"area_sqm\"] for f in with_green[\"features\"]}\n",
"areas_before = {\n",
" f[\"properties\"][\"postcode\"]: f[\"properties\"][\"area_sqm\"]\n",
" for f in no_green[\"features\"]\n",
"}\n",
"areas_after = {\n",
" f[\"properties\"][\"postcode\"]: f[\"properties\"][\"area_sqm\"]\n",
" for f in with_green[\"features\"]\n",
"}\n",
"\n",
"# Compute percentage removed\n",
"diffs = []\n",
@ -1161,16 +1167,23 @@
"\n",
"colormap = cm.LinearColormap(\n",
" colors=[\"#ffffcc\", \"#fd8d3c\", \"#e31a1c\", \"#800026\"],\n",
" vmin=0, vmax=min(max_pct, 90),\n",
" vmin=0,\n",
" vmax=min(max_pct, 90),\n",
" caption=\"% area removed by greenspace\",\n",
")\n",
"\n",
"\n",
"# Show original boundaries, colored by how much was removed\n",
"def style_by_removal(feature):\n",
" pc = feature[\"properties\"][\"postcode\"]\n",
" pct = diff_lookup.get(pc, 0)\n",
" if pct <= 1:\n",
" return {\"fillColor\": \"#cccccc\", \"color\": \"#999\", \"weight\": 0.5, \"fillOpacity\": 0.15}\n",
" return {\n",
" \"fillColor\": \"#cccccc\",\n",
" \"color\": \"#999\",\n",
" \"weight\": 0.5,\n",
" \"fillOpacity\": 0.15,\n",
" }\n",
" return {\n",
" \"fillColor\": colormap(min(pct, 90)),\n",
" \"color\": \"white\",\n",
@ -1178,6 +1191,7 @@
" \"fillOpacity\": 0.6,\n",
" }\n",
"\n",
"\n",
"folium.GeoJson(\n",
" no_green,\n",
" name=\"Greenspace removal %\",\n",

View file

@ -63,16 +63,23 @@
" \"n\": f\"{len(a):,}\",\n",
" }\n",
"\n",
"\n",
"actual = backtest_df[\"actual_price\"].to_numpy().astype(np.float64)\n",
"metrics = {\n",
" \"Naive\": compute_metrics(actual, backtest_df[\"input_price\"].to_numpy().astype(np.float64)),\n",
" \"Index\": compute_metrics(actual, backtest_df[\"predicted\"].to_numpy().astype(np.float64)),\n",
" \"Naive\": compute_metrics(\n",
" actual, backtest_df[\"input_price\"].to_numpy().astype(np.float64)\n",
" ),\n",
" \"Index\": compute_metrics(\n",
" actual, backtest_df[\"predicted\"].to_numpy().astype(np.float64)\n",
" ),\n",
"}\n",
"\n",
"metrics_table = pl.DataFrame([\n",
"metrics_table = pl.DataFrame(\n",
" [\n",
" {\"Metric\": k, **{stage: v[k] for stage, v in metrics.items()}}\n",
" for k in list(metrics[\"Naive\"].keys())\n",
"])\n",
" ]\n",
")\n",
"metrics_table"
]
},
@ -91,8 +98,7 @@
"source": [
"# National index (average across all sectors weighted by n_pairs)\n",
"national = (\n",
" index_df\n",
" .group_by(\"year\")\n",
" index_df.group_by(\"year\")\n",
" .agg(\n",
" (pl.col(\"log_index\") * pl.col(\"n_pairs\")).sum() / pl.col(\"n_pairs\").sum(),\n",
" )\n",
@ -107,14 +113,23 @@
"\n",
"# If not enough, pick some with high/low n_pairs\n",
"if len(sample_sectors) < 3:\n",
" sector_counts = index_df.group_by(\"sector\").agg(pl.col(\"n_pairs\").first()).sort(\"n_pairs\", descending=True)\n",
" sector_counts = (\n",
" index_df.group_by(\"sector\")\n",
" .agg(pl.col(\"n_pairs\").first())\n",
" .sort(\"n_pairs\", descending=True)\n",
" )\n",
" top = sector_counts.head(2)[\"sector\"].to_list()\n",
" bottom = sector_counts.filter(pl.col(\"n_pairs\") > 0).tail(2)[\"sector\"].to_list()\n",
" sample_sectors = list(set(sample_sectors + top + bottom))[:5]\n",
"\n",
"samples = index_df.filter(pl.col(\"sector\").is_in(sample_sectors))\n",
"\n",
"combined = pl.concat([national.select(\"sector\", \"year\", \"log_index\"), samples.select(\"sector\", \"year\", \"log_index\")])\n",
"combined = pl.concat(\n",
" [\n",
" national.select(\"sector\", \"year\", \"log_index\"),\n",
" samples.select(\"sector\", \"year\", \"log_index\"),\n",
" ]\n",
")\n",
"\n",
"# Normalize: index = 100 at base year (earliest available)\n",
"combined = combined.with_columns(\n",
@ -122,7 +137,10 @@
")\n",
"\n",
"fig = px.line(\n",
" combined.to_pandas(), x=\"year\", y=\"index_100\", color=\"sector\",\n",
" combined.to_pandas(),\n",
" x=\"year\",\n",
" y=\"index_100\",\n",
" color=\"sector\",\n",
" title=\"Repeat-Sales Price Index (base year = 100)\",\n",
" labels={\"index_100\": \"Index (base=100)\", \"year\": \"Year\"},\n",
")\n",
@ -155,8 +173,10 @@
"\n",
"fig.update_layout(\n",
" title=\"Absolute Percentage Error Distribution\",\n",
" xaxis_title=\"APE (%)\", yaxis_title=\"Count\",\n",
" barmode=\"overlay\", height=500,\n",
" xaxis_title=\"APE (%)\",\n",
" yaxis_title=\"Count\",\n",
" barmode=\"overlay\",\n",
" height=500,\n",
")\n",
"fig.show()"
]
@ -183,17 +203,27 @@
"pred = sample[\"predicted\"].to_numpy().astype(np.float64)\n",
"\n",
"fig = go.Figure()\n",
"fig.add_trace(go.Scattergl(\n",
" x=actual_sample, y=pred, mode=\"markers\",\n",
" marker=dict(size=2, opacity=0.3), name=\"Index\",\n",
"))\n",
"fig.add_trace(\n",
" go.Scattergl(\n",
" x=actual_sample,\n",
" y=pred,\n",
" mode=\"markers\",\n",
" marker=dict(size=2, opacity=0.3),\n",
" name=\"Index\",\n",
" )\n",
")\n",
"# 45-degree reference line\n",
"min_val = max(10_000, min(actual_sample.min(), np.nanmin(pred)))\n",
"max_val = min(5_000_000, max(actual_sample.max(), np.nanmax(pred)))\n",
"fig.add_trace(go.Scatter(\n",
" x=[min_val, max_val], y=[min_val, max_val],\n",
" mode=\"lines\", line=dict(color=\"red\", dash=\"dash\"), showlegend=False,\n",
"))\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=[min_val, max_val],\n",
" y=[min_val, max_val],\n",
" mode=\"lines\",\n",
" line=dict(color=\"red\", dash=\"dash\"),\n",
" showlegend=False,\n",
" )\n",
")\n",
"fig.update_xaxes(type=\"log\", title_text=\"Actual (\\u00a3)\")\n",
"fig.update_yaxes(type=\"log\", title_text=\"Predicted (\\u00a3)\")\n",
"fig.update_layout(title=\"Predicted vs Actual Price (log scale, 10K sample)\", height=500)\n",
@ -234,12 +264,22 @@
" for name, arr in [(\"Naive\", naive), (\"Index\", pred)]:\n",
" ape = np.abs(arr[mask] - actual[mask]) / actual[mask]\n",
" valid = np.isfinite(ape)\n",
" rows.append({\"Price Band\": label, \"Method\": name, \"MdAPE (%)\": float(np.median(ape[valid]) * 100)})\n",
" rows.append(\n",
" {\n",
" \"Price Band\": label,\n",
" \"Method\": name,\n",
" \"MdAPE (%)\": float(np.median(ape[valid]) * 100),\n",
" }\n",
" )\n",
"\n",
"band_df = pl.DataFrame(rows)\n",
"fig = px.bar(\n",
" band_df.to_pandas(), x=\"Price Band\", y=\"MdAPE (%)\", color=\"Method\",\n",
" barmode=\"group\", title=\"MdAPE by Price Band\",\n",
" band_df.to_pandas(),\n",
" x=\"Price Band\",\n",
" y=\"MdAPE (%)\",\n",
" color=\"Method\",\n",
" barmode=\"group\",\n",
" title=\"MdAPE by Price Band\",\n",
" category_orders={\"Price Band\": [b[2] for b in bands]},\n",
")\n",
"fig.update_layout(height=450)\n",
@ -264,7 +304,9 @@
")\n",
"\n",
"# Top 20 areas by volume\n",
"top_areas = bt.group_by(\"area\").len().sort(\"len\", descending=True).head(20)[\"area\"].to_list()\n",
"top_areas = (\n",
" bt.group_by(\"area\").len().sort(\"len\", descending=True).head(20)[\"area\"].to_list()\n",
")\n",
"\n",
"actual_np = bt[\"actual_price\"].to_numpy().astype(np.float64)\n",
"pred_np = bt[\"predicted\"].to_numpy().astype(np.float64)\n",
@ -279,12 +321,18 @@
" p = arr[mask]\n",
" valid = np.isfinite(p) & (a > 0)\n",
" ape = np.abs(p[valid] - a[valid]) / a[valid]\n",
" rows.append({\"Area\": area, \"Method\": name, \"MdAPE (%)\": float(np.median(ape) * 100)})\n",
" rows.append(\n",
" {\"Area\": area, \"Method\": name, \"MdAPE (%)\": float(np.median(ape) * 100)}\n",
" )\n",
"\n",
"area_df = pl.DataFrame(rows)\n",
"fig = px.bar(\n",
" area_df.to_pandas(), x=\"Area\", y=\"MdAPE (%)\", color=\"Method\",\n",
" barmode=\"group\", title=\"MdAPE by Postcode Area (Top 20 by Volume)\",\n",
" area_df.to_pandas(),\n",
" x=\"Area\",\n",
" y=\"MdAPE (%)\",\n",
" color=\"Method\",\n",
" barmode=\"group\",\n",
" title=\"MdAPE by Postcode Area (Top 20 by Volume)\",\n",
" category_orders={\"Area\": top_areas},\n",
")\n",
"fig.update_layout(height=500)\n",
@ -324,11 +372,20 @@
" p = arr[mask]\n",
" valid = np.isfinite(p) & (a > 0)\n",
" ape = np.abs(p[valid] - a[valid]) / a[valid]\n",
" rows.append({\"Gap (years)\": gap, \"Method\": name, \"MdAPE (%)\": float(np.median(ape) * 100)})\n",
" rows.append(\n",
" {\n",
" \"Gap (years)\": gap,\n",
" \"Method\": name,\n",
" \"MdAPE (%)\": float(np.median(ape) * 100),\n",
" }\n",
" )\n",
"\n",
"gap_df = pl.DataFrame(rows)\n",
"fig = px.line(\n",
" gap_df.to_pandas(), x=\"Gap (years)\", y=\"MdAPE (%)\", color=\"Method\",\n",
" gap_df.to_pandas(),\n",
" x=\"Gap (years)\",\n",
" y=\"MdAPE (%)\",\n",
" color=\"Method\",\n",
" title=\"MdAPE by Holding Period (years between input and actual sale)\",\n",
" markers=True,\n",
")\n",

View file

@ -52,7 +52,9 @@
"pl.Config.set_tbl_rows(20)\n",
"pl.Config.set_fmt_str_lengths(80)\n",
"\n",
"df = pl.read_parquet(\"/volumes/syncthing/Projects/property-map/property-data/rightmove_buy.parquet\")\n",
"df = pl.read_parquet(\n",
" \"/volumes/syncthing/Projects/property-map/property-data/rightmove_buy.parquet\"\n",
")\n",
"schema = df.schema\n",
"print(f\"Total rows: {len(df):,}\")\n",
"print(f\"Columns ({len(schema)}):\")\n",
@ -150,11 +152,13 @@
],
"source": [
"# Null counts\n",
"null_df = pl.DataFrame({\n",
"null_df = pl.DataFrame(\n",
" {\n",
" \"column\": df.columns,\n",
" \"nulls\": [df[c].null_count() for c in df.columns],\n",
" \"pct\": [f\"{df[c].null_count() / len(df) * 100:.1f}%\" for c in df.columns],\n",
"})\n",
" }\n",
")\n",
"null_df.filter(pl.col(\"nulls\") > 0)"
]
},
@ -197,8 +201,12 @@
" \"price = 0\": len(df.filter(pl.col(\"price\") == 0)),\n",
" \"price > 50M\": len(df.filter(pl.col(\"price\") > 50_000_000)),\n",
" \"floorspace > 10,000 sqm\": len(df.filter(pl.col(\"floorspace_sqm\") > 10_000)),\n",
" \"latitude outside UK (< 49 or > 61)\": len(df.filter((pl.col(\"latitude\") < 49) | (pl.col(\"latitude\") > 61))),\n",
" \"longitude outside UK (< -8 or > 2)\": len(df.filter((pl.col(\"longitude\") < -8) | (pl.col(\"longitude\") > 2))),\n",
" \"latitude outside UK (< 49 or > 61)\": len(\n",
" df.filter((pl.col(\"latitude\") < 49) | (pl.col(\"latitude\") > 61))\n",
" ),\n",
" \"longitude outside UK (< -8 or > 2)\": len(\n",
" df.filter((pl.col(\"longitude\") < -8) | (pl.col(\"longitude\") > 2))\n",
" ),\n",
" \"house_share = true\": len(df.filter(pl.col(\"house_share\"))),\n",
"}\n",
"print(\"Data quality issues:\")\n",
@ -1126,8 +1134,12 @@
"# Price histogram (clipped to 2nd-98th percentile)\n",
"lo, hi = price.quantile(0.02), price.quantile(0.98)\n",
"clipped = clean.filter((pl.col(\"price\") >= lo) & (pl.col(\"price\") <= hi))\n",
"fig = px.histogram(clipped.to_pandas(), x=\"price\", nbins=80,\n",
" title=f\"Asking Price Distribution (£{lo:,.0f} - £{hi:,.0f}, 2nd-98th pctl)\")\n",
"fig = px.histogram(\n",
" clipped.to_pandas(),\n",
" x=\"price\",\n",
" nbins=80,\n",
" title=f\"Asking Price Distribution (£{lo:,.0f} - £{hi:,.0f}, 2nd-98th pctl)\",\n",
")\n",
"fig.update_layout(height=400, xaxis_title=\"Asking Price (£)\", yaxis_title=\"Count\")\n",
"fig.show()"
]
@ -439978,9 +439990,13 @@
],
"source": [
"# Price by property type\n",
"fig = px.box(clean.filter(pl.col(\"price\") <= 2_000_000).to_pandas(),\n",
" x=\"property_type\", y=\"price\", color=\"property_type\",\n",
" title=\"Price by Property Type (capped at £2M for readability)\")\n",
"fig = px.box(\n",
" clean.filter(pl.col(\"price\") <= 2_000_000).to_pandas(),\n",
" x=\"property_type\",\n",
" y=\"price\",\n",
" color=\"property_type\",\n",
" title=\"Price by Property Type (capped at £2M for readability)\",\n",
")\n",
"fig.update_layout(height=500, showlegend=False, yaxis_title=\"Price (£)\")\n",
"fig.show()"
]
@ -440079,9 +440095,7 @@
"source": [
"# Price qualifier breakdown\n",
"pq = clean[\"price_qualifier\"].value_counts().sort(\"count\", descending=True)\n",
"pq = pq.with_columns(\n",
" (pl.col(\"count\") / pl.col(\"count\").sum() * 100).alias(\"pct\")\n",
")\n",
"pq = pq.with_columns((pl.col(\"count\") / pl.col(\"count\").sum() * 100).alias(\"pct\"))\n",
"pq"
]
},
@ -440928,8 +440942,12 @@
"source": [
"# Property type distribution\n",
"type_counts = clean[\"property_type\"].value_counts().sort(\"count\", descending=True)\n",
"fig = px.pie(type_counts.to_pandas(), names=\"property_type\", values=\"count\",\n",
" title=\"Property Type Distribution\")\n",
"fig = px.pie(\n",
" type_counts.to_pandas(),\n",
" names=\"property_type\",\n",
" values=\"count\",\n",
" title=\"Property Type Distribution\",\n",
")\n",
"fig.update_layout(height=400)\n",
"fig.show()"
]
@ -441805,9 +441823,16 @@
],
"source": [
"# Top 20 sub-types\n",
"sub_counts = clean[\"property_sub_type\"].value_counts().sort(\"count\", descending=True).head(20)\n",
"fig = px.bar(sub_counts.to_pandas(), x=\"count\", y=\"property_sub_type\", orientation=\"h\",\n",
" title=\"Top 20 Property Sub-types\")\n",
"sub_counts = (\n",
" clean[\"property_sub_type\"].value_counts().sort(\"count\", descending=True).head(20)\n",
")\n",
"fig = px.bar(\n",
" sub_counts.to_pandas(),\n",
" x=\"count\",\n",
" y=\"property_sub_type\",\n",
" orientation=\"h\",\n",
" title=\"Top 20 Property Sub-types\",\n",
")\n",
"fig.update_layout(height=600, yaxis={\"categoryorder\": \"total ascending\"})\n",
"fig.show()"
]
@ -442643,9 +442668,15 @@
],
"source": [
"# Tenure split\n",
"tenure_counts = clean[\"tenure\"].drop_nulls().value_counts().sort(\"count\", descending=True)\n",
"fig = px.pie(tenure_counts.to_pandas(), names=\"tenure\", values=\"count\",\n",
" title=f\"Tenure Split ({clean['tenure'].null_count():,} unknown / {clean['tenure'].null_count()/len(clean)*100:.1f}% missing)\")\n",
"tenure_counts = (\n",
" clean[\"tenure\"].drop_nulls().value_counts().sort(\"count\", descending=True)\n",
")\n",
"fig = px.pie(\n",
" tenure_counts.to_pandas(),\n",
" names=\"tenure\",\n",
" values=\"count\",\n",
" title=f\"Tenure Split ({clean['tenure'].null_count():,} unknown / {clean['tenure'].null_count() / len(clean) * 100:.1f}% missing)\",\n",
")\n",
"fig.update_layout(height=400)\n",
"fig.show()"
]
@ -443546,8 +443577,14 @@
" .agg(pl.len().alias(\"count\"))\n",
" .sort(\"property_type\")\n",
")\n",
"fig = px.bar(tenure_by_type.to_pandas(), x=\"property_type\", y=\"count\", color=\"tenure\",\n",
" barmode=\"group\", title=\"Tenure by Property Type\")\n",
"fig = px.bar(\n",
" tenure_by_type.to_pandas(),\n",
" x=\"property_type\",\n",
" y=\"count\",\n",
" color=\"tenure\",\n",
" barmode=\"group\",\n",
" title=\"Tenure by Property Type\",\n",
")\n",
"fig.update_layout(height=400)\n",
"fig.show()"
]
@ -444412,9 +444449,12 @@
],
"source": [
"# Bedroom distribution\n",
"bed_counts = clean.filter(pl.col(\"bedrooms\") <= 10)[\"bedrooms\"].value_counts().sort(\"bedrooms\")\n",
"fig = px.bar(bed_counts.to_pandas(), x=\"bedrooms\", y=\"count\",\n",
" title=\"Bedroom Count Distribution\")\n",
"bed_counts = (\n",
" clean.filter(pl.col(\"bedrooms\") <= 10)[\"bedrooms\"].value_counts().sort(\"bedrooms\")\n",
")\n",
"fig = px.bar(\n",
" bed_counts.to_pandas(), x=\"bedrooms\", y=\"count\", title=\"Bedroom Count Distribution\"\n",
")\n",
"fig.update_layout(height=400)\n",
"fig.show()"
]
@ -445279,16 +445319,25 @@
")\n",
"\n",
"fig = go.Figure()\n",
"fig.add_trace(go.Bar(\n",
" x=price_by_beds[\"bedrooms\"], y=price_by_beds[\"median_price\"],\n",
" name=\"Median\", error_y=dict(type=\"data\",\n",
"fig.add_trace(\n",
" go.Bar(\n",
" x=price_by_beds[\"bedrooms\"],\n",
" y=price_by_beds[\"median_price\"],\n",
" name=\"Median\",\n",
" error_y=dict(\n",
" type=\"data\",\n",
" symmetric=False,\n",
" array=(price_by_beds[\"p75\"] - price_by_beds[\"median_price\"]).to_list(),\n",
" arrayminus=(price_by_beds[\"median_price\"] - price_by_beds[\"p25\"]).to_list()\n",
" arrayminus=(price_by_beds[\"median_price\"] - price_by_beds[\"p25\"]).to_list(),\n",
" ),\n",
" )\n",
")\n",
"fig.update_layout(\n",
" title=\"Median Price by Bedrooms (with IQR)\",\n",
" height=400,\n",
" xaxis_title=\"Bedrooms\",\n",
" yaxis_title=\"Price (£)\",\n",
")\n",
"))\n",
"fig.update_layout(title=\"Median Price by Bedrooms (with IQR)\", height=400,\n",
" xaxis_title=\"Bedrooms\", yaxis_title=\"Price (£)\")\n",
"fig.show()"
]
},
@ -446263,8 +446312,14 @@
" .agg(pl.len().alias(\"count\"))\n",
" .sort(\"property_type\", \"bedrooms\")\n",
")\n",
"fig = px.bar(beds_by_type.to_pandas(), x=\"bedrooms\", y=\"count\", color=\"property_type\",\n",
" barmode=\"group\", title=\"Bedroom Distribution by Property Type\")\n",
"fig = px.bar(\n",
" beds_by_type.to_pandas(),\n",
" x=\"bedrooms\",\n",
" y=\"count\",\n",
" color=\"property_type\",\n",
" barmode=\"group\",\n",
" title=\"Bedroom Distribution by Property Type\",\n",
")\n",
"fig.update_layout(height=450)\n",
"fig.show()"
]
@ -446323,19 +446378,26 @@
],
"source": [
"# Floorspace availability by property type\n",
"has_floor = clean.with_columns(pl.col(\"floorspace_sqm\").is_not_null().alias(\"has_floorspace\"))\n",
"floor_by_type = (\n",
" has_floor.group_by(\"property_type\", \"has_floorspace\")\n",
" .agg(pl.len().alias(\"count\"))\n",
"has_floor = clean.with_columns(\n",
" pl.col(\"floorspace_sqm\").is_not_null().alias(\"has_floorspace\")\n",
")\n",
"floor_by_type = has_floor.group_by(\"property_type\", \"has_floorspace\").agg(\n",
" pl.len().alias(\"count\")\n",
")\n",
"totals = floor_by_type.group_by(\"property_type\").agg(\n",
" pl.col(\"count\").sum().alias(\"total\")\n",
")\n",
"totals = floor_by_type.group_by(\"property_type\").agg(pl.col(\"count\").sum().alias(\"total\"))\n",
"floor_pct = (\n",
" floor_by_type.filter(pl.col(\"has_floorspace\"))\n",
" .join(totals, on=\"property_type\")\n",
" .with_columns((pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"pct_with_floorspace\"))\n",
" .with_columns(\n",
" (pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"pct_with_floorspace\")\n",
" )\n",
" .sort(\"pct_with_floorspace\", descending=True)\n",
")\n",
"print(f\"Overall floorspace availability: {clean['floorspace_sqm'].drop_nulls().len():,} / {len(clean):,} ({clean['floorspace_sqm'].drop_nulls().len()/len(clean)*100:.1f}%)\")\n",
"print(\n",
" f\"Overall floorspace availability: {clean['floorspace_sqm'].drop_nulls().len():,} / {len(clean):,} ({clean['floorspace_sqm'].drop_nulls().len() / len(clean) * 100:.1f}%)\"\n",
")\n",
"floor_pct.select(\"property_type\", \"count\", \"total\", \"pct_with_floorspace\")"
]
},
@ -447298,8 +447360,13 @@
")\n",
"print(f\"Properties with reasonable floorspace (10-1000 sqm): {len(with_floor):,}\")\n",
"\n",
"fig = px.histogram(with_floor.to_pandas(), x=\"floorspace_sqm\", nbins=80, color=\"property_type\",\n",
" title=\"Floorspace Distribution by Property Type\")\n",
"fig = px.histogram(\n",
" with_floor.to_pandas(),\n",
" x=\"floorspace_sqm\",\n",
" nbins=80,\n",
" color=\"property_type\",\n",
" title=\"Floorspace Distribution by Property Type\",\n",
")\n",
"fig.update_layout(height=450, xaxis_title=\"Floorspace (sqm)\", barmode=\"overlay\")\n",
"fig.update_traces(opacity=0.6)\n",
"fig.show()"
@ -448176,8 +448243,12 @@
"print(f\" P25: £{s.quantile(0.25):,.0f}/sqm\")\n",
"print(f\" P75: £{s.quantile(0.75):,.0f}/sqm\")\n",
"\n",
"fig = px.histogram(ppsqm.to_pandas(), x=\"price_per_sqm\", nbins=80,\n",
" title=\"Price per Square Metre Distribution\")\n",
"fig = px.histogram(\n",
" ppsqm.to_pandas(),\n",
" x=\"price_per_sqm\",\n",
" nbins=80,\n",
" title=\"Price per Square Metre Distribution\",\n",
")\n",
"fig.update_layout(height=400, xaxis_title=\"Price per sqm (£)\")\n",
"fig.show()"
]
@ -584906,8 +584977,13 @@
}
],
"source": [
"fig = px.box(ppsqm.to_pandas(), x=\"property_type\", y=\"price_per_sqm\", color=\"property_type\",\n",
" title=\"Price per sqm by Property Type\")\n",
"fig = px.box(\n",
" ppsqm.to_pandas(),\n",
" x=\"property_type\",\n",
" y=\"price_per_sqm\",\n",
" color=\"property_type\",\n",
" title=\"Price per sqm by Property Type\",\n",
")\n",
"fig.update_layout(height=450, showlegend=False, yaxis_title=\"£ per sqm\")\n",
"fig.show()"
]
@ -585865,9 +585941,15 @@
")\n",
"\n",
"top30 = outcode_stats.head(30)\n",
"fig = px.bar(top30.to_pandas(), x=\"count\", y=\"outcode\", orientation=\"h\",\n",
" color=\"median_price\", color_continuous_scale=\"Viridis\",\n",
" title=\"Top 30 Outcodes by Listing Volume\")\n",
"fig = px.bar(\n",
" top30.to_pandas(),\n",
" x=\"count\",\n",
" y=\"outcode\",\n",
" orientation=\"h\",\n",
" color=\"median_price\",\n",
" color_continuous_scale=\"Viridis\",\n",
" title=\"Top 30 Outcodes by Listing Volume\",\n",
")\n",
"fig.update_layout(height=700, yaxis={\"categoryorder\": \"total ascending\"})\n",
"fig.show()"
]
@ -587400,11 +587482,25 @@
],
"source": [
"# Most expensive outcodes (min 50 listings)\n",
"expensive = outcode_stats.filter(pl.col(\"count\") >= 50).sort(\"median_price\", descending=True).head(30)\n",
"fig = px.bar(expensive.to_pandas(), x=\"median_price\", y=\"outcode\", orientation=\"h\",\n",
" color=\"count\", color_continuous_scale=\"Blues\",\n",
" title=\"Top 30 Most Expensive Outcodes (min 50 listings, by median price)\")\n",
"fig.update_layout(height=700, yaxis={\"categoryorder\": \"total ascending\"}, xaxis_title=\"Median Price (£)\")\n",
"expensive = (\n",
" outcode_stats.filter(pl.col(\"count\") >= 50)\n",
" .sort(\"median_price\", descending=True)\n",
" .head(30)\n",
")\n",
"fig = px.bar(\n",
" expensive.to_pandas(),\n",
" x=\"median_price\",\n",
" y=\"outcode\",\n",
" orientation=\"h\",\n",
" color=\"count\",\n",
" color_continuous_scale=\"Blues\",\n",
" title=\"Top 30 Most Expensive Outcodes (min 50 listings, by median price)\",\n",
")\n",
"fig.update_layout(\n",
" height=700,\n",
" yaxis={\"categoryorder\": \"total ascending\"},\n",
" xaxis_title=\"Median Price (£)\",\n",
")\n",
"fig.show()"
]
},
@ -588914,10 +589010,20 @@
"source": [
"# Cheapest outcodes (min 50 listings)\n",
"cheapest = outcode_stats.filter(pl.col(\"count\") >= 50).sort(\"median_price\").head(30)\n",
"fig = px.bar(cheapest.to_pandas(), x=\"median_price\", y=\"outcode\", orientation=\"h\",\n",
" color=\"count\", color_continuous_scale=\"Blues\",\n",
" title=\"Top 30 Cheapest Outcodes (min 50 listings, by median price)\")\n",
"fig.update_layout(height=700, yaxis={\"categoryorder\": \"total descending\"}, xaxis_title=\"Median Price (£)\")\n",
"fig = px.bar(\n",
" cheapest.to_pandas(),\n",
" x=\"median_price\",\n",
" y=\"outcode\",\n",
" orientation=\"h\",\n",
" color=\"count\",\n",
" color_continuous_scale=\"Blues\",\n",
" title=\"Top 30 Cheapest Outcodes (min 50 listings, by median price)\",\n",
")\n",
"fig.update_layout(\n",
" height=700,\n",
" yaxis={\"categoryorder\": \"total descending\"},\n",
" xaxis_title=\"Median Price (£)\",\n",
")\n",
"fig.show()"
]
},
@ -589828,14 +589934,19 @@
"source": [
"# Geographic scatter of listings (sample for performance)\n",
"sample = clean.sample(n=min(20_000, len(clean)), seed=42)\n",
"fig = px.scatter_map(sample.to_pandas(),\n",
" lat=\"latitude\", lon=\"longitude\",\n",
" color=\"price\", size_max=4,\n",
"fig = px.scatter_map(\n",
" sample.to_pandas(),\n",
" lat=\"latitude\",\n",
" lon=\"longitude\",\n",
" color=\"price\",\n",
" size_max=4,\n",
" color_continuous_scale=\"Viridis\",\n",
" range_color=[100_000, 1_500_000],\n",
" zoom=5, center={\"lat\": 52.5, \"lon\": -1.5},\n",
" zoom=5,\n",
" center={\"lat\": 52.5, \"lon\": -1.5},\n",
" title=\"Listing Locations (20k sample, colored by price)\",\n",
" opacity=0.4)\n",
" opacity=0.4,\n",
")\n",
"fig.update_layout(height=700)\n",
"fig.show()"
]
@ -589864,7 +589975,9 @@
"source": [
"# Parse dates and look at listing age\n",
"with_dates = clean.with_columns(\n",
" pl.col(\"first_visible_date\").str.to_datetime(\"%Y-%m-%dT%H:%M:%SZ\").alias(\"listed_at\"),\n",
" pl.col(\"first_visible_date\")\n",
" .str.to_datetime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
" .alias(\"listed_at\"),\n",
")\n",
"\n",
"print(f\"Date range: {with_dates['listed_at'].min()} to {with_dates['listed_at'].max()}\")"
@ -590856,8 +590969,9 @@
" .sort(\"month\")\n",
")\n",
"\n",
"fig = px.bar(monthly.to_pandas(), x=\"month\", y=\"count\",\n",
" title=\"Listings by Month Listed\")\n",
"fig = px.bar(\n",
" monthly.to_pandas(), x=\"month\", y=\"count\", title=\"Listings by Month Listed\"\n",
")\n",
"fig.update_layout(height=400, xaxis_title=\"Month\", yaxis_title=\"Listings\")\n",
"fig.show()"
]
@ -590884,6 +590998,7 @@
"source": [
"# How old are current listings? (days since first visible)\n",
"import datetime\n",
"\n",
"now = datetime.datetime(2026, 2, 14)\n",
"with_age = with_dates.with_columns(\n",
" ((pl.lit(now) - pl.col(\"listed_at\")).dt.total_days()).alias(\"days_on_market\")\n",
@ -591749,8 +591864,12 @@
"source": [
"# Days on market distribution (cap at 2 years for readability)\n",
"capped = with_age.filter(pl.col(\"days_on_market\") <= 730)\n",
"fig = px.histogram(capped.to_pandas(), x=\"days_on_market\", nbins=100,\n",
" title=\"Days on Market Distribution (capped at 2 years)\")\n",
"fig = px.histogram(\n",
" capped.to_pandas(),\n",
" x=\"days_on_market\",\n",
" nbins=100,\n",
" title=\"Days on Market Distribution (capped at 2 years)\",\n",
")\n",
"fig.update_layout(height=400, xaxis_title=\"Days on Market\", yaxis_title=\"Count\")\n",
"fig.show()"
]
@ -591887,7 +592006,9 @@
"\n",
"# Most common features (lowercased for grouping)\n",
"feature_counts = (\n",
" features_exploded.with_columns(pl.col(\"features\").str.to_lowercase().str.strip_chars().alias(\"feature_lower\"))\n",
" features_exploded.with_columns(\n",
" pl.col(\"features\").str.to_lowercase().str.strip_chars().alias(\"feature_lower\")\n",
" )\n",
" .group_by(\"feature_lower\")\n",
" .agg(pl.len().alias(\"count\"))\n",
" .sort(\"count\", descending=True)\n",
@ -592794,16 +592915,64 @@
"all_features = features_exploded[\"features\"].to_list()\n",
"word_counter = Counter()\n",
"for feat in all_features:\n",
" words = re.findall(r'[a-z]+', feat.lower())\n",
" words = re.findall(r\"[a-z]+\", feat.lower())\n",
" word_counter.update(words)\n",
"\n",
"# Filter out very short/common words\n",
"stop_words = {'the', 'a', 'an', 'and', 'or', 'of', 'to', 'in', 'with', 'for', 'on', 'at', 'by', 'is', 'it', 'from', 'as', 'be', 'this', 'that', 'are', 'was', 'has', 'have', 'not', 'but', 'all', 'can', 'had', 'her', 'his', 'one', 'our', 'out', 'you', 'will'}\n",
"keywords = [(w, c) for w, c in word_counter.most_common(100) if w not in stop_words and len(w) > 2]\n",
"kw_df = pl.DataFrame({\"word\": [w for w,c in keywords[:40]], \"count\": [c for w,c in keywords[:40]]})\n",
"stop_words = {\n",
" \"the\",\n",
" \"a\",\n",
" \"an\",\n",
" \"and\",\n",
" \"or\",\n",
" \"of\",\n",
" \"to\",\n",
" \"in\",\n",
" \"with\",\n",
" \"for\",\n",
" \"on\",\n",
" \"at\",\n",
" \"by\",\n",
" \"is\",\n",
" \"it\",\n",
" \"from\",\n",
" \"as\",\n",
" \"be\",\n",
" \"this\",\n",
" \"that\",\n",
" \"are\",\n",
" \"was\",\n",
" \"has\",\n",
" \"have\",\n",
" \"not\",\n",
" \"but\",\n",
" \"all\",\n",
" \"can\",\n",
" \"had\",\n",
" \"her\",\n",
" \"his\",\n",
" \"one\",\n",
" \"our\",\n",
" \"out\",\n",
" \"you\",\n",
" \"will\",\n",
"}\n",
"keywords = [\n",
" (w, c)\n",
" for w, c in word_counter.most_common(100)\n",
" if w not in stop_words and len(w) > 2\n",
"]\n",
"kw_df = pl.DataFrame(\n",
" {\"word\": [w for w, c in keywords[:40]], \"count\": [c for w, c in keywords[:40]]}\n",
")\n",
"\n",
"fig = px.bar(kw_df.to_pandas(), x=\"count\", y=\"word\", orientation=\"h\",\n",
" title=\"Most Common Words in Feature Descriptions\")\n",
"fig = px.bar(\n",
" kw_df.to_pandas(),\n",
" x=\"count\",\n",
" y=\"word\",\n",
" orientation=\"h\",\n",
" title=\"Most Common Words in Feature Descriptions\",\n",
")\n",
"fig.update_layout(height=800, yaxis={\"categoryorder\": \"total ascending\"})\n",
"fig.show()"
]
@ -593767,9 +593936,14 @@
" & (pl.col(\"price\") < 3_000_000)\n",
").sample(n=min(15_000, len(with_floor)), seed=42)\n",
"\n",
"fig = px.scatter(scatter_df.to_pandas(), x=\"floorspace_sqm\", y=\"price\",\n",
" color=\"property_type\", opacity=0.3,\n",
" title=\"Price vs Floorspace (sample, capped at £3M / 500sqm)\")\n",
"fig = px.scatter(\n",
" scatter_df.to_pandas(),\n",
" x=\"floorspace_sqm\",\n",
" y=\"price\",\n",
" color=\"property_type\",\n",
" opacity=0.3,\n",
" title=\"Price vs Floorspace (sample, capped at £3M / 500sqm)\",\n",
")\n",
"fig.update_layout(height=600, xaxis_title=\"Floorspace (sqm)\", yaxis_title=\"Price (£)\")\n",
"fig.show()"
]
@ -594739,8 +594913,14 @@
" .agg(pl.col(\"price\").median().alias(\"median_price\"), pl.len().alias(\"count\"))\n",
" .sort(\"property_type\", \"bedrooms\")\n",
")\n",
"fig = px.line(bp.to_pandas(), x=\"bedrooms\", y=\"median_price\", color=\"property_type\",\n",
" markers=True, title=\"Median Price by Bedrooms and Property Type\")\n",
"fig = px.line(\n",
" bp.to_pandas(),\n",
" x=\"bedrooms\",\n",
" y=\"median_price\",\n",
" color=\"property_type\",\n",
" markers=True,\n",
" title=\"Median Price by Bedrooms and Property Type\",\n",
")\n",
"fig.update_layout(height=450, xaxis_title=\"Bedrooms\", yaxis_title=\"Median Price (£)\")\n",
"fig.show()"
]
@ -594789,17 +594969,27 @@
"print(f\"Total listings: {len(clean):,}\")\n",
"print(f\"Outcodes covered: {clean['outcode'].n_unique():,}\")\n",
"print(\"\")\n",
"print(f\"Price: median £{clean['price'].median():,.0f}, mean £{clean['price'].mean():,.0f}\")\n",
"print(f\"Bedrooms: median {clean['bedrooms'].median():.0f}, mean {clean['bedrooms'].mean():.1f}\")\n",
"print(\n",
" f\"Price: median £{clean['price'].median():,.0f}, mean £{clean['price'].mean():,.0f}\"\n",
")\n",
"print(\n",
" f\"Bedrooms: median {clean['bedrooms'].median():.0f}, mean {clean['bedrooms'].mean():.1f}\"\n",
")\n",
"print(\"\")\n",
"print(f\"Tenure known: {(len(clean) - clean['tenure'].null_count())/len(clean)*100:.1f}%\")\n",
"print(\n",
" f\"Tenure known: {(len(clean) - clean['tenure'].null_count()) / len(clean) * 100:.1f}%\"\n",
")\n",
"print(f\" Freehold: {len(clean.filter(pl.col('tenure') == 'Freehold')):,}\")\n",
"print(f\" Leasehold: {len(clean.filter(pl.col('tenure') == 'Leasehold')):,}\")\n",
"print(\"\")\n",
"print(f\"Floorspace available: {clean['floorspace_sqm'].drop_nulls().len()/len(clean)*100:.1f}%\")\n",
"print(\n",
" f\"Floorspace available: {clean['floorspace_sqm'].drop_nulls().len() / len(clean) * 100:.1f}%\"\n",
")\n",
"print(\"\")\n",
"print(\"Property types:\")\n",
"for row in clean['property_type'].value_counts().sort('count', descending=True).iter_rows():\n",
"for row in (\n",
" clean[\"property_type\"].value_counts().sort(\"count\", descending=True).iter_rows()\n",
"):\n",
" print(f\" {row[0]}: {row[1]:,} ({row[1] / len(clean) * 100:.1f}%)\")"
]
}

View file

@ -52,6 +52,7 @@
"buy = pl.read_parquet(f\"{DATA}/online_listings_buy.parquet\")\n",
"rent = pl.read_parquet(f\"{DATA}/online_listings_rent.parquet\")\n",
"\n",
"\n",
"def tag_source(df: pl.DataFrame) -> pl.DataFrame:\n",
" return df.with_columns(\n",
" pl.when(pl.col(\"Listing URL\").str.contains(\"rightmove\"))\n",
@ -62,6 +63,7 @@
" .alias(\"source\")\n",
" )\n",
"\n",
"\n",
"buy = tag_source(buy)\n",
"rent = tag_source(rent)\n",
"\n",
@ -987,7 +989,8 @@
"# Venn-style summary\n",
"rm_buy = len(buy.filter(pl.col(\"source\") == \"Rightmove\"))\n",
"\n",
"fig = go.Figure(go.Sankey(\n",
"fig = go.Figure(\n",
" go.Sankey(\n",
" node=dict(\n",
" label=[\n",
" f\"Rightmove\\n{rm_buy:,}\",\n",
@ -1001,9 +1004,14 @@
" source=[0, 1, 1],\n",
" target=[2, 2, 3],\n",
" value=[rm_buy, hk_buy_unique, CROSS_DEDUP_BUY],\n",
" color=[\"rgba(37,99,235,0.3)\", \"rgba(16,185,129,0.3)\", \"rgba(239,68,68,0.3)\"],\n",
" color=[\n",
" \"rgba(37,99,235,0.3)\",\n",
" \"rgba(16,185,129,0.3)\",\n",
" \"rgba(239,68,68,0.3)\",\n",
" ],\n",
" ),\n",
"))\n",
" )\n",
")\n",
"fig.update_layout(title=\"BUY Channel: Source Contribution Flow\", height=350)\n",
"fig.show()"
]
@ -1106,8 +1114,11 @@
"oc_comparison = (\n",
" hk_by_oc.join(rm_by_oc, on=\"outcode\", how=\"left\")\n",
" .with_columns(\n",
" (pl.col(\"hk_count\") / (pl.col(\"hk_count\") + pl.col(\"rm_count\").fill_null(0)) * 100)\n",
" .alias(\"hk_pct_of_total\")\n",
" (\n",
" pl.col(\"hk_count\")\n",
" / (pl.col(\"hk_count\") + pl.col(\"rm_count\").fill_null(0))\n",
" * 100\n",
" ).alias(\"hk_pct_of_total\")\n",
" )\n",
" .sort(\"hk_count\", descending=True)\n",
")\n",
@ -2215,18 +2226,28 @@
"source": [
"# Bar chart: home.co.uk vs Rightmove counts per outcode\n",
"fig = go.Figure()\n",
"fig.add_trace(go.Bar(\n",
" x=oc_comparison[\"outcode\"], y=oc_comparison[\"rm_count\"],\n",
" name=\"Rightmove\", marker_color=\"#2563eb\",\n",
"))\n",
"fig.add_trace(go.Bar(\n",
" x=oc_comparison[\"outcode\"], y=oc_comparison[\"hk_count\"],\n",
" name=\"Home.co.uk\", marker_color=\"#10b981\",\n",
"))\n",
"fig.add_trace(\n",
" go.Bar(\n",
" x=oc_comparison[\"outcode\"],\n",
" y=oc_comparison[\"rm_count\"],\n",
" name=\"Rightmove\",\n",
" marker_color=\"#2563eb\",\n",
" )\n",
")\n",
"fig.add_trace(\n",
" go.Bar(\n",
" x=oc_comparison[\"outcode\"],\n",
" y=oc_comparison[\"hk_count\"],\n",
" name=\"Home.co.uk\",\n",
" marker_color=\"#10b981\",\n",
" )\n",
")\n",
"fig.update_layout(\n",
" barmode=\"group\", height=400,\n",
" barmode=\"group\",\n",
" height=400,\n",
" title=\"Listings per Outcode: Rightmove vs Home.co.uk (outcodes with HK coverage)\",\n",
" xaxis_title=\"Outcode\", yaxis_title=\"Listings\",\n",
" xaxis_title=\"Outcode\",\n",
" yaxis_title=\"Listings\",\n",
")\n",
"fig.show()"
]
@ -3121,10 +3142,14 @@
"sample = covered.sample(n=min(30_000, len(covered)), seed=42)\n",
"\n",
"fig = px.scatter_map(\n",
" sample.to_pandas(), lat=\"lat\", lon=\"lon\",\n",
" sample.to_pandas(),\n",
" lat=\"lat\",\n",
" lon=\"lon\",\n",
" color=\"source\",\n",
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
" zoom=7, opacity=0.4, size_max=4,\n",
" zoom=7,\n",
" opacity=0.4,\n",
" size_max=4,\n",
" title=\"Listing Locations in Covered Outcodes (by source)\",\n",
")\n",
"fig.update_layout(height=600)\n",
@ -3188,15 +3213,41 @@
"# For covered outcodes, compare home.co.uk listings against Rightmove\n",
"# to find near-matches (same postcode, same beds, price within 5%)\n",
"\n",
"hk = buy_oc.filter(pl.col(\"source\") == \"Home.co.uk\").select(\n",
" \"Postcode\", \"Bedrooms\", \"Asking price\", \"Property type\", \"Address per Property Register\"\n",
").rename({\"Asking price\": \"hk_price\", \"Property type\": \"hk_type\", \"Address per Property Register\": \"hk_addr\"})\n",
"hk = (\n",
" buy_oc.filter(pl.col(\"source\") == \"Home.co.uk\")\n",
" .select(\n",
" \"Postcode\",\n",
" \"Bedrooms\",\n",
" \"Asking price\",\n",
" \"Property type\",\n",
" \"Address per Property Register\",\n",
" )\n",
" .rename(\n",
" {\n",
" \"Asking price\": \"hk_price\",\n",
" \"Property type\": \"hk_type\",\n",
" \"Address per Property Register\": \"hk_addr\",\n",
" }\n",
" )\n",
")\n",
"\n",
"rm = buy_oc.filter(\n",
" pl.col(\"source\") == \"Rightmove\"\n",
").select(\n",
" \"Postcode\", \"Bedrooms\", \"Asking price\", \"Property type\", \"Address per Property Register\"\n",
").rename({\"Asking price\": \"rm_price\", \"Property type\": \"rm_type\", \"Address per Property Register\": \"rm_addr\"})\n",
"rm = (\n",
" buy_oc.filter(pl.col(\"source\") == \"Rightmove\")\n",
" .select(\n",
" \"Postcode\",\n",
" \"Bedrooms\",\n",
" \"Asking price\",\n",
" \"Property type\",\n",
" \"Address per Property Register\",\n",
" )\n",
" .rename(\n",
" {\n",
" \"Asking price\": \"rm_price\",\n",
" \"Property type\": \"rm_type\",\n",
" \"Address per Property Register\": \"rm_addr\",\n",
" }\n",
" )\n",
")\n",
"\n",
"# Join on postcode + bedrooms\n",
"joined = hk.join(rm, on=[\"Postcode\", \"Bedrooms\"], how=\"inner\")\n",
@ -3213,16 +3264,24 @@
"exact = joined.filter(pl.col(\"hk_price\") == pl.col(\"rm_price\"))\n",
"\n",
"print(f\"Home.co.uk listings (unique, in file): {len(hk):,}\")\n",
"print(f\"Rightmove listings in covered outcodes: {len(rm.filter(pl.col('Postcode').is_in(hk['Postcode']))):,}\")\n",
"print(\n",
" f\"Rightmove listings in covered outcodes: {len(rm.filter(pl.col('Postcode').is_in(hk['Postcode']))):,}\"\n",
")\n",
"print()\n",
"print(f\"Joined on (postcode, bedrooms): {len(joined):,} candidate pairs\")\n",
"print(f\" Exact price match: {len(exact):,} pairs (likely same property, different beds or already deduped)\")\n",
"print(f\" Price within 5%: {len(near):,} pairs (probable duplicates with price rounding)\")\n",
"print(\n",
" f\" Exact price match: {len(exact):,} pairs (likely same property, different beds or already deduped)\"\n",
")\n",
"print(\n",
" f\" Price within 5%: {len(near):,} pairs (probable duplicates with price rounding)\"\n",
")\n",
"print()\n",
"# Unique hk listings that have at least one near-match\n",
"hk_with_near = near.select(\"hk_price\", \"hk_addr\", \"Postcode\").unique()\n",
"print(f\"Home.co.uk listings with a near-match in RM: ~{len(hk_with_near):,}\")\n",
"print(f\"Estimated additional overlap: ~{len(hk_with_near)/len(hk)*100:.1f}% of unique HK listings\")"
"print(\n",
" f\"Estimated additional overlap: ~{len(hk_with_near) / len(hk) * 100:.1f}% of unique HK listings\"\n",
")"
]
},
{
@ -4178,9 +4237,13 @@
")\n",
"\n",
"fig = px.histogram(\n",
" clipped.to_pandas(), x=\"Asking price\", color=\"source\", nbins=80,\n",
" clipped.to_pandas(),\n",
" x=\"Asking price\",\n",
" color=\"source\",\n",
" nbins=80,\n",
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
" barmode=\"overlay\", histnorm=\"probability density\",\n",
" barmode=\"overlay\",\n",
" histnorm=\"probability density\",\n",
" title=\"Price Distribution by Source (normalised, £50k–£2M)\",\n",
")\n",
"fig.update_traces(opacity=0.6)\n",
@ -5095,10 +5158,7 @@
],
"source": [
"# Property type distribution by source\n",
"type_by_src = (\n",
" buy.group_by(\"source\", \"Property type\")\n",
" .agg(pl.len().alias(\"count\"))\n",
")\n",
"type_by_src = buy.group_by(\"source\", \"Property type\").agg(pl.len().alias(\"count\"))\n",
"# Normalise within each source\n",
"totals = type_by_src.group_by(\"source\").agg(pl.col(\"count\").sum().alias(\"total\"))\n",
"type_by_src = type_by_src.join(totals, on=\"source\").with_columns(\n",
@ -5107,7 +5167,10 @@
"\n",
"fig = px.bar(\n",
" type_by_src.sort(\"Property type\").to_pandas(),\n",
" x=\"Property type\", y=\"pct\", color=\"source\", barmode=\"group\",\n",
" x=\"Property type\",\n",
" y=\"pct\",\n",
" color=\"source\",\n",
" barmode=\"group\",\n",
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
" title=\"Property Type Distribution by Source (%)\",\n",
")\n",
@ -5186,7 +5249,9 @@
"# Property sub-type comparison — top home.co.uk sub-types\n",
"hk_subtypes = (\n",
" buy.filter(pl.col(\"source\") == \"Home.co.uk\")[\"Property sub-type\"]\n",
" .value_counts().sort(\"count\", descending=True).head(20)\n",
" .value_counts()\n",
" .sort(\"count\", descending=True)\n",
" .head(20)\n",
")\n",
"print(\"Top 20 Home.co.uk property sub-types:\")\n",
"hk_subtypes"
@ -5263,9 +5328,16 @@
"source": [
"# Field completeness by source\n",
"fields = [\n",
" \"Bedrooms\", \"Bathrooms\", \"Postcode\", \"Address per Property Register\",\n",
" \"Leasehold/Freehold\", \"Property type\", \"Total floor area (sqm)\",\n",
" \"Listing date\", \"Asking price\", \"Price qualifier\",\n",
" \"Bedrooms\",\n",
" \"Bathrooms\",\n",
" \"Postcode\",\n",
" \"Address per Property Register\",\n",
" \"Leasehold/Freehold\",\n",
" \"Property type\",\n",
" \"Total floor area (sqm)\",\n",
" \"Listing date\",\n",
" \"Asking price\",\n",
" \"Price qualifier\",\n",
"]\n",
"\n",
"rows = []\n",
@ -5276,17 +5348,19 @@
" non_null = n - subset[f].null_count()\n",
" # Also count empty strings as missing for string fields\n",
" if subset[f].dtype == pl.Utf8:\n",
" non_null = len(subset.filter(\n",
" pl.col(f).is_not_null() & (pl.col(f).str.len_chars() > 0)\n",
" ))\n",
" non_null = len(\n",
" subset.filter(pl.col(f).is_not_null() & (pl.col(f).str.len_chars() > 0))\n",
" )\n",
" rows.append({\"source\": src, \"field\": f, \"pct_available\": non_null / n * 100})\n",
"\n",
"completeness = pl.DataFrame(rows)\n",
"pivot = completeness.pivot(on=\"source\", index=\"field\", values=\"pct_available\")\n",
"pivot = pivot.with_columns([\n",
"pivot = pivot.with_columns(\n",
" [\n",
" pl.col(\"Rightmove\").round(1),\n",
" pl.col(\"Home.co.uk\").round(1),\n",
"])\n",
" ]\n",
")\n",
"print(\"Field completeness (% non-null/non-empty):\")\n",
"pivot"
]
@ -6198,19 +6272,26 @@
"# Bedroom distribution comparison\n",
"fig = make_subplots(rows=1, cols=2, subplot_titles=(\"Rightmove\", \"Home.co.uk\"))\n",
"for i, src in enumerate([\"Rightmove\", \"Home.co.uk\"], 1):\n",
" beds = buy.filter(\n",
" (pl.col(\"source\") == src) & (pl.col(\"Bedrooms\") <= 8)\n",
" )[\"Bedrooms\"].value_counts().sort(\"Bedrooms\")\n",
" beds = (\n",
" buy.filter((pl.col(\"source\") == src) & (pl.col(\"Bedrooms\") <= 8))[\"Bedrooms\"]\n",
" .value_counts()\n",
" .sort(\"Bedrooms\")\n",
" )\n",
" # Normalise\n",
" total = beds[\"count\"].sum()\n",
" fig.add_trace(\n",
" go.Bar(\n",
" x=beds[\"Bedrooms\"], y=beds[\"count\"] / total * 100,\n",
" x=beds[\"Bedrooms\"],\n",
" y=beds[\"count\"] / total * 100,\n",
" name=src,\n",
" marker_color=\"#2563eb\" if src == \"Rightmove\" else \"#10b981\",\n",
" ), row=1, col=i,\n",
" ),\n",
" row=1,\n",
" col=i,\n",
" )\n",
"fig.update_layout(\n",
" height=350, title=\"Bedroom Distribution by Source (%)\", showlegend=False\n",
")\n",
"fig.update_layout(height=350, title=\"Bedroom Distribution by Source (%)\", showlegend=False)\n",
"fig.update_yaxes(title_text=\"%\", row=1, col=1)\n",
"fig.show()"
]
@ -6287,17 +6368,23 @@
"\n",
"comparison_rows = []\n",
"for ptype in [\"Detached\", \"Semi-Detached\", \"Terraced\", \"Flats/Maisonettes\", \"Other\"]:\n",
" rm_p = rm_covered.filter(pl.col(\"Property type\") == ptype)[\"Asking price\"].drop_nulls()\n",
" rm_p = rm_covered.filter(pl.col(\"Property type\") == ptype)[\n",
" \"Asking price\"\n",
" ].drop_nulls()\n",
" hk_p = hk_only.filter(pl.col(\"Property type\") == ptype)[\"Asking price\"].drop_nulls()\n",
" if len(rm_p) > 0 and len(hk_p) > 0:\n",
" comparison_rows.append({\n",
" comparison_rows.append(\n",
" {\n",
" \"Property type\": ptype,\n",
" \"RM count\": len(rm_p),\n",
" \"RM median £\": int(rm_p.median()),\n",
" \"HK count\": len(hk_p),\n",
" \"HK median £\": int(hk_p.median()),\n",
" \"HK premium %\": round((hk_p.median() - rm_p.median()) / rm_p.median() * 100, 1),\n",
" })\n",
" \"HK premium %\": round(\n",
" (hk_p.median() - rm_p.median()) / rm_p.median() * 100, 1\n",
" ),\n",
" }\n",
" )\n",
"\n",
"comp = pl.DataFrame(comparison_rows)\n",
"print(\"Price comparison in covered outcodes (Home.co.uk unique listings vs Rightmove):\")\n",
@ -7245,9 +7332,13 @@
"# Listing age histogram comparison\n",
"age_plot = with_age.filter(pl.col(\"days_on_market\") <= 730) # cap at 2 years\n",
"fig = px.histogram(\n",
" age_plot.to_pandas(), x=\"days_on_market\", color=\"source\", nbins=60,\n",
" age_plot.to_pandas(),\n",
" x=\"days_on_market\",\n",
" color=\"source\",\n",
" nbins=60,\n",
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
" barmode=\"overlay\", histnorm=\"probability density\",\n",
" barmode=\"overlay\",\n",
" histnorm=\"probability density\",\n",
" title=\"Days on Market Distribution by Source (normalised, capped at 2 years)\",\n",
")\n",
"fig.update_traces(opacity=0.6)\n",
@ -7330,7 +7421,9 @@
"print(f\" Projected home.co.uk total: ~{projected_hk:,}\")\n",
"print(f\" Projected cross-dedup: ~{projected_dedup:,}\")\n",
"print(f\" Projected unique additions: ~{projected_unique:,}\")\n",
"print(f\" Projected merged dataset: ~{rm_buy + projected_unique:,} ({projected_unique/rm_buy*100:.1f}% increase)\")\n",
"print(\n",
" f\" Projected merged dataset: ~{rm_buy + projected_unique:,} ({projected_unique / rm_buy * 100:.1f}% increase)\"\n",
")\n",
"print()\n",
"print(\"⚠️ These are rough estimates — the covered outcodes may not be representative\")"
]

View file

@ -54,11 +54,15 @@
}
],
"source": [
"r5_bank = pl.read_parquet(\"../property-data/travel-times/transit/000000-bank-tube-station.parquet\")\n",
"r5_bank = pl.read_parquet(\n",
" \"../property-data/travel-times/transit/000000-bank-tube-station.parquet\"\n",
")\n",
"manual_bank = pl.read_parquet(\"../manual-data/journey_times_bank.parquet\")\n",
"\n",
"print(f\"R5 Bank: {r5_bank.shape[0]:,} postcodes\")\n",
"print(f\"Manual Bank: {manual_bank.shape[0]:,} postcodes ({manual_bank['public_transport_easy_minutes'].null_count():,} null easy)\")"
"print(\n",
" f\"Manual Bank: {manual_bank.shape[0]:,} postcodes ({manual_bank['public_transport_easy_minutes'].null_count():,} null easy)\"\n",
")"
]
},
{
@ -116,25 +120,49 @@
"source": [
"# Join on postcode, keep only rows where both sources have values\n",
"bank = (\n",
" r5_bank\n",
" .join(manual_bank, left_on=\"pcds\", right_on=\"postcode\", how=\"inner\")\n",
" r5_bank.join(manual_bank, left_on=\"pcds\", right_on=\"postcode\", how=\"inner\")\n",
" .filter(\n",
" pl.col(\"public_transport_easy_minutes\").is_not_null()\n",
" & pl.col(\"public_transport_quick_minutes\").is_not_null()\n",
" )\n",
" .with_columns([\n",
" .with_columns(\n",
" [\n",
" # Signed error: R5 - Manual (positive = R5 is slower)\n",
" (pl.col(\"travel_minutes\").cast(pl.Float64) - pl.col(\"public_transport_easy_minutes\").cast(pl.Float64)).alias(\"error_easy\"),\n",
" (pl.col(\"best_minutes\").cast(pl.Float64) - pl.col(\"public_transport_quick_minutes\").cast(pl.Float64)).alias(\"error_quick\"),\n",
" (\n",
" pl.col(\"travel_minutes\").cast(pl.Float64)\n",
" - pl.col(\"public_transport_easy_minutes\").cast(pl.Float64)\n",
" ).alias(\"error_easy\"),\n",
" (\n",
" pl.col(\"best_minutes\").cast(pl.Float64)\n",
" - pl.col(\"public_transport_quick_minutes\").cast(pl.Float64)\n",
" ).alias(\"error_quick\"),\n",
" # Absolute error\n",
" (pl.col(\"travel_minutes\").cast(pl.Float64) - pl.col(\"public_transport_easy_minutes\").cast(pl.Float64)).abs().alias(\"abs_error_easy\"),\n",
" (pl.col(\"best_minutes\").cast(pl.Float64) - pl.col(\"public_transport_quick_minutes\").cast(pl.Float64)).abs().alias(\"abs_error_quick\"),\n",
" ])\n",
" (\n",
" pl.col(\"travel_minutes\").cast(pl.Float64)\n",
" - pl.col(\"public_transport_easy_minutes\").cast(pl.Float64)\n",
" )\n",
" .abs()\n",
" .alias(\"abs_error_easy\"),\n",
" (\n",
" pl.col(\"best_minutes\").cast(pl.Float64)\n",
" - pl.col(\"public_transport_quick_minutes\").cast(pl.Float64)\n",
" )\n",
" .abs()\n",
" .alias(\"abs_error_quick\"),\n",
" ]\n",
" )\n",
")\n",
"\n",
"print(f\"Joined (non-null): {bank.shape[0]:,} postcodes\")\n",
"bank.select(\"pcds\", \"travel_minutes\", \"public_transport_easy_minutes\", \"error_easy\",\n",
" \"best_minutes\", \"public_transport_quick_minutes\", \"error_quick\").head(10)"
"bank.select(\n",
" \"pcds\",\n",
" \"travel_minutes\",\n",
" \"public_transport_easy_minutes\",\n",
" \"error_easy\",\n",
" \"best_minutes\",\n",
" \"public_transport_quick_minutes\",\n",
" \"error_quick\",\n",
").head(10)"
]
},
{
@ -196,18 +224,23 @@
" percentiles = [5, 25, 50, 80, 90, 95, 99]\n",
" rows = []\n",
" for p in percentiles:\n",
" rows.append({\n",
" rows.append(\n",
" {\n",
" \"percentile\": f\"p{p}\",\n",
" f\"{label} signed error\": round(float(np.percentile(col, p)), 1),\n",
" f\"{label} absolute error\": round(float(np.percentile(abs_col, p)), 1),\n",
" })\n",
" rows.append({\n",
" }\n",
" )\n",
" rows.append(\n",
" {\n",
" \"percentile\": \"mean\",\n",
" f\"{label} signed error\": round(float(np.mean(col)), 1),\n",
" f\"{label} absolute error\": round(float(np.mean(abs_col)), 1),\n",
" })\n",
" }\n",
" )\n",
" return pl.DataFrame(rows)\n",
"\n",
"\n",
"stats_easy = percentile_stats(\"error_easy\", \"Median (easy)\")\n",
"stats_quick = percentile_stats(\"error_quick\", \"Best (quick)\")\n",
"\n",
@ -1120,24 +1153,42 @@
}
],
"source": [
"fig = make_subplots(rows=1, cols=2, subplot_titles=[\n",
"fig = make_subplots(\n",
" rows=1,\n",
" cols=2,\n",
" subplot_titles=[\n",
" \"Median transit time error (R5 − TfL)\",\n",
" \"Best transit time error (R5 − TfL)\"\n",
"])\n",
" \"Best transit time error (R5 − TfL)\",\n",
" ],\n",
")\n",
"\n",
"# Clip for readability\n",
"easy_clipped = bank[\"error_easy\"].clip(-60, 60).to_numpy()\n",
"quick_clipped = bank[\"error_quick\"].clip(-60, 60).to_numpy()\n",
"\n",
"fig.add_trace(go.Histogram(x=easy_clipped, nbinsx=120, name=\"Median (easy)\",\n",
" marker_color=\"#0d9488\"), row=1, col=1)\n",
"fig.add_trace(go.Histogram(x=quick_clipped, nbinsx=120, name=\"Best (quick)\",\n",
" marker_color=\"#f59e0b\"), row=1, col=2)\n",
"fig.add_trace(\n",
" go.Histogram(\n",
" x=easy_clipped, nbinsx=120, name=\"Median (easy)\", marker_color=\"#0d9488\"\n",
" ),\n",
" row=1,\n",
" col=1,\n",
")\n",
"fig.add_trace(\n",
" go.Histogram(\n",
" x=quick_clipped, nbinsx=120, name=\"Best (quick)\", marker_color=\"#f59e0b\"\n",
" ),\n",
" row=1,\n",
" col=2,\n",
")\n",
"\n",
"fig.update_xaxes(title_text=\"Error (minutes)\", row=1, col=1)\n",
"fig.update_xaxes(title_text=\"Error (minutes)\", row=1, col=2)\n",
"fig.update_yaxes(title_text=\"Count\", row=1, col=1)\n",
"fig.update_layout(height=400, showlegend=False, title_text=\"Bank: Error Distribution (clipped ±60 min)\")\n",
"fig.update_layout(\n",
" height=400,\n",
" showlegend=False,\n",
" title_text=\"Bank: Error Distribution (clipped ±60 min)\",\n",
")\n",
"fig.show()"
]
},
@ -2104,34 +2155,55 @@
"# Sample for scatter plot performance\n",
"sample = bank.sample(n=min(20_000, bank.shape[0]), seed=42)\n",
"\n",
"fig = make_subplots(rows=1, cols=2, subplot_titles=[\n",
" \"Median: R5 vs TfL (easy)\",\n",
" \"Best: R5 vs TfL (quick)\"\n",
"])\n",
"fig = make_subplots(\n",
" rows=1,\n",
" cols=2,\n",
" subplot_titles=[\"Median: R5 vs TfL (easy)\", \"Best: R5 vs TfL (quick)\"],\n",
")\n",
"\n",
"fig.add_trace(go.Scattergl(\n",
"fig.add_trace(\n",
" go.Scattergl(\n",
" x=sample[\"public_transport_easy_minutes\"].to_numpy(),\n",
" y=sample[\"travel_minutes\"].cast(pl.Float64).to_numpy(),\n",
" mode=\"markers\", marker=dict(size=2, opacity=0.3, color=\"#0d9488\"),\n",
" name=\"Median\"\n",
"), row=1, col=1)\n",
" mode=\"markers\",\n",
" marker=dict(size=2, opacity=0.3, color=\"#0d9488\"),\n",
" name=\"Median\",\n",
" ),\n",
" row=1,\n",
" col=1,\n",
")\n",
"\n",
"fig.add_trace(go.Scattergl(\n",
"fig.add_trace(\n",
" go.Scattergl(\n",
" x=sample[\"public_transport_quick_minutes\"].to_numpy(),\n",
" y=sample[\"best_minutes\"].cast(pl.Float64).to_numpy(),\n",
" mode=\"markers\", marker=dict(size=2, opacity=0.3, color=\"#f59e0b\"),\n",
" name=\"Best\"\n",
"), row=1, col=2)\n",
" mode=\"markers\",\n",
" marker=dict(size=2, opacity=0.3, color=\"#f59e0b\"),\n",
" name=\"Best\",\n",
" ),\n",
" row=1,\n",
" col=2,\n",
")\n",
"\n",
"# Perfect agreement line\n",
"for col in [1, 2]:\n",
" fig.add_trace(go.Scatter(x=[0, 200], y=[0, 200], mode=\"lines\",\n",
" fig.add_trace(\n",
" go.Scatter(\n",
" x=[0, 200],\n",
" y=[0, 200],\n",
" mode=\"lines\",\n",
" line=dict(color=\"red\", dash=\"dash\", width=1),\n",
" showlegend=False), row=1, col=col)\n",
" showlegend=False,\n",
" ),\n",
" row=1,\n",
" col=col,\n",
" )\n",
" fig.update_xaxes(title_text=\"TfL API (minutes)\", row=1, col=col)\n",
" fig.update_yaxes(title_text=\"R5 (minutes)\", row=1, col=col)\n",
"\n",
"fig.update_layout(height=500, showlegend=False, title_text=\"Bank: R5 vs TfL API (20k sample)\")\n",
"fig.update_layout(\n",
" height=500, showlegend=False, title_text=\"Bank: R5 vs TfL API (20k sample)\"\n",
")\n",
"fig.show()"
]
},
@ -403063,7 +403135,8 @@
"\n",
"fig = px.scatter_map(\n",
" map_sample.to_pandas(),\n",
" lat=\"lat\", lon=\"long\",\n",
" lat=\"lat\",\n",
" lon=\"long\",\n",
" color=\"error_easy\",\n",
" color_continuous_scale=\"RdBu_r\", # red=positive (R5 slower), blue=negative (R5 faster)\n",
" range_color=[-30, 30],\n",
@ -403071,8 +403144,14 @@
" center={\"lat\": 51.5, \"lon\": -0.1},\n",
" opacity=0.5,\n",
" title=\"Bank — Median transit error (R5 − TfL easy), minutes\",\n",
" hover_data={\"pcds\": True, \"travel_minutes\": True, \"public_transport_easy_minutes\": True,\n",
" \"error_easy\": \":.0f\", \"lat\": False, \"long\": False},\n",
" hover_data={\n",
" \"pcds\": True,\n",
" \"travel_minutes\": True,\n",
" \"public_transport_easy_minutes\": True,\n",
" \"error_easy\": \":.0f\",\n",
" \"lat\": False,\n",
" \"long\": False,\n",
" },\n",
" height=700,\n",
")\n",
"fig.update_layout(map_style=\"carto-positron\")\n",
@ -803994,7 +804073,8 @@
"source": [
"fig = px.scatter_map(\n",
" map_sample.to_pandas(),\n",
" lat=\"lat\", lon=\"long\",\n",
" lat=\"lat\",\n",
" lon=\"long\",\n",
" color=\"error_quick\",\n",
" color_continuous_scale=\"RdBu_r\",\n",
" range_color=[-30, 30],\n",
@ -804002,8 +804082,14 @@
" center={\"lat\": 51.5, \"lon\": -0.1},\n",
" opacity=0.5,\n",
" title=\"Bank — Best transit error (R5 − TfL quick), minutes\",\n",
" hover_data={\"pcds\": True, \"best_minutes\": True, \"public_transport_quick_minutes\": True,\n",
" \"error_quick\": \":.0f\", \"lat\": False, \"long\": False},\n",
" hover_data={\n",
" \"pcds\": True,\n",
" \"best_minutes\": True,\n",
" \"public_transport_quick_minutes\": True,\n",
" \"error_quick\": \":.0f\",\n",
" \"lat\": False,\n",
" \"long\": False,\n",
" },\n",
" height=700,\n",
")\n",
"fig.update_layout(map_style=\"carto-positron\")\n",
@ -1204925,7 +1205011,8 @@
"source": [
"fig = px.scatter_map(\n",
" map_sample.to_pandas(),\n",
" lat=\"lat\", lon=\"long\",\n",
" lat=\"lat\",\n",
" lon=\"long\",\n",
" color=\"abs_error_easy\",\n",
" color_continuous_scale=\"YlOrRd\",\n",
" range_color=[0, 30],\n",
@ -1204933,8 +1205020,14 @@
" center={\"lat\": 51.5, \"lon\": -0.1},\n",
" opacity=0.5,\n",
" title=\"Bank — Absolute median transit error |R5 − TfL easy|, minutes\",\n",
" hover_data={\"pcds\": True, \"travel_minutes\": True, \"public_transport_easy_minutes\": True,\n",
" \"abs_error_easy\": \":.0f\", \"lat\": False, \"long\": False},\n",
" hover_data={\n",
" \"pcds\": True,\n",
" \"travel_minutes\": True,\n",
" \"public_transport_easy_minutes\": True,\n",
" \"abs_error_easy\": \":.0f\",\n",
" \"lat\": False,\n",
" \"long\": False,\n",
" },\n",
" height=700,\n",
")\n",
"fig.update_layout(map_style=\"carto-positron\")\n",
@ -1204998,9 +1205091,15 @@
],
"source": [
"bank.sort(\"abs_error_easy\", descending=True).select(\n",
" \"pcds\", \"lat\", \"long\",\n",
" \"travel_minutes\", \"public_transport_easy_minutes\", \"error_easy\",\n",
" \"best_minutes\", \"public_transport_quick_minutes\", \"error_quick\",\n",
" \"pcds\",\n",
" \"lat\",\n",
" \"long\",\n",
" \"travel_minutes\",\n",
" \"public_transport_easy_minutes\",\n",
" \"error_easy\",\n",
" \"best_minutes\",\n",
" \"public_transport_quick_minutes\",\n",
" \"error_quick\",\n",
").head(30)"
]
},
@ -1205945,45 +1206044,75 @@
"\n",
"dist_df = bank.with_columns(\n",
" # Rough km distance using an equirectangular (flat-earth) approximation, not true Haversine\n",
" ((((pl.col(\"lat\") - BANK_LAT) * 111.32) ** 2 +\n",
" ((pl.col(\"long\") - BANK_LON) * 111.32 * np.cos(np.radians(BANK_LAT))) ** 2) ** 0.5\n",
" (\n",
" (\n",
" ((pl.col(\"lat\") - BANK_LAT) * 111.32) ** 2\n",
" + ((pl.col(\"long\") - BANK_LON) * 111.32 * np.cos(np.radians(BANK_LAT))) ** 2\n",
" )\n",
" ** 0.5\n",
" ).alias(\"dist_km\")\n",
")\n",
"\n",
"# Bin by 5km\n",
"binned = (\n",
" dist_df\n",
" .with_columns((pl.col(\"dist_km\") / 5).floor() * 5)\n",
" dist_df.with_columns((pl.col(\"dist_km\") / 5).floor() * 5)\n",
" .group_by(\"dist_km\")\n",
" .agg([\n",
" .agg(\n",
" [\n",
" pl.col(\"error_easy\").median().alias(\"median_error_easy\"),\n",
" pl.col(\"error_quick\").median().alias(\"median_error_quick\"),\n",
" pl.col(\"abs_error_easy\").median().alias(\"median_abs_error_easy\"),\n",
" pl.len().alias(\"count\"),\n",
" ])\n",
" ]\n",
" )\n",
" .sort(\"dist_km\")\n",
" .filter(pl.col(\"count\") > 50)\n",
")\n",
"\n",
"fig = make_subplots(rows=1, cols=2, subplot_titles=[\n",
"fig = make_subplots(\n",
" rows=1,\n",
" cols=2,\n",
" subplot_titles=[\n",
" \"Median signed error by distance\",\n",
" \"Median absolute error by distance\"\n",
"])\n",
" \"Median absolute error by distance\",\n",
" ],\n",
")\n",
"\n",
"fig.add_trace(go.Scatter(\n",
" x=binned[\"dist_km\"].to_numpy(), y=binned[\"median_error_easy\"].to_numpy(),\n",
" mode=\"lines+markers\", name=\"Easy\", line=dict(color=\"#0d9488\")\n",
"), row=1, col=1)\n",
"fig.add_trace(go.Scatter(\n",
" x=binned[\"dist_km\"].to_numpy(), y=binned[\"median_error_quick\"].to_numpy(),\n",
" mode=\"lines+markers\", name=\"Quick\", line=dict(color=\"#f59e0b\")\n",
"), row=1, col=1)\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=binned[\"dist_km\"].to_numpy(),\n",
" y=binned[\"median_error_easy\"].to_numpy(),\n",
" mode=\"lines+markers\",\n",
" name=\"Easy\",\n",
" line=dict(color=\"#0d9488\"),\n",
" ),\n",
" row=1,\n",
" col=1,\n",
")\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=binned[\"dist_km\"].to_numpy(),\n",
" y=binned[\"median_error_quick\"].to_numpy(),\n",
" mode=\"lines+markers\",\n",
" name=\"Quick\",\n",
" line=dict(color=\"#f59e0b\"),\n",
" ),\n",
" row=1,\n",
" col=1,\n",
")\n",
"\n",
"fig.add_trace(go.Scatter(\n",
" x=binned[\"dist_km\"].to_numpy(), y=binned[\"median_abs_error_easy\"].to_numpy(),\n",
" mode=\"lines+markers\", name=\"|Easy|\", line=dict(color=\"#0d9488\"),\n",
" showlegend=False\n",
"), row=1, col=2)\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=binned[\"dist_km\"].to_numpy(),\n",
" y=binned[\"median_abs_error_easy\"].to_numpy(),\n",
" mode=\"lines+markers\",\n",
" name=\"|Easy|\",\n",
" line=dict(color=\"#0d9488\"),\n",
" showlegend=False,\n",
" ),\n",
" row=1,\n",
" col=2,\n",
")\n",
"\n",
"for col in [1, 2]:\n",
" fig.update_xaxes(title_text=\"Distance from Bank (km)\", row=1, col=col)\n",

View file

@ -146,6 +146,12 @@ services:
# networks:
# - dev-network
# restart: unless-stopped
# healthcheck:
# test: ["CMD", "curl", "-f", "http://localhost:8191/health"]
# interval: 30s
# timeout: 5s
# retries: 3
# start_period: 30s
# finder:
# build:
@ -161,8 +167,14 @@ services:
# gluetun:
# condition: service_healthy
# flaresolverr:
# condition: service_started
# condition: service_healthy
# restart: unless-stopped
# healthcheck:
# test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:1234/health')"]
# interval: 30s
# timeout: 5s
# retries: 3
# start_period: 60s
volumes:

View file

@ -5,9 +5,14 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
WORKDIR /app
COPY pyproject.toml ./
RUN uv pip install --system -r pyproject.toml
RUN playwright install --with-deps chromium
RUN playwright install-deps firefox
RUN camoufox fetch \
&& python -c "from camoufox.pkgman import camoufox_path; p = camoufox_path(download_if_missing=False); print('Camoufox verified at', p)"
COPY *.py ./
COPY property-data/arcgis_data.parquet /data/arcgis_data.parquet
HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:1234/health')"
CMD ["python3", "main.py"]

View file

@ -4,8 +4,8 @@ from pathlib import Path
ARCGIS_PATH = os.environ.get("ARCGIS_PATH", "/data/arcgis_data.parquet")
DATA_DIR = Path("/app/data")
PAGE_SIZE = 24
DELAY_BETWEEN_PAGES = 1.0
DELAY_BETWEEN_OUTCODES = 2.0
DELAY_BETWEEN_PAGES = 0.5
DELAY_BETWEEN_OUTCODES = 1.0
MAX_RETRIES = 3
RETRY_BASE_DELAY = 2.0
GRID_CELL_SIZE = 0.01 # degrees for postcode spatial index
@ -16,9 +16,29 @@ SCHEDULE_HOUR = int(os.environ.get("SCHEDULE_HOUR", "3"))
# Whether to run a scrape immediately on startup
RUN_ON_STARTUP = os.environ.get("RUN_ON_STARTUP", "").lower() in ("1", "true", "yes")
# Enable/disable individual sources
SCRAPE_RIGHTMOVE = os.environ.get("SCRAPE_RIGHTMOVE", "true").lower() in ("1", "true", "yes")
SCRAPE_HOMECOUK = os.environ.get("SCRAPE_HOMECOUK", "true").lower() in ("1", "true", "yes")
SCRAPE_OPENRENT = os.environ.get("SCRAPE_OPENRENT", "true").lower() in ("1", "true", "yes")
SCRAPE_RIGHTMOVE = os.environ.get("SCRAPE_RIGHTMOVE", "true").lower() in (
"1",
"true",
"yes",
)
SCRAPE_HOMECOUK = os.environ.get("SCRAPE_HOMECOUK", "true").lower() in (
"1",
"true",
"yes",
)
SCRAPE_OPENRENT = os.environ.get("SCRAPE_OPENRENT", "true").lower() in (
"1",
"true",
"yes",
)
SCRAPE_ZOOPLA = os.environ.get("SCRAPE_ZOOPLA", "true").lower() in (
"1",
"true",
"yes",
)
# URL to trigger server data reload after scrape (e.g. http://server:8001/api/reload)
RELOAD_URL = os.environ.get("RELOAD_URL", "")
TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
SEARCH_URL = "https://www.rightmove.co.uk/api/property-search/listing/search"
@ -32,6 +52,9 @@ HOMECOUK_PER_PAGE = 30 # max supported by the API
# OpenRent
OPENRENT_BASE = "https://www.openrent.co.uk"
# Zoopla
ZOOPLA_BASE = "https://www.zoopla.co.uk"
PROPERTY_TYPE_MAP = {
"Detached": "Detached",
"Semi-Detached": "Semi-Detached",
@ -44,6 +67,7 @@ PROPERTY_TYPE_MAP = {
"Apartment": "Flats/Maisonettes",
"Penthouse": "Flats/Maisonettes",
"Ground Flat": "Flats/Maisonettes",
"Duplex": "Flats/Maisonettes",
"Detached Bungalow": "Detached",
"Semi-Detached Bungalow": "Semi-Detached",
"Town House": "Terraced",
@ -52,9 +76,15 @@ PROPERTY_TYPE_MAP = {
"Bungalow": "Other",
"Cottage": "Other",
"Park Home": "Other",
"Mobile Home": "Other",
"Caravan": "Other",
"Lodge": "Other",
"Land": "Other",
"Farm / Barn": "Other",
"Farm House": "Other",
"House": "Detached",
"House of Multiple Occupation": "Flats/Maisonettes",
"House Share": "Other",
"Not Specified": "Other",
"Chalet": "Other",
"Barn Conversion": "Other",
@ -62,9 +92,20 @@ PROPERTY_TYPE_MAP = {
"Character Property": "Other",
"Cluster House": "Other",
"Retirement Property": "Flats/Maisonettes",
"Parking": "Other",
"Plot": "Other",
"Garages": "Other",
"Mews": "Terraced",
"Property": "Other",
# Lowercase variants (from home.co.uk / Rightmove APIs)
"house": "Detached",
"bungalow": "Other",
"townhouse": "Terraced",
"land": "Other",
"other": "Other",
"not-specified": "Other",
"retirement-property": "Flats/Maisonettes",
"equestrian-facility": "Other",
}
CHANNELS = [

View file

@ -86,7 +86,8 @@ def solve_cloudflare() -> tuple[dict[str, str], str] | None:
log.info(
"Cloudflare solved — got %d cookies, UA: %s",
len(cookies), user_agent[:60],
len(cookies),
user_agent[:60],
)
flaresolverr_attempts_total.labels(result="success").inc()
return cookies, user_agent
@ -129,11 +130,13 @@ def make_client(cookies: dict[str, str], user_agent: str) -> Session:
Uses Chrome TLS impersonation so cf_clearance cookies (which are bound
to Chrome's JA3 fingerprint from FlareSolverr) remain valid."""
session = Session(impersonate="chrome")
session.headers.update({
session.headers.update(
{
"User-Agent": user_agent,
"Accept": "application/json, text/plain, */*",
"x-requested-with": "XMLHttpRequest",
})
}
)
# Laravel CSRF: the XSRF-TOKEN cookie value must also be sent as the
# X-XSRF-TOKEN request header (URL-decoded). Without this header, the
# server rejects every request with 419/403.
@ -165,7 +168,11 @@ def fetch_page(
return resp.json()
except json.JSONDecodeError:
homecouk_errors_total.labels(type="json_decode").inc()
log.error("Non-JSON response from %s (got %s)", url, resp.headers.get("content-type", "?"))
log.error(
"Non-JSON response from %s (got %s)",
url,
resp.headers.get("content-type", "?"),
)
return None
if resp.status_code == 403:
raise CookiesExpiredError("HTTP 403 — cookies likely expired")
@ -173,7 +180,11 @@ def fetch_page(
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
log.warning(
"HTTP %d from %s, retry %d/%d in %.1fs",
resp.status_code, url, attempt + 1, max_retries, delay,
resp.status_code,
url,
attempt + 1,
max_retries,
delay,
)
time.sleep(delay)
continue
@ -186,7 +197,11 @@ def fetch_page(
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
log.warning(
"%s from %s, retry %d/%d in %.1fs",
type(e).__name__, url, attempt + 1, max_retries, delay,
type(e).__name__,
url,
attempt + 1,
max_retries,
delay,
)
time.sleep(delay)
homecouk_errors_total.labels(type="retry_exhausted").inc()
@ -218,7 +233,12 @@ def map_property_type(raw_type: str | None) -> str:
# Home.co.uk uses types like "House", "Flat", "Apartment", "Detached", etc.
# Try common patterns
lower = raw_type.lower()
if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower:
if (
"flat" in lower
or "apartment" in lower
or "maisonette" in lower
or "studio" in lower
):
return "Flats/Maisonettes"
if "detached" in lower and "semi" not in lower:
return "Detached"
@ -231,7 +251,9 @@ def map_property_type(raw_type: str | None) -> str:
def transform_property(
prop: dict, channel: str, pc_index: PostcodeSpatialIndex,
prop: dict,
channel: str,
pc_index: PostcodeSpatialIndex,
) -> dict | None:
"""Transform a raw home.co.uk property dict into our output schema."""
lat = prop.get("latitude")

View file

@ -11,7 +11,9 @@ from metrics import http_errors_total, http_requests_total, ip_rotations_total
log = logging.getLogger("rightmove")
_ua = UserAgent(browsers=["Chrome", "Edge"], os=["Windows", "Mac OS X"], min_version=120.0)
_ua = UserAgent(
browsers=["Chrome", "Edge"], os=["Windows", "Mac OS X"], min_version=120.0
)
def _endpoint_label(url: str) -> str:
@ -27,6 +29,7 @@ def _status_label(code: int) -> str:
return "5xx"
return str(code)
# Gluetun control API — runs on port 8000 inside the gluetun container.
# Since finder uses network_mode: service:gluetun, localhost IS gluetun.
GLUETUN_API = "http://127.0.0.1:8000"
@ -42,17 +45,25 @@ def rotate_ip() -> bool:
# Get current IP
with httpx.Client(timeout=10) as ctl:
old_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
old_ip = old_ip_resp.json().get("public_ip", "unknown") if old_ip_resp.status_code == 200 else "unknown"
old_ip = (
old_ip_resp.json().get("public_ip", "unknown")
if old_ip_resp.status_code == 200
else "unknown"
)
log.info("Current IP: %s", old_ip)
# Trigger server change — PUT with empty JSON body picks a random server
resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"})
resp = ctl.put(
f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"}
)
if resp.status_code != 200:
log.error("Failed to stop VPN: %d %s", resp.status_code, resp.text)
return False
time.sleep(2)
resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"})
resp = ctl.put(
f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"}
)
if resp.status_code != 200:
log.error("Failed to start VPN: %d %s", resp.status_code, resp.text)
return False
@ -99,7 +110,9 @@ def fetch_with_retry(
for attempt in range(MAX_RETRIES):
try:
resp = client.get(url, params=params)
http_requests_total.labels(status=_status_label(resp.status_code), endpoint=endpoint).inc()
http_requests_total.labels(
status=_status_label(resp.status_code), endpoint=endpoint
).inc()
if resp.status_code == 200:
return resp.json()
if resp.status_code == 403 and on_403:
@ -111,15 +124,34 @@ def fetch_with_retry(
return None
if resp.status_code in (429, 500, 502, 503, 504):
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
log.warning("HTTP %d from %s, retry %d/%d in %.1fs", resp.status_code, url, attempt + 1, MAX_RETRIES, delay)
log.warning(
"HTTP %d from %s, retry %d/%d in %.1fs",
resp.status_code,
url,
attempt + 1,
MAX_RETRIES,
delay,
)
time.sleep(delay)
continue
log.error("HTTP %d from %s (non-retryable)", resp.status_code, url)
return None
except (httpx.ConnectError, httpx.ReadTimeout, httpx.WriteTimeout, httpx.PoolTimeout) as e:
except (
httpx.ConnectError,
httpx.ReadTimeout,
httpx.WriteTimeout,
httpx.PoolTimeout,
) as e:
http_errors_total.labels(type=type(e).__name__).inc()
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
log.warning("%s from %s, retry %d/%d in %.1fs", type(e).__name__, url, attempt + 1, MAX_RETRIES, delay)
log.warning(
"%s from %s, retry %d/%d in %.1fs",
type(e).__name__,
url,
attempt + 1,
MAX_RETRIES,
delay,
)
time.sleep(delay)
http_errors_total.labels(type="retry_exhausted").inc()
log.error("All %d retries exhausted for %s", MAX_RETRIES, url)

View file

@ -7,7 +7,15 @@ from pathlib import Path
from flask import Flask, Response, jsonify, send_from_directory
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
from constants import DATA_DIR, RUN_ON_STARTUP, SCHEDULE_HOUR, SCRAPE_HOMECOUK, SCRAPE_OPENRENT, SCRAPE_RIGHTMOVE
from constants import (
DATA_DIR,
RUN_ON_STARTUP,
SCHEDULE_HOUR,
SCRAPE_HOMECOUK,
SCRAPE_OPENRENT,
SCRAPE_RIGHTMOVE,
SCRAPE_ZOOPLA,
)
from homecouk import load_cookies as load_homecouk_cookies
from openrent import load_cookies as load_openrent_cookies
from rightmove import outcode_cache
@ -41,6 +49,16 @@ log.setLevel(logging.DEBUG)
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
# Suppress noisy /metrics and /health request logs from werkzeug
class _NoiseFilter(logging.Filter):
def filter(self, record):
msg = record.getMessage()
return "GET /metrics" not in msg and "GET /health" not in msg
logging.getLogger("werkzeug").addFilter(_NoiseFilter())
# ---------------------------------------------------------------------------
# Startup: load data
# ---------------------------------------------------------------------------
@ -48,9 +66,15 @@ logging.getLogger("httpcore").setLevel(logging.WARNING)
log.info("Loading arcgis data...")
OUTCODES = load_outcodes()
PC_INDEX = build_postcode_index()
PC_COORDS = build_postcode_coords() if SCRAPE_OPENRENT else None
log.info("Ready — %d outcodes, postcode index built (rightmove=%s, homecouk=%s, openrent=%s)",
len(OUTCODES), SCRAPE_RIGHTMOVE, SCRAPE_HOMECOUK, SCRAPE_OPENRENT)
PC_COORDS = build_postcode_coords() if (SCRAPE_OPENRENT or SCRAPE_ZOOPLA) else None
log.info(
"Ready — %d outcodes, postcode index built (rightmove=%s, homecouk=%s, openrent=%s, zoopla=%s)",
len(OUTCODES),
SCRAPE_RIGHTMOVE,
SCRAPE_HOMECOUK,
SCRAPE_OPENRENT,
SCRAPE_ZOOPLA,
)
# ---------------------------------------------------------------------------
# Scheduler
@ -63,7 +87,9 @@ def _start_scrape() -> bool:
if status.state == "running":
return False
status.state = "running"
thread = threading.Thread(target=run_scrape, args=(OUTCODES, PC_INDEX, PC_COORDS), daemon=True)
thread = threading.Thread(
target=run_scrape, args=(OUTCODES, PC_INDEX, PC_COORDS), daemon=True
)
thread.start()
return True
@ -82,7 +108,9 @@ def _scheduler_loop() -> None:
log.info("Scheduler active — will run daily at %02d:00 UTC", SCHEDULE_HOUR)
while True:
wait = _seconds_until(SCHEDULE_HOUR)
log.info("Next scheduled scrape in %.0f seconds (%.1f hours)", wait, wait / 3600)
log.info(
"Next scheduled scrape in %.0f seconds (%.1f hours)", wait, wait / 3600
)
time.sleep(wait)
log.info("Scheduled scrape triggered")
if not _start_scrape():
@ -105,6 +133,11 @@ if SCHEDULE_HOUR >= 0:
app = Flask(__name__)
@app.route("/health")
def health():
    """Liveness endpoint: always answers with the body ``ok`` and HTTP 200."""
    body, status_code = "ok", 200
    return body, status_code
@app.route("/run", methods=["POST"])
def trigger_run():
if _start_scrape():
@ -131,6 +164,7 @@ def get_status():
"rightmove": status.rm_properties,
"homecouk": status.hk_properties,
"openrent": status.or_properties,
"zoopla": status.zp_properties,
},
"errors": status.errors[-20:], # last 20 errors
"elapsed_seconds": round(elapsed, 1),
@ -144,15 +178,19 @@ def get_status():
def get_debug():
hk_cookies = load_homecouk_cookies() if SCRAPE_HOMECOUK else None
or_cookies = load_openrent_cookies() if SCRAPE_OPENRENT else None
return jsonify({
return jsonify(
{
"outcode_cache_size": len(outcode_cache),
"outcode_cache_sample": dict(list(outcode_cache.items())[:20]),
"scrape_rightmove": SCRAPE_RIGHTMOVE,
"scrape_homecouk": SCRAPE_HOMECOUK,
"scrape_openrent": SCRAPE_OPENRENT,
"scrape_zoopla": SCRAPE_ZOOPLA,
"homecouk_cookies_available": hk_cookies is not None,
"openrent_cookies_available": or_cookies is not None,
})
"zoopla_note": "browser-based (Camoufox), no cookies needed",
}
)
@app.route("/metrics")

View file

@ -109,6 +109,28 @@ openrent_properties_scraped = Counter(
["channel"],
)
# ---------------------------------------------------------------------------
# Counters — Zoopla
# ---------------------------------------------------------------------------
zoopla_pages_scraped = Counter(
"zoopla_pages_scraped",
"Search result pages scraped from Zoopla",
["channel"],
)
zoopla_errors_total = Counter(
"zoopla_errors_total",
"Zoopla scraping errors",
["type"],
)
zoopla_properties_scraped = Counter(
"zoopla_properties_scraped",
"Properties scraped from Zoopla (before dedup)",
["channel"],
)
# ---------------------------------------------------------------------------
# Counters — FlareSolverr / cookie management
# ---------------------------------------------------------------------------
@ -138,3 +160,8 @@ openrent_enabled = Gauge(
"openrent_enabled",
"Whether OpenRent scraping is currently active (1=yes, 0=no)",
)
zoopla_enabled = Gauge(
"zoopla_enabled",
"Whether Zoopla scraping is currently active (1=yes, 0=no)",
)

View file

@ -79,7 +79,8 @@ def solve_waf() -> tuple[dict[str, str], str] | None:
if "AwsWafIntegration" in content:
log.info("Got WAF challenge page, waiting for resolution...")
page.wait_for_selector(
"a.pli, .pli, .search-property-card", timeout=30000,
"a.pli, .pli, .search-property-card",
timeout=30000,
)
raw_cookies = context.cookies()
@ -94,7 +95,8 @@ def solve_waf() -> tuple[dict[str, str], str] | None:
log.info(
"AWS WAF solved — got %d cookies, UA: %s",
len(cookies), user_agent[:60],
len(cookies),
user_agent[:60],
)
flaresolverr_attempts_total.labels(result="success").inc()
return cookies, user_agent
@ -130,11 +132,13 @@ def make_client(cookies: dict[str, str], user_agent: str) -> Session:
"""Create a curl_cffi Session configured for OpenRent.
Uses Chrome TLS impersonation so AWS WAF cookies remain valid."""
session = Session(impersonate="chrome")
session.headers.update({
session.headers.update(
{
"User-Agent": user_agent,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-GB,en;q=0.9",
})
}
)
for name, value in cookies.items():
session.cookies.set(name, value, domain="openrent.co.uk")
return session
@ -152,7 +156,9 @@ def _status_label(code: int) -> str:
def fetch_page(
client: Session, url: str, max_retries: int = 3,
client: Session,
url: str,
max_retries: int = 3,
) -> str | None:
"""GET HTML with retries on 429/5xx. Returns None on permanent failure.
WAF challenge (202 or 403 with challenge JS) raises WafChallengeError."""
@ -165,17 +171,25 @@ def fetch_page(
html = resp.text
# Detect WAF challenge page masquerading as 200
if "AwsWafIntegration" in html and "challenge.js" in html:
raise WafChallengeError("Got AWS WAF challenge page — cookies expired")
raise WafChallengeError(
"Got AWS WAF challenge page — cookies expired"
)
return html
if resp.status_code in (202, 403):
raise WafChallengeError(f"HTTP {resp.status_code} — cookies likely expired")
raise WafChallengeError(
f"HTTP {resp.status_code} — cookies likely expired"
)
if resp.status_code in (429, 500, 502, 503, 504):
delay = RETRY_BASE_DELAY * (2**attempt)
log.warning(
"HTTP %d from %s, retry %d/%d in %.1fs",
resp.status_code, url, attempt + 1, max_retries, delay,
resp.status_code,
url,
attempt + 1,
max_retries,
delay,
)
time.sleep(delay)
continue
@ -190,7 +204,11 @@ def fetch_page(
delay = RETRY_BASE_DELAY * (2**attempt)
log.warning(
"%s from %s, retry %d/%d in %.1fs",
type(e).__name__, url, attempt + 1, max_retries, delay,
type(e).__name__,
url,
attempt + 1,
max_retries,
delay,
)
time.sleep(delay)
@ -247,7 +265,9 @@ def _extract_bedrooms_from_title(title: str) -> int | None:
return None
def _extract_beds_baths_from_features(feature_items: list) -> tuple[int | None, int | None]:
def _extract_beds_baths_from_features(
feature_items: list,
) -> tuple[int | None, int | None]:
"""Extract bedrooms and bathrooms from feature list items.
OpenRent search cards have <ul> with items like:
@ -442,11 +462,7 @@ def parse_search_results(html: str) -> list[dict]:
# --- Coordinates from data attributes (may not be present on cards) ---
for el in [card] + card.select("[data-lat], [data-latitude]"):
lat = el.get("data-lat") or el.get("data-latitude")
lng = (
el.get("data-lng")
or el.get("data-longitude")
or el.get("data-lon")
)
lng = el.get("data-lng") or el.get("data-longitude") or el.get("data-lon")
if lat and lng:
try:
prop["lat"] = float(lat)
@ -543,9 +559,7 @@ def parse_property_detail(html: str) -> dict:
break
# --- Description for floor area ---
desc_el = soup.select_one(
".description, [class*='description'], #description"
)
desc_el = soup.select_one(".description, [class*='description'], #description")
if desc_el:
details["description"] = desc_el.get_text(strip=True)
@ -567,7 +581,12 @@ def map_property_type(raw_type: str | None) -> str:
lower = raw_type.lower()
if "room" in lower or "shared" in lower:
return "Other"
if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower:
if (
"flat" in lower
or "apartment" in lower
or "maisonette" in lower
or "studio" in lower
):
return "Flats/Maisonettes"
if "detached" in lower and "semi" not in lower:
return "Detached"
@ -647,7 +666,8 @@ def transform_property(
elif search_data.get("outcode"):
# No spatial index — try outcode lookup as fallback
outcode_pcs = _resolve_outcode_postcodes(
search_data["outcode"], pc_coords,
search_data["outcode"],
pc_coords,
)
if outcode_pcs:
postcode = outcode_pcs[0]
@ -708,7 +728,8 @@ def transform_property(
prop_id = search_data.get("id", "")
listing_url = search_data.get(
"url", f"{OPENRENT_BASE}/{prop_id}" if prop_id else "",
"url",
f"{OPENRENT_BASE}/{prop_id}" if prop_id else "",
)
description = detail.get("description") or search_data.get("description", "")
@ -767,7 +788,24 @@ def search_outcode(
for search_data in search_results:
detail_data = None
if fetch_details and search_data.get("url"):
# Skip detail page if we already have coordinates or a resolvable postcode
has_coords = (
search_data.get("lat") is not None
and search_data.get("lng") is not None
)
has_resolvable_pc = (
search_data.get("postcode")
and pc_coords
and search_data["postcode"] in pc_coords
)
needs_detail = (
fetch_details
and search_data.get("url")
and not has_coords
and not has_resolvable_pc
)
if needs_detail:
detail_html = fetch_page(client, search_data["url"])
if detail_html:
detail_data = parse_property_detail(detail_html)
@ -775,7 +813,10 @@ def search_outcode(
time.sleep(DELAY_BETWEEN_PAGES * 0.5)
transformed = transform_property(
search_data, detail_data, pc_index, pc_coords,
search_data,
detail_data,
pc_index,
pc_coords,
)
if transformed:
properties.append(transformed)

View file

@ -11,4 +11,6 @@ dependencies = [
"prometheus-client",
"beautifulsoup4",
"playwright>=1.58.0",
"playwright-stealth>=2.0.2",
"camoufox>=0.4.11",
]

View file

@ -24,7 +24,9 @@ def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
if outcode in outcode_cache:
return outcode_cache[outcode]
data = fetch_with_retry(client, TYPEAHEAD_URL, {"query": outcode, "limit": "10", "exclude": "STREET"})
data = fetch_with_retry(
client, TYPEAHEAD_URL, {"query": outcode, "limit": "10", "exclude": "STREET"}
)
if not data:
return None
@ -61,7 +63,12 @@ def search_outcode(
data = fetch_with_retry(client, SEARCH_URL, params)
if not data:
log.warning("Failed to fetch index %d for %s/%s", index, outcode, channel_cfg["channel"])
log.warning(
"Failed to fetch index %d for %s/%s",
index,
outcode,
channel_cfg["channel"],
)
break
raw_props = data.get("properties", [])

View file

@ -6,7 +6,20 @@ from dataclasses import dataclass, field
import polars as pl
from constants import ARCGIS_PATH, CHANNELS, DATA_DIR, DELAY_BETWEEN_OUTCODES, SCRAPE_HOMECOUK, SCRAPE_OPENRENT, SCRAPE_RIGHTMOVE, SEED
import httpx
from constants import (
ARCGIS_PATH,
CHANNELS,
DATA_DIR,
DELAY_BETWEEN_OUTCODES,
RELOAD_URL,
SCRAPE_HOMECOUK,
SCRAPE_OPENRENT,
SCRAPE_RIGHTMOVE,
SCRAPE_ZOOPLA,
SEED,
)
from homecouk import CookiesExpiredError
from homecouk import load_cookies as load_homecouk_cookies
from homecouk import make_client as make_homecouk_client
@ -23,12 +36,16 @@ from metrics import (
scrape_outcodes_total,
scrape_properties_total,
scrape_state,
zoopla_enabled,
)
from openrent import WafChallengeError
from openrent import load_cookies as load_openrent_cookies
from openrent import make_client as make_openrent_client
from openrent import search_outcode as openrent_search_outcode
from rightmove import resolve_outcode_id, search_outcode
from zoopla import TurnstileError
from zoopla import launch_browser as launch_zoopla_browser
from zoopla import search_outcode as zoopla_search_outcode
from spatial import PostcodeSpatialIndex
from storage import write_parquet
@ -44,10 +61,11 @@ class ScrapeStatus:
outcodes_total: int = 0
properties_buy: int = 0
properties_rent: int = 0
# Per-source counts for current channel
# Per-source counts (combined across channels)
rm_properties: int = 0
hk_properties: int = 0
or_properties: int = 0
zp_properties: int = 0
errors: list[str] = field(default_factory=list)
started_at: float = 0.0
finished_at: float = 0.0
@ -63,14 +81,26 @@ def _sync_gauges() -> None:
scrape_state.labels(state=state).set(1 if status.state == state else 0)
scrape_outcodes_done.set(status.outcodes_done)
scrape_outcodes_total.set(status.outcodes_total)
# Total properties (both sources combined)
scrape_properties_total.labels(channel="buy", source="total").set(status.properties_buy)
scrape_properties_total.labels(channel="rent", source="total").set(status.properties_rent)
# Per-source breakdown for current channel
ch = "buy" if status.channel == "BUY" else "rent"
scrape_properties_total.labels(channel=ch, source="rightmove").set(status.rm_properties)
scrape_properties_total.labels(channel=ch, source="homecouk").set(status.hk_properties)
scrape_properties_total.labels(channel=ch, source="openrent").set(status.or_properties)
scrape_properties_total.labels(channel="buy", source="total").set(
status.properties_buy
)
scrape_properties_total.labels(channel="rent", source="total").set(
status.properties_rent
)
# Per-source totals (across both channels)
for ch in ("buy", "rent"):
scrape_properties_total.labels(channel=ch, source="rightmove").set(
status.rm_properties
)
scrape_properties_total.labels(channel=ch, source="homecouk").set(
status.hk_properties
)
scrape_properties_total.labels(channel=ch, source="openrent").set(
status.or_properties
)
scrape_properties_total.labels(channel=ch, source="zoopla").set(
status.zp_properties
)
if status.started_at:
end = status.finished_at if status.finished_at else time.time()
scrape_elapsed_seconds.set(end - status.started_at)
@ -86,7 +116,9 @@ def load_outcodes() -> list[str]:
log.info("England postcodes: %d", len(england))
outcodes = (
england.select(pl.col("pcd").str.extract(r"^([A-Z]{1,2}\d[A-Z0-9]?)", 1).alias("outcode"))
england.select(
pl.col("pcd").str.extract(r"^([A-Z]{1,2}\d[A-Z0-9]?)", 1).alias("outcode")
)
.drop_nulls()
.get_column("outcode")
.unique()
@ -101,7 +133,9 @@ def build_postcode_index() -> PostcodeSpatialIndex:
"""Build spatial index from arcgis England postcodes."""
log.info("Building postcode spatial index from %s", ARCGIS_PATH)
df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])
england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(subset=["lat", "long"])
england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(
subset=["lat", "long"]
)
return PostcodeSpatialIndex(
england.get_column("lat").to_list(),
england.get_column("long").to_list(),
@ -114,7 +148,9 @@ def build_postcode_coords() -> dict[str, tuple[float, float]]:
Used by OpenRent scraper to resolve coordinates from postcodes."""
log.info("Building postcode coords lookup from %s", ARCGIS_PATH)
df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])
england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(subset=["lat", "long"])
england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(
subset=["lat", "long"]
)
coords: dict[str, tuple[float, float]] = {}
for pcd, lat, lng in zip(
england.get_column("pcd").to_list(),
@ -126,6 +162,15 @@ def build_postcode_coords() -> dict[str, tuple[float, float]]:
return coords
def _fmt_elapsed(seconds: float) -> str:
"""Format seconds as e.g. '2h13m' or '5m32s'."""
h, rem = divmod(int(seconds), 3600)
m, s = divmod(rem, 60)
if h:
return f"{h}h{m:02d}m"
return f"{m}m{s:02d}s"
def _dedup_key(p: dict) -> tuple:
"""Composite key for cross-source deduplication: (postcode, bedrooms, price).
Two listings on different portals for the same physical property will share
@ -133,28 +178,89 @@ def _dedup_key(p: dict) -> tuple:
return (p.get("Postcode", ""), p.get("Bedrooms", 0), p.get("price", 0))
class _Progress:
"""Thread-safe progress tracker for parallel source workers."""
def __init__(self):
self._counts: dict[str, int] = {}
self._lock = threading.Lock()
def update(self, source: str, done: int) -> None:
with self._lock:
self._counts[source] = done
def snapshot(self) -> dict[str, int]:
with self._lock:
return dict(self._counts)
def _merge_channel(
    rm_props: list[dict],
    hk_props: list[dict],
    or_props: list[dict],
    zp_props: list[dict],
) -> tuple[dict[str, dict], dict[str, int], int]:
    """Combine one channel's listings from every source into a single mapping.

    Rightmove listings are inserted first and are only de-duplicated by
    ``id``. Listings from the remaining sources (processed in the order
    hk, or, zp) are dropped when either their ``id`` is already present or
    their ``_dedup_key`` matches a listing that was accepted earlier.

    Returns a 3-tuple:
      * accepted listings keyed by ``id``,
      * number of accepted listings per source ("rm", "hk", "or", "zp"),
      * number of non-Rightmove listings that were dropped.
    """
    merged: dict[str, dict] = {}
    fingerprints: set[tuple] = set()
    per_source = dict.fromkeys(("rm", "hk", "or", "zp"), 0)
    dropped = 0

    # Priority source: an already-seen id is silently ignored and not
    # counted as a drop.
    for listing in rm_props:
        listing_id = listing["id"]
        if listing_id in merged:
            continue
        merged[listing_id] = listing
        fingerprints.add(_dedup_key(listing))
        per_source["rm"] += 1

    # Secondary sources: reject on id clash or fingerprint clash.
    secondary = {"hk": hk_props, "or": or_props, "zp": zp_props}
    for tag, listings in secondary.items():
        for listing in listings:
            listing_id = listing["id"]
            fingerprint = _dedup_key(listing)
            if listing_id not in merged and fingerprint not in fingerprints:
                merged[listing_id] = listing
                fingerprints.add(fingerprint)
                per_source[tag] += 1
            else:
                dropped += 1

    return merged, per_source, dropped
def run_scrape(
outcodes: list[str],
pc_index: PostcodeSpatialIndex,
pc_coords: dict[str, tuple[float, float]] | None = None,
) -> None:
"""Main scrape loop — runs in background thread.
Scrapes Rightmove, home.co.uk, and OpenRent, merging into one dataset."""
"""Main scrape orchestrator — runs all sources in parallel threads.
Each source (Rightmove, home.co.uk, OpenRent, Zoopla) gets its own thread
that iterates all outcodes for both BUY and RENT channels. Results are
merged with cross-source deduplication after all workers complete.
"""
global status
with status_lock:
status.state = "running"
status.started_at = time.time()
status.finished_at = 0.0
status.errors = []
status.properties_buy = 0
status.properties_rent = 0
status.channel = ""
status.outcode = ""
_sync_gauges()
# Shuffle for geographic diversity
shuffled = list(outcodes)
random.seed(SEED)
random.shuffle(shuffled)
if not SCRAPE_RIGHTMOVE and not SCRAPE_HOMECOUK and not SCRAPE_OPENRENT:
if not any([SCRAPE_RIGHTMOVE, SCRAPE_HOMECOUK, SCRAPE_OPENRENT, SCRAPE_ZOOPLA]):
log.warning("All scrapers disabled — nothing to do")
with status_lock:
status.state = "done"
@ -162,240 +268,422 @@ def run_scrape(
_sync_gauges()
return
client = make_client() if SCRAPE_RIGHTMOVE else None
if not SCRAPE_RIGHTMOVE:
log.info("Rightmove scraping DISABLED (SCRAPE_RIGHTMOVE=false)")
# home.co.uk: must be enabled via SCRAPE_HOMECOUK + cookies available
hk_client = None
hk_failed = False
if not SCRAPE_HOMECOUK:
log.info("home.co.uk scraping DISABLED (SCRAPE_HOMECOUK=false)")
homecouk_enabled.set(0)
else:
hk_result = load_homecouk_cookies()
hk_client = make_homecouk_client(*hk_result) if hk_result else None
if hk_client:
log.info("home.co.uk scraping ENABLED")
homecouk_enabled.set(1)
else:
log.info("home.co.uk scraping DISABLED (need FlareSolverr or HOMECOUK_CF_CLEARANCE + HOMECOUK_SESSION)")
homecouk_enabled.set(0)
# OpenRent: must be enabled via SCRAPE_OPENRENT + cookies available
or_client = None
or_failed = False
if not SCRAPE_OPENRENT:
log.info("OpenRent scraping DISABLED (SCRAPE_OPENRENT=false)")
openrent_enabled.set(0)
else:
or_result = load_openrent_cookies()
or_client = make_openrent_client(*or_result) if or_result else None
if or_client:
log.info("OpenRent scraping ENABLED")
openrent_enabled.set(1)
else:
log.info("OpenRent scraping DISABLED (need FlareSolverr or OPENRENT_WAF_TOKEN)")
openrent_enabled.set(0)
if not SCRAPE_ZOOPLA:
log.info("Zoopla scraping DISABLED (SCRAPE_ZOOPLA=false)")
zoopla_enabled.set(0)
# Build postcode coords if OpenRent is active and caller didn't provide them
if or_client and pc_coords is None:
# Build postcode coords if needed for OpenRent/Zoopla
if (SCRAPE_OPENRENT or SCRAPE_ZOOPLA) and pc_coords is None:
pc_coords = build_postcode_coords()
# Per-source result containers: {channel_name: [properties]}
# Each list is only written by its owning source thread.
rm_results: dict[str, list] = {"BUY": [], "RENT": []}
hk_results: dict[str, list] = {"BUY": [], "RENT": []}
or_results: dict[str, list] = {"BUY": [], "RENT": []}
zp_results: dict[str, list] = {"BUY": [], "RENT": []}
progress = _Progress()
# --- Source worker closures ---
# Each worker owns its client lifecycle and iterates all outcodes for both
# channels. On auth failure, it refreshes cookies and continues. On fatal
# failure, it marks itself as done and returns partial results.
def rm_worker():
client = make_client()
try:
for channel_cfg in CHANNELS:
channel_name = channel_cfg["channel"]
file_suffix = "buy" if channel_name == "BUY" else "rent"
all_properties: dict[str, dict] = {} # dedup by id
seen_dedup_keys: set[tuple] = set() # cross-source dedup by (postcode, beds, price)
rm_count = 0 # Rightmove properties this channel
hk_count = 0 # home.co.uk properties this channel
hk_dedup_count = 0 # home.co.uk skipped as cross-source duplicates
or_count = 0 # OpenRent properties this channel
or_dedup_count = 0 # OpenRent skipped as cross-source duplicates
with status_lock:
status.channel = channel_name
status.outcodes_done = 0
status.outcodes_total = len(shuffled)
status.rm_properties = 0
status.hk_properties = 0
status.or_properties = 0
log.info("=== Starting %s channel (%d outcodes) ===", channel_name, len(shuffled))
for i, outcode in enumerate(shuffled):
with status_lock:
status.outcode = outcode
status.outcodes_done = i
log.debug("Outcode %s (%d/%d) — %d properties so far",
outcode, i + 1, len(shuffled), len(all_properties))
made_requests = False
# --- Rightmove ---
if SCRAPE_RIGHTMOVE:
made_requests = True
try:
outcode_id = resolve_outcode_id(client, outcode)
if not outcode_id:
log.debug("No Rightmove ID for outcode %s, skipping", outcode)
else:
props = search_outcode(client, outcode_id, outcode, channel_cfg, pc_index)
for p in props:
pid = p["id"]
if pid not in all_properties:
all_properties[pid] = p
seen_dedup_keys.add(_dedup_key(p))
rm_count += 1
except Exception as e:
msg = f"Error scraping Rightmove {outcode}/{channel_name}: {e}"
log.error(msg)
log.error("Rightmove %s ID lookup: %s", outcode, e)
scrape_errors_total.labels(source="rightmove").inc()
with status_lock:
status.errors.append(msg)
# --- home.co.uk ---
if hk_client and not hk_failed:
made_requests = True
try:
hk_props = homecouk_search_outcode(
hk_client, outcode, channel_name, pc_index,
)
for p in hk_props:
pid = p["id"]
key = _dedup_key(p)
if pid in all_properties or key in seen_dedup_keys:
hk_dedup_count += 1
cross_source_dedup_total.labels(
channel="buy" if channel_name == "BUY" else "rent",
).inc()
progress.update("rm", i + 1)
time.sleep(DELAY_BETWEEN_OUTCODES)
continue
all_properties[pid] = p
seen_dedup_keys.add(key)
hk_count += 1
if hk_props:
log.info("home.co.uk %s: +%d properties", outcode, len(hk_props))
except CookiesExpiredError:
log.warning("home.co.uk cookies expired — attempting refresh via FlareSolverr")
hk_client.close()
if not outcode_id:
log.debug("No Rightmove ID for %s, skipping", outcode)
progress.update("rm", i + 1)
time.sleep(DELAY_BETWEEN_OUTCODES)
continue
for ch_cfg in CHANNELS:
ch = ch_cfg["channel"]
try:
props = search_outcode(
client, outcode_id, outcode, ch_cfg, pc_index
)
rm_results[ch].extend(props)
except Exception as e:
log.error("Rightmove %s/%s: %s", outcode, ch, e)
scrape_errors_total.labels(source="rightmove").inc()
progress.update("rm", i + 1)
time.sleep(DELAY_BETWEEN_OUTCODES)
except Exception as e:
log.exception("Fatal Rightmove error: %s", e)
with status_lock:
status.errors.append(f"Fatal Rightmove: {e}")
finally:
client.close()
def hk_worker():
hk_result = load_homecouk_cookies()
if hk_result:
hk_client = make_homecouk_client(*hk_result)
if not hk_result:
log.info("home.co.uk DISABLED (no cookies available)")
homecouk_enabled.set(0)
progress.update("hk", len(shuffled))
return
client = make_homecouk_client(*hk_result)
log.info("home.co.uk scraping ENABLED")
homecouk_enabled.set(1)
try:
for i, outcode in enumerate(shuffled):
for ch_cfg in CHANNELS:
ch = ch_cfg["channel"]
try:
props = homecouk_search_outcode(
client, outcode, ch, pc_index
)
hk_results[ch].extend(props)
if props:
log.info("home.co.uk %s: +%d properties", outcode, len(props))
except CookiesExpiredError:
log.warning(
"home.co.uk cookies expired — attempting refresh"
)
client.close()
hk_new = load_homecouk_cookies()
if hk_new:
client = make_homecouk_client(*hk_new)
log.info("home.co.uk cookies refreshed, continuing")
cookie_refreshes_total.labels(result="success").inc()
else:
log.warning("Cookie refresh failed, disabling home.co.uk for rest of scrape")
hk_client = None
hk_failed = True
log.warning(
"Cookie refresh failed, disabling home.co.uk"
)
homecouk_enabled.set(0)
cookie_refreshes_total.labels(result="failure").inc()
with status_lock:
status.errors.append("home.co.uk cookies expired and refresh failed")
except Exception as e:
msg = f"Error scraping home.co.uk {outcode}/{channel_name}: {e}"
log.error(msg)
scrape_errors_total.labels(source="homecouk").inc()
with status_lock:
status.errors.append(msg)
# --- OpenRent (RENT channel only) ---
if or_client and not or_failed and channel_name == "RENT":
made_requests = True
try:
or_props = openrent_search_outcode(
or_client, outcode, pc_index, pc_coords,
status.errors.append(
"home.co.uk cookies expired and refresh failed"
)
for p in or_props:
pid = p["id"]
key = _dedup_key(p)
if pid in all_properties or key in seen_dedup_keys:
or_dedup_count += 1
cross_source_dedup_total.labels(channel="rent").inc()
continue
all_properties[pid] = p
seen_dedup_keys.add(key)
or_count += 1
if or_props:
log.info("OpenRent %s: +%d properties", outcode, len(or_props))
except WafChallengeError:
log.warning("OpenRent WAF cookies expired — attempting refresh via FlareSolverr")
or_client.close()
progress.update("hk", len(shuffled))
return
except Exception as e:
log.error("home.co.uk %s/%s: %s", outcode, ch, e)
scrape_errors_total.labels(source="homecouk").inc()
progress.update("hk", i + 1)
time.sleep(DELAY_BETWEEN_OUTCODES)
except Exception as e:
log.exception("Fatal home.co.uk error: %s", e)
with status_lock:
status.errors.append(f"Fatal home.co.uk: {e}")
finally:
try:
client.close()
except Exception:
pass
def or_worker():
    """Scrape OpenRent for every outcode in ``shuffled`` (RENT channel only).

    Loads OpenRent cookies once up front; when none are available the source
    is disabled, its progress is marked complete and the worker returns.
    Results are appended to ``or_results["RENT"]``.

    On a ``WafChallengeError`` the client is closed and the cookies are
    reloaded once. If the reload fails, the source is disabled for the rest
    of the scrape, the failure is recorded in ``status.errors`` and the
    worker returns. Any other per-outcode error is logged and counted, and
    the loop moves on to the next outcode.
    """
    or_result = load_openrent_cookies()
    if not or_result:
        log.info("OpenRent DISABLED (no cookies available)")
        openrent_enabled.set(0)
        # Report this source as finished so it does not hold back overall progress.
        progress.update("or", len(shuffled))
        return

    # Exactly one client is created here; it is the one closed in `finally`.
    client = make_openrent_client(*or_result)
    log.info("OpenRent scraping ENABLED")
    openrent_enabled.set(1)
    try:
        for i, outcode in enumerate(shuffled):
            # OpenRent is RENT-only
            try:
                props = openrent_search_outcode(
                    client, outcode, pc_index, pc_coords
                )
                or_results["RENT"].extend(props)
                if props:
                    log.info("OpenRent %s: +%d properties", outcode, len(props))
            except WafChallengeError:
                log.warning(
                    "OpenRent WAF cookies expired — attempting refresh"
                )
                client.close()
                or_new = load_openrent_cookies()
                if or_new:
                    # NOTE: the outcode that triggered the challenge is not
                    # retried; scraping resumes with the next outcode.
                    client = make_openrent_client(*or_new)
                    log.info("OpenRent cookies refreshed, continuing")
                    cookie_refreshes_total.labels(result="success").inc()
                else:
                    log.warning(
                        "Cookie refresh failed, disabling OpenRent"
                    )
                    openrent_enabled.set(0)
                    cookie_refreshes_total.labels(result="failure").inc()
                    with status_lock:
                        status.errors.append(
                            "OpenRent WAF cookies expired and refresh failed"
                        )
                    progress.update("or", len(shuffled))
                    return
            except Exception as e:
                # Log with names that exist in this worker (there is no
                # channel variable here because OpenRent is RENT-only).
                log.error("OpenRent %s: %s", outcode, e)
                scrape_errors_total.labels(source="openrent").inc()
            progress.update("or", i + 1)
            time.sleep(DELAY_BETWEEN_OUTCODES)
    except Exception as e:
        log.exception("Fatal OpenRent error: %s", e)
        with status_lock:
            status.errors.append(f"Fatal OpenRent: {e}")
    finally:
        # Best-effort cleanup: the client may already have been closed by the
        # refresh path above, so errors from a second close are ignored.
        try:
            client.close()
        except Exception:
            pass
def zp_worker():
    """Scrape Zoopla for every outcode in ``shuffled`` across all ``CHANNELS``.

    Launches a browser via ``launch_zoopla_browser()``; if the launch fails
    (Turnstile or any other error) the source is disabled, its progress is
    marked complete and the worker returns. Results are appended to
    ``zp_results[<channel>]``.

    On a ``TurnstileError`` during scraping the browser is closed and
    relaunched once. If the relaunch fails, the source is disabled, the
    failure is recorded in ``status.errors`` and the worker returns. Any
    other per-request error is logged and counted, and the loop continues.
    """
    try:
        browser, page = launch_zoopla_browser()
        log.info("Zoopla scraping ENABLED (Camoufox browser launched)")
        zoopla_enabled.set(1)
    except TurnstileError:
        log.warning("Zoopla Cloudflare Turnstile failed — disabling Zoopla")
        zoopla_enabled.set(0)
        # Report this source as finished so it does not hold back overall progress.
        progress.update("zp", len(shuffled))
        return
    except Exception as e:
        log.warning("Zoopla browser launch failed: %s — disabling Zoopla", e)
        zoopla_enabled.set(0)
        progress.update("zp", len(shuffled))
        return

    try:
        for i, outcode in enumerate(shuffled):
            for ch_cfg in CHANNELS:
                ch = ch_cfg["channel"]
                try:
                    props = zoopla_search_outcode(
                        page, outcode, ch, pc_index, pc_coords
                    )
                    zp_results[ch].extend(props)
                    if props:
                        log.info("Zoopla %s: +%d properties", outcode, len(props))
                except TurnstileError:
                    log.warning(
                        "Zoopla Turnstile challenge — relaunching browser"
                    )
                    # Best-effort close of the challenged browser before
                    # starting a fresh one.
                    try:
                        browser.close()
                    except Exception:
                        pass
                    try:
                        # NOTE: the outcode/channel that hit the challenge is
                        # not retried; scraping resumes with the next one.
                        browser, page = launch_zoopla_browser()
                        log.info("Zoopla browser relaunched, continuing")
                    except Exception as relaunch_err:
                        # Include the cause so a failed relaunch can be
                        # diagnosed from the log, as the initial launch
                        # failure already is.
                        log.warning(
                            "Browser relaunch failed, disabling Zoopla: %s",
                            relaunch_err,
                        )
                        zoopla_enabled.set(0)
                        with status_lock:
                            status.errors.append(
                                "Zoopla Cloudflare challenge failed and relaunch failed"
                            )
                        progress.update("zp", len(shuffled))
                        return
                except Exception as e:
                    log.error("Zoopla %s/%s: %s", outcode, ch, e)
                    scrape_errors_total.labels(source="zoopla").inc()
            progress.update("zp", i + 1)
            time.sleep(DELAY_BETWEEN_OUTCODES)
    except Exception as e:
        log.exception("Fatal Zoopla error: %s", e)
        with status_lock:
            status.errors.append(f"Fatal Zoopla: {e}")
    finally:
        # Best-effort cleanup: the browser may already be closed (e.g. after
        # a failed relaunch), so errors from closing it again are ignored.
        try:
            browser.close()
        except Exception:
            pass
# --- Launch worker threads ---
active_sources: list[str] = []
threads: list[threading.Thread] = []
if SCRAPE_RIGHTMOVE:
threads.append(threading.Thread(target=rm_worker, name="scrape-rm", daemon=True))
active_sources.append("rm")
if SCRAPE_HOMECOUK:
threads.append(threading.Thread(target=hk_worker, name="scrape-hk", daemon=True))
active_sources.append("hk")
if SCRAPE_OPENRENT:
threads.append(threading.Thread(target=or_worker, name="scrape-or", daemon=True))
active_sources.append("or")
if SCRAPE_ZOOPLA:
threads.append(threading.Thread(target=zp_worker, name="scrape-zp", daemon=True))
active_sources.append("zp")
log.info(
"=== Starting scrape: %d outcodes, sources: %s ===",
len(shuffled),
", ".join(active_sources),
)
for t in threads:
t.start()
# --- Monitor progress while workers run ---
scrape_start = time.time()
last_log = 0.0
try:
while any(t.is_alive() for t in threads):
snap = progress.snapshot()
min_done = min(
(snap.get(s, 0) for s in active_sources), default=0
)
# Count properties across sources (safe: only one thread writes each list)
total_buy = sum(
len(r["BUY"]) for r in [rm_results, hk_results, or_results, zp_results]
)
total_rent = sum(
len(r["RENT"]) for r in [rm_results, hk_results, or_results, zp_results]
)
with status_lock:
if channel_name == "BUY":
status.properties_buy = len(all_properties)
else:
status.properties_rent = len(all_properties)
status.rm_properties = rm_count
status.hk_properties = hk_count
status.or_properties = or_count
status.outcodes_done = min_done
status.outcodes_total = len(shuffled)
status.properties_buy = total_buy
status.properties_rent = total_rent
status.rm_properties = len(rm_results["BUY"]) + len(rm_results["RENT"])
status.hk_properties = len(hk_results["BUY"]) + len(hk_results["RENT"])
status.or_properties = len(or_results["RENT"])
status.zp_properties = len(zp_results["BUY"]) + len(zp_results["RENT"])
_sync_gauges()
log.info("Outcode %s: total %d (rm: %d, hk: %d, or: %d)",
outcode, len(all_properties), rm_count, hk_count, or_count)
# Log progress every 30 seconds
now = time.time()
if now - last_log >= 30:
elapsed = now - scrape_start
per_source = ", ".join(
f"{s}:{snap.get(s, 0)}" for s in active_sources
)
log.info(
"Progress: %d/%d outcodes (%s), %d buy + %d rent props, %s elapsed",
min_done,
len(shuffled),
per_source,
total_buy,
total_rent,
_fmt_elapsed(elapsed),
)
last_log = now
if made_requests and i < len(shuffled) - 1:
time.sleep(DELAY_BETWEEN_OUTCODES)
time.sleep(5)
except Exception as e:
log.exception("Monitor loop error: %s", e)
# Write parquet
deduped = list(all_properties.values())
for t in threads:
t.join()
log.info("All source workers completed")
# --- Merge results per channel and write parquet ---
try:
for ch_cfg in CHANNELS:
ch = ch_cfg["channel"]
file_suffix = "buy" if ch == "BUY" else "rent"
merged, counts, total_dedup = _merge_channel(
rm_results[ch],
hk_results[ch],
or_results[ch],
zp_results[ch],
)
# Update cross-source dedup counter
ch_label = "buy" if ch == "BUY" else "rent"
if total_dedup:
cross_source_dedup_total.labels(channel=ch_label).inc(total_dedup)
deduped = list(merged.values())
output_path = DATA_DIR / f"online_listings_{file_suffix}.parquet"
write_parquet(deduped, output_path, channel=file_suffix)
with status_lock:
if channel_name == "BUY":
if ch == "BUY":
status.properties_buy = len(deduped)
else:
status.properties_rent = len(deduped)
status.outcodes_done = len(shuffled)
_sync_gauges()
log.info(
"=== %s channel complete: %d unique (rm: %d, hk: %d, or: %d, cross-dedup: %d) ===",
channel_name, len(deduped), rm_count, hk_count, or_count,
hk_dedup_count + or_dedup_count,
"=== %s complete: %d unique (rm:%d hk:%d or:%d zp:%d, cross-dedup:%d) ===",
ch,
len(deduped),
counts["rm"],
counts["hk"],
counts["or"],
counts["zp"],
total_dedup,
)
with status_lock:
status.state = "done"
status.finished_at = time.time()
status.outcodes_done = len(shuffled)
_sync_gauges()
elapsed = status.finished_at - status.started_at
log.info("Scrape complete in %.0fs — buy: %d, rent: %d",
elapsed, status.properties_buy, status.properties_rent)
log.info(
"Scrape complete in %s — buy: %d, rent: %d",
_fmt_elapsed(elapsed),
status.properties_buy,
status.properties_rent,
)
# Trigger server data reload
if RELOAD_URL:
try:
log.info("Triggering server reload at %s", RELOAD_URL)
resp = httpx.post(RELOAD_URL, timeout=300)
if resp.is_success:
body = resp.json()
log.info(
"Server reload complete: %d rows, %d features, %dms",
body.get("rows", 0),
body.get("features", 0),
body.get("elapsed_ms", 0),
)
else:
log.warning(
"Server reload failed (%d): %s",
resp.status_code,
resp.text[:200],
)
except Exception as e:
log.warning("Server reload request failed: %s", e)
except Exception as e:
log.exception("Fatal scrape error")
log.exception("Fatal scrape error during merge/write")
with status_lock:
status.state = "error"
status.errors.append(f"Fatal: {e}")
status.finished_at = time.time()
_sync_gauges()
finally:
if client:
client.close()
if hk_client:
hk_client.close()
if or_client:
or_client.close()

View file

@ -11,12 +11,16 @@ class PostcodeSpatialIndex:
"""Grid-based spatial index over arcgis postcodes for nearest-lookup."""
def __init__(self, lats: list[float], lngs: list[float], postcodes: list[str]):
self.grid: dict[tuple[int, int], list[tuple[float, float, str]]] = defaultdict(list)
self.grid: dict[tuple[int, int], list[tuple[float, float, str]]] = defaultdict(
list
)
for lat, lng, pcd in zip(lats, lngs, postcodes):
gx = int(math.floor(lng / GRID_CELL_SIZE))
gy = int(math.floor(lat / GRID_CELL_SIZE))
self.grid[(gx, gy)].append((lat, lng, pcd))
log.info("Postcode spatial index: %d cells, %d postcodes", len(self.grid), len(lats))
log.info(
"Postcode spatial index: %d cells, %d postcodes", len(self.grid), len(lats)
)
def nearest(self, lat: float, lng: float) -> str | None:
gx = int(math.floor(lng / GRID_CELL_SIZE))

View file

@ -25,7 +25,11 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
if fvd:
try:
dt = datetime.fromisoformat(fvd.replace("Z", "+00:00"))
listing_dates.append(dt.replace(tzinfo=None))
# Convert to UTC naive datetime for consistent storage
if dt.tzinfo is not None:
from datetime import timezone
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
listing_dates.append(dt)
except (ValueError, TypeError):
listing_dates.append(None)
else:
@ -60,9 +64,7 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
"Property type": [p["Property type"] for p in properties],
"Property sub-type": [p["Property sub-type"] for p in properties],
"Price qualifier": [p["Price qualifier"] for p in properties],
"Total floor area (sqm)": [
p["Total floor area (sqm)"] for p in properties
],
"Total floor area (sqm)": [p["Total floor area (sqm)"] for p in properties],
"Listing URL": [p["Listing URL"] for p in properties],
"Listing features": [p["Listing features"] for p in properties],
"Listing date": listing_dates,

View file

@ -51,9 +51,19 @@ def fix_coords(lat: float, lng: float) -> tuple[float, float]:
if 49 <= lat <= 56 and -7 <= lng <= 2:
return lat, lng
if 49 <= lng <= 56 and -7 <= lat <= 2:
log.debug("Swapping reversed coords: lat=%.4f lng=%.4f → lat=%.4f lng=%.4f", lat, lng, lng, lat)
log.debug(
"Swapping reversed coords: lat=%.4f lng=%.4f → lat=%.4f lng=%.4f",
lat,
lng,
lng,
lat,
)
return lng, lat
log.warning("Coords outside England bounds even after swap attempt: lat=%.4f lng=%.4f", lat, lng)
log.warning(
"Coords outside England bounds even after swap attempt: lat=%.4f lng=%.4f",
lat,
lng,
)
return lat, lng
@ -66,7 +76,9 @@ def normalize_price(amount: int, frequency: str) -> int:
return amount
def transform_property(prop: dict, outcode: str, pc_index: PostcodeSpatialIndex) -> dict | None:
def transform_property(
prop: dict, outcode: str, pc_index: PostcodeSpatialIndex
) -> dict | None:
"""Transform a raw Rightmove property dict into our output schema."""
loc = prop.get("location")
if not loc:
@ -86,13 +98,19 @@ def transform_property(prop: dict, outcode: str, pc_index: PostcodeSpatialIndex)
price = normalize_price(int(amount), frequency)
display_prices = price_obj.get("displayPrices", [])
price_qualifier = display_prices[0].get("displayPriceQualifier", "") if display_prices else ""
price_qualifier = (
display_prices[0].get("displayPriceQualifier", "") if display_prices else ""
)
sub_type = prop.get("propertySubType", "")
bedrooms = prop.get("bedrooms", 0) or 0
bathrooms = prop.get("bathrooms", 0) or 0
key_features = [kf.get("description", "") for kf in prop.get("keyFeatures", []) if kf.get("description")]
key_features = [
kf.get("description", "")
for kf in prop.get("keyFeatures", [])
if kf.get("description")
]
postcode = pc_index.nearest(lat, lng)
if not postcode:

515
finder/uv.lock generated
View file

@ -15,6 +15,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
]
[[package]]
name = "apify-fingerprint-datapoints"
version = "0.11.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/bb/a9/586b7ebdd682c047cd0b551dc7e154bb1480f8f6548154708e9a6c7844db/apify_fingerprint_datapoints-0.11.0.tar.gz", hash = "sha256:3f905c392b11a27fb59ccfe40891c166abd737ab9c6209733f102bbb3b302515", size = 969830, upload-time = "2026-03-01T01:00:04.737Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/af/38/9483eb52fc0f00039c684af627f8a8f994a8a99e8eceb869ba93b3fd740b/apify_fingerprint_datapoints-0.11.0-py3-none-any.whl", hash = "sha256:333340ccc3e520f19b5561e95d7abe2b31702e61d34b6247b328c9b8c93fbe1d", size = 726498, upload-time = "2026-03-01T01:00:03.103Z" },
]
[[package]]
name = "beautifulsoup4"
version = "4.14.3"
@ -37,6 +46,45 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" },
]
[[package]]
name = "browserforge"
version = "1.2.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "apify-fingerprint-datapoints" },
{ name = "click" },
]
sdist = { url = "https://files.pythonhosted.org/packages/78/6f/8975af88d203efd70cc69477ebac702babef38201d04621c9583f2508f25/browserforge-1.2.4.tar.gz", hash = "sha256:05686473793769856ebd3528c69071f5be0e511260993e8b2ba839863711a0c4", size = 36700, upload-time = "2026-02-03T02:52:09.721Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/dd/35/ce962f738ae28ffce6293e7607b129075633e6bb185a5ab87e49246eedc2/browserforge-1.2.4-py3-none-any.whl", hash = "sha256:fb1c14e62ac09de221dcfc73074200269f697596c642cb200ceaab1127a17542", size = 37890, upload-time = "2026-02-03T02:52:08.745Z" },
]
[[package]]
name = "camoufox"
version = "0.4.11"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "browserforge" },
{ name = "click" },
{ name = "language-tags" },
{ name = "lxml" },
{ name = "numpy" },
{ name = "orjson" },
{ name = "platformdirs" },
{ name = "playwright" },
{ name = "pysocks" },
{ name = "pyyaml" },
{ name = "requests" },
{ name = "screeninfo" },
{ name = "tqdm" },
{ name = "typing-extensions" },
{ name = "ua-parser" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d3/15/e0a1b586e354ea6b8d6612717bf4372aaaa6753444d5d006caf0bb116466/camoufox-0.4.11.tar.gz", hash = "sha256:0a2c9d24ac5070c104e7c2b125c0a3937f70efa416084ef88afe94c32a72eebe", size = 64409, upload-time = "2025-01-29T09:33:20.019Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c6/7b/a2f099a5afb9660271b3f20f6056ba679e7ab4eba42682266a65d5730f7e/camoufox-0.4.11-py3-none-any.whl", hash = "sha256:83864d434d159a7566990aa6524429a8d1a859cbf84d2f64ef4a9f29e7d2e5ff", size = 71628, upload-time = "2025-01-29T09:33:18.558Z" },
]
[[package]]
name = "certifi"
version = "2026.2.25"
@ -103,6 +151,79 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/7b/60/e3bec1881450851b087e301bedc3daa9377a4d45f1c26aa90b0b235e38aa/charset_normalizer-3.4.6.tar.gz", hash = "sha256:1ae6b62897110aa7c79ea2f5dd38d1abca6db663687c0b1ad9aed6f6bae3d9d6", size = 143363, upload-time = "2026-03-15T18:53:25.478Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e5/62/c0815c992c9545347aeea7859b50dc9044d147e2e7278329c6e02ac9a616/charset_normalizer-3.4.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ef7fedc7a6ecbe99969cd09632516738a97eeb8bd7258bf8a0f23114c057dab", size = 295154, upload-time = "2026-03-15T18:50:50.88Z" },
{ url = "https://files.pythonhosted.org/packages/a8/37/bdca6613c2e3c58c7421891d80cc3efa1d32e882f7c4a7ee6039c3fc951a/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a4ea868bc28109052790eb2b52a9ab33f3aa7adc02f96673526ff47419490e21", size = 199191, upload-time = "2026-03-15T18:50:52.658Z" },
{ url = "https://files.pythonhosted.org/packages/6c/92/9934d1bbd69f7f398b38c5dae1cbf9cc672e7c34a4adf7b17c0a9c17d15d/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:836ab36280f21fc1a03c99cd05c6b7af70d2697e374c7af0b61ed271401a72a2", size = 218674, upload-time = "2026-03-15T18:50:54.102Z" },
{ url = "https://files.pythonhosted.org/packages/af/90/25f6ab406659286be929fd89ab0e78e38aa183fc374e03aa3c12d730af8a/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f1ce721c8a7dfec21fcbdfe04e8f68174183cf4e8188e0645e92aa23985c57ff", size = 215259, upload-time = "2026-03-15T18:50:55.616Z" },
{ url = "https://files.pythonhosted.org/packages/4e/ef/79a463eb0fff7f96afa04c1d4c51f8fc85426f918db467854bfb6a569ce3/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e28d62a8fc7a1fa411c43bd65e346f3bce9716dc51b897fbe930c5987b402d5", size = 207276, upload-time = "2026-03-15T18:50:57.054Z" },
{ url = "https://files.pythonhosted.org/packages/f7/72/d0426afec4b71dc159fa6b4e68f868cd5a3ecd918fec5813a15d292a7d10/charset_normalizer-3.4.6-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:530d548084c4a9f7a16ed4a294d459b4f229db50df689bfe92027452452943a0", size = 195161, upload-time = "2026-03-15T18:50:58.686Z" },
{ url = "https://files.pythonhosted.org/packages/bf/18/c82b06a68bfcb6ce55e508225d210c7e6a4ea122bfc0748892f3dc4e8e11/charset_normalizer-3.4.6-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:30f445ae60aad5e1f8bdbb3108e39f6fbc09f4ea16c815c66578878325f8f15a", size = 203452, upload-time = "2026-03-15T18:51:00.196Z" },
{ url = "https://files.pythonhosted.org/packages/44/d6/0c25979b92f8adafdbb946160348d8d44aa60ce99afdc27df524379875cb/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ac2393c73378fea4e52aa56285a3d64be50f1a12395afef9cce47772f60334c2", size = 202272, upload-time = "2026-03-15T18:51:01.703Z" },
{ url = "https://files.pythonhosted.org/packages/2e/3d/7fea3e8fe84136bebbac715dd1221cc25c173c57a699c030ab9b8900cbb7/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:90ca27cd8da8118b18a52d5f547859cc1f8354a00cd1e8e5120df3e30d6279e5", size = 195622, upload-time = "2026-03-15T18:51:03.526Z" },
{ url = "https://files.pythonhosted.org/packages/57/8a/d6f7fd5cb96c58ef2f681424fbca01264461336d2a7fc875e4446b1f1346/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8e5a94886bedca0f9b78fecd6afb6629142fd2605aa70a125d49f4edc6037ee6", size = 220056, upload-time = "2026-03-15T18:51:05.269Z" },
{ url = "https://files.pythonhosted.org/packages/16/50/478cdda782c8c9c3fb5da3cc72dd7f331f031e7f1363a893cdd6ca0f8de0/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:695f5c2823691a25f17bc5d5ffe79fa90972cc34b002ac6c843bb8a1720e950d", size = 203751, upload-time = "2026-03-15T18:51:06.858Z" },
{ url = "https://files.pythonhosted.org/packages/75/fc/cc2fcac943939c8e4d8791abfa139f685e5150cae9f94b60f12520feaa9b/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:231d4da14bcd9301310faf492051bee27df11f2bc7549bc0bb41fef11b82daa2", size = 216563, upload-time = "2026-03-15T18:51:08.564Z" },
{ url = "https://files.pythonhosted.org/packages/a8/b7/a4add1d9a5f68f3d037261aecca83abdb0ab15960a3591d340e829b37298/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a056d1ad2633548ca18ffa2f85c202cfb48b68615129143915b8dc72a806a923", size = 209265, upload-time = "2026-03-15T18:51:10.312Z" },
{ url = "https://files.pythonhosted.org/packages/6c/18/c094561b5d64a24277707698e54b7f67bd17a4f857bbfbb1072bba07c8bf/charset_normalizer-3.4.6-cp312-cp312-win32.whl", hash = "sha256:c2274ca724536f173122f36c98ce188fd24ce3dad886ec2b7af859518ce008a4", size = 144229, upload-time = "2026-03-15T18:51:11.694Z" },
{ url = "https://files.pythonhosted.org/packages/ab/20/0567efb3a8fd481b8f34f739ebddc098ed062a59fed41a8d193a61939e8f/charset_normalizer-3.4.6-cp312-cp312-win_amd64.whl", hash = "sha256:c8ae56368f8cc97c7e40a7ee18e1cedaf8e780cd8bc5ed5ac8b81f238614facb", size = 154277, upload-time = "2026-03-15T18:51:13.004Z" },
{ url = "https://files.pythonhosted.org/packages/15/57/28d79b44b51933119e21f65479d0864a8d5893e494cf5daab15df0247c17/charset_normalizer-3.4.6-cp312-cp312-win_arm64.whl", hash = "sha256:899d28f422116b08be5118ef350c292b36fc15ec2daeb9ea987c89281c7bb5c4", size = 142817, upload-time = "2026-03-15T18:51:14.408Z" },
{ url = "https://files.pythonhosted.org/packages/1e/1d/4fdabeef4e231153b6ed7567602f3b68265ec4e5b76d6024cf647d43d981/charset_normalizer-3.4.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:11afb56037cbc4b1555a34dd69151e8e069bee82e613a73bef6e714ce733585f", size = 294823, upload-time = "2026-03-15T18:51:15.755Z" },
{ url = "https://files.pythonhosted.org/packages/47/7b/20e809b89c69d37be748d98e84dce6820bf663cf19cf6b942c951a3e8f41/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423fb7e748a08f854a08a222b983f4df1912b1daedce51a72bd24fe8f26a1843", size = 198527, upload-time = "2026-03-15T18:51:17.177Z" },
{ url = "https://files.pythonhosted.org/packages/37/a6/4f8d27527d59c039dce6f7622593cdcd3d70a8504d87d09eb11e9fdc6062/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d73beaac5e90173ac3deb9928a74763a6d230f494e4bfb422c217a0ad8e629bf", size = 218388, upload-time = "2026-03-15T18:51:18.934Z" },
{ url = "https://files.pythonhosted.org/packages/f6/9b/4770ccb3e491a9bacf1c46cc8b812214fe367c86a96353ccc6daf87b01ec/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d60377dce4511655582e300dc1e5a5f24ba0cb229005a1d5c8d0cb72bb758ab8", size = 214563, upload-time = "2026-03-15T18:51:20.374Z" },
{ url = "https://files.pythonhosted.org/packages/2b/58/a199d245894b12db0b957d627516c78e055adc3a0d978bc7f65ddaf7c399/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:530e8cebeea0d76bdcf93357aa5e41336f48c3dc709ac52da2bb167c5b8271d9", size = 206587, upload-time = "2026-03-15T18:51:21.807Z" },
{ url = "https://files.pythonhosted.org/packages/7e/70/3def227f1ec56f5c69dfc8392b8bd63b11a18ca8178d9211d7cc5e5e4f27/charset_normalizer-3.4.6-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:a26611d9987b230566f24a0a125f17fe0de6a6aff9f25c9f564aaa2721a5fb88", size = 194724, upload-time = "2026-03-15T18:51:23.508Z" },
{ url = "https://files.pythonhosted.org/packages/58/ab/9318352e220c05efd31c2779a23b50969dc94b985a2efa643ed9077bfca5/charset_normalizer-3.4.6-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:34315ff4fc374b285ad7f4a0bf7dcbfe769e1b104230d40f49f700d4ab6bbd84", size = 202956, upload-time = "2026-03-15T18:51:25.239Z" },
{ url = "https://files.pythonhosted.org/packages/75/13/f3550a3ac25b70f87ac98c40d3199a8503676c2f1620efbf8d42095cfc40/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ddd609f9e1af8c7bd6e2aca279c931aefecd148a14402d4e368f3171769fd", size = 201923, upload-time = "2026-03-15T18:51:26.682Z" },
{ url = "https://files.pythonhosted.org/packages/1b/db/c5c643b912740b45e8eec21de1bbab8e7fc085944d37e1e709d3dcd9d72f/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:80d0a5615143c0b3225e5e3ef22c8d5d51f3f72ce0ea6fb84c943546c7b25b6c", size = 195366, upload-time = "2026-03-15T18:51:28.129Z" },
{ url = "https://files.pythonhosted.org/packages/5a/67/3b1c62744f9b2448443e0eb160d8b001c849ec3fef591e012eda6484787c/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:92734d4d8d187a354a556626c221cd1a892a4e0802ccb2af432a1d85ec012194", size = 219752, upload-time = "2026-03-15T18:51:29.556Z" },
{ url = "https://files.pythonhosted.org/packages/f6/98/32ffbaf7f0366ffb0445930b87d103f6b406bc2c271563644bde8a2b1093/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:613f19aa6e082cf96e17e3ffd89383343d0d589abda756b7764cf78361fd41dc", size = 203296, upload-time = "2026-03-15T18:51:30.921Z" },
{ url = "https://files.pythonhosted.org/packages/41/12/5d308c1bbe60cabb0c5ef511574a647067e2a1f631bc8634fcafaccd8293/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:2b1a63e8224e401cafe7739f77efd3f9e7f5f2026bda4aead8e59afab537784f", size = 215956, upload-time = "2026-03-15T18:51:32.399Z" },
{ url = "https://files.pythonhosted.org/packages/53/e9/5f85f6c5e20669dbe56b165c67b0260547dea97dba7e187938833d791687/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6cceb5473417d28edd20c6c984ab6fee6c6267d38d906823ebfe20b03d607dc2", size = 208652, upload-time = "2026-03-15T18:51:34.214Z" },
{ url = "https://files.pythonhosted.org/packages/f1/11/897052ea6af56df3eef3ca94edafee410ca699ca0c7b87960ad19932c55e/charset_normalizer-3.4.6-cp313-cp313-win32.whl", hash = "sha256:d7de2637729c67d67cf87614b566626057e95c303bc0a55ffe391f5205e7003d", size = 143940, upload-time = "2026-03-15T18:51:36.15Z" },
{ url = "https://files.pythonhosted.org/packages/a1/5c/724b6b363603e419829f561c854b87ed7c7e31231a7908708ac086cdf3e2/charset_normalizer-3.4.6-cp313-cp313-win_amd64.whl", hash = "sha256:572d7c822caf521f0525ba1bce1a622a0b85cf47ffbdae6c9c19e3b5ac3c4389", size = 154101, upload-time = "2026-03-15T18:51:37.876Z" },
{ url = "https://files.pythonhosted.org/packages/01/a5/7abf15b4c0968e47020f9ca0935fb3274deb87cb288cd187cad92e8cdffd/charset_normalizer-3.4.6-cp313-cp313-win_arm64.whl", hash = "sha256:a4474d924a47185a06411e0064b803c68be044be2d60e50e8bddcc2649957c1f", size = 143109, upload-time = "2026-03-15T18:51:39.565Z" },
{ url = "https://files.pythonhosted.org/packages/25/6f/ffe1e1259f384594063ea1869bfb6be5cdb8bc81020fc36c3636bc8302a1/charset_normalizer-3.4.6-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:9cc6e6d9e571d2f863fa77700701dae73ed5f78881efc8b3f9a4398772ff53e8", size = 294458, upload-time = "2026-03-15T18:51:41.134Z" },
{ url = "https://files.pythonhosted.org/packages/56/60/09bb6c13a8c1016c2ed5c6a6488e4ffef506461aa5161662bd7636936fb1/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5960d965e67165d75b7c7ffc60a83ec5abfc5c11b764ec13ea54fbef8b4421", size = 199277, upload-time = "2026-03-15T18:51:42.953Z" },
{ url = "https://files.pythonhosted.org/packages/00/50/dcfbb72a5138bbefdc3332e8d81a23494bf67998b4b100703fd15fa52d81/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b3694e3f87f8ac7ce279d4355645b3c878d24d1424581b46282f24b92f5a4ae2", size = 218758, upload-time = "2026-03-15T18:51:44.339Z" },
{ url = "https://files.pythonhosted.org/packages/03/b3/d79a9a191bb75f5aa81f3aaaa387ef29ce7cb7a9e5074ba8ea095cc073c2/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5d11595abf8dd942a77883a39d81433739b287b6aa71620f15164f8096221b30", size = 215299, upload-time = "2026-03-15T18:51:45.871Z" },
{ url = "https://files.pythonhosted.org/packages/76/7e/bc8911719f7084f72fd545f647601ea3532363927f807d296a8c88a62c0d/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7bda6eebafd42133efdca535b04ccb338ab29467b3f7bf79569883676fc628db", size = 206811, upload-time = "2026-03-15T18:51:47.308Z" },
{ url = "https://files.pythonhosted.org/packages/e2/40/c430b969d41dda0c465aa36cc7c2c068afb67177bef50905ac371b28ccc7/charset_normalizer-3.4.6-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:bbc8c8650c6e51041ad1be191742b8b421d05bbd3410f43fa2a00c8db87678e8", size = 193706, upload-time = "2026-03-15T18:51:48.849Z" },
{ url = "https://files.pythonhosted.org/packages/48/15/e35e0590af254f7df984de1323640ef375df5761f615b6225ba8deb9799a/charset_normalizer-3.4.6-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:22c6f0c2fbc31e76c3b8a86fba1a56eda6166e238c29cdd3d14befdb4a4e4815", size = 202706, upload-time = "2026-03-15T18:51:50.257Z" },
{ url = "https://files.pythonhosted.org/packages/5e/bd/f736f7b9cc5e93a18b794a50346bb16fbfd6b37f99e8f306f7951d27c17c/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7edbed096e4a4798710ed6bc75dcaa2a21b68b6c356553ac4823c3658d53743a", size = 202497, upload-time = "2026-03-15T18:51:52.012Z" },
{ url = "https://files.pythonhosted.org/packages/9d/ba/2cc9e3e7dfdf7760a6ed8da7446d22536f3d0ce114ac63dee2a5a3599e62/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:7f9019c9cb613f084481bd6a100b12e1547cf2efe362d873c2e31e4035a6fa43", size = 193511, upload-time = "2026-03-15T18:51:53.723Z" },
{ url = "https://files.pythonhosted.org/packages/9e/cb/5be49b5f776e5613be07298c80e1b02a2d900f7a7de807230595c85a8b2e/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:58c948d0d086229efc484fe2f30c2d382c86720f55cd9bc33591774348ad44e0", size = 220133, upload-time = "2026-03-15T18:51:55.333Z" },
{ url = "https://files.pythonhosted.org/packages/83/43/99f1b5dad345accb322c80c7821071554f791a95ee50c1c90041c157ae99/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:419a9d91bd238052642a51938af8ac05da5b3343becde08d5cdeab9046df9ee1", size = 203035, upload-time = "2026-03-15T18:51:56.736Z" },
{ url = "https://files.pythonhosted.org/packages/87/9a/62c2cb6a531483b55dddff1a68b3d891a8b498f3ca555fbcf2978e804d9d/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5273b9f0b5835ff0350c0828faea623c68bfa65b792720c453e22b25cc72930f", size = 216321, upload-time = "2026-03-15T18:51:58.17Z" },
{ url = "https://files.pythonhosted.org/packages/6e/79/94a010ff81e3aec7c293eb82c28f930918e517bc144c9906a060844462eb/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:0e901eb1049fdb80f5bd11ed5ea1e498ec423102f7a9b9e4645d5b8204ff2815", size = 208973, upload-time = "2026-03-15T18:51:59.998Z" },
{ url = "https://files.pythonhosted.org/packages/2a/57/4ecff6d4ec8585342f0c71bc03efaa99cb7468f7c91a57b105bcd561cea8/charset_normalizer-3.4.6-cp314-cp314-win32.whl", hash = "sha256:b4ff1d35e8c5bd078be89349b6f3a845128e685e751b6ea1169cf2160b344c4d", size = 144610, upload-time = "2026-03-15T18:52:02.213Z" },
{ url = "https://files.pythonhosted.org/packages/80/94/8434a02d9d7f168c25767c64671fead8d599744a05d6a6c877144c754246/charset_normalizer-3.4.6-cp314-cp314-win_amd64.whl", hash = "sha256:74119174722c4349af9708993118581686f343adc1c8c9c007d59be90d077f3f", size = 154962, upload-time = "2026-03-15T18:52:03.658Z" },
{ url = "https://files.pythonhosted.org/packages/46/4c/48f2cdbfd923026503dfd67ccea45c94fd8fe988d9056b468579c66ed62b/charset_normalizer-3.4.6-cp314-cp314-win_arm64.whl", hash = "sha256:e5bcc1a1ae744e0bb59641171ae53743760130600da8db48cbb6e4918e186e4e", size = 143595, upload-time = "2026-03-15T18:52:05.123Z" },
{ url = "https://files.pythonhosted.org/packages/31/93/8878be7569f87b14f1d52032946131bcb6ebbd8af3e20446bc04053dc3f1/charset_normalizer-3.4.6-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:ad8faf8df23f0378c6d527d8b0b15ea4a2e23c89376877c598c4870d1b2c7866", size = 314828, upload-time = "2026-03-15T18:52:06.831Z" },
{ url = "https://files.pythonhosted.org/packages/06/b6/fae511ca98aac69ecc35cde828b0a3d146325dd03d99655ad38fc2cc3293/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f5ea69428fa1b49573eef0cc44a1d43bebd45ad0c611eb7d7eac760c7ae771bc", size = 208138, upload-time = "2026-03-15T18:52:08.239Z" },
{ url = "https://files.pythonhosted.org/packages/54/57/64caf6e1bf07274a1e0b7c160a55ee9e8c9ec32c46846ce59b9c333f7008/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:06a7e86163334edfc5d20fe104db92fcd666e5a5df0977cb5680a506fe26cc8e", size = 224679, upload-time = "2026-03-15T18:52:10.043Z" },
{ url = "https://files.pythonhosted.org/packages/aa/cb/9ff5a25b9273ef160861b41f6937f86fae18b0792fe0a8e75e06acb08f1d/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e1f6e2f00a6b8edb562826e4632e26d063ac10307e80f7461f7de3ad8ef3f077", size = 223475, upload-time = "2026-03-15T18:52:11.854Z" },
{ url = "https://files.pythonhosted.org/packages/fc/97/440635fc093b8d7347502a377031f9605a1039c958f3cd18dcacffb37743/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95b52c68d64c1878818687a473a10547b3292e82b6f6fe483808fb1468e2f52f", size = 215230, upload-time = "2026-03-15T18:52:13.325Z" },
{ url = "https://files.pythonhosted.org/packages/cd/24/afff630feb571a13f07c8539fbb502d2ab494019492aaffc78ef41f1d1d0/charset_normalizer-3.4.6-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:7504e9b7dc05f99a9bbb4525c67a2c155073b44d720470a148b34166a69c054e", size = 199045, upload-time = "2026-03-15T18:52:14.752Z" },
{ url = "https://files.pythonhosted.org/packages/e5/17/d1399ecdaf7e0498c327433e7eefdd862b41236a7e484355b8e0e5ebd64b/charset_normalizer-3.4.6-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:172985e4ff804a7ad08eebec0a1640ece87ba5041d565fff23c8f99c1f389484", size = 211658, upload-time = "2026-03-15T18:52:16.278Z" },
{ url = "https://files.pythonhosted.org/packages/b5/38/16baa0affb957b3d880e5ac2144caf3f9d7de7bc4a91842e447fbb5e8b67/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4be9f4830ba8741527693848403e2c457c16e499100963ec711b1c6f2049b7c7", size = 210769, upload-time = "2026-03-15T18:52:17.782Z" },
{ url = "https://files.pythonhosted.org/packages/05/34/c531bc6ac4c21da9ddfddb3107be2287188b3ea4b53b70fc58f2a77ac8d8/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:79090741d842f564b1b2827c0b82d846405b744d31e84f18d7a7b41c20e473ff", size = 201328, upload-time = "2026-03-15T18:52:19.553Z" },
{ url = "https://files.pythonhosted.org/packages/fa/73/a5a1e9ca5f234519c1953608a03fe109c306b97fdfb25f09182babad51a7/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:87725cfb1a4f1f8c2fc9890ae2f42094120f4b44db9360be5d99a4c6b0e03a9e", size = 225302, upload-time = "2026-03-15T18:52:21.043Z" },
{ url = "https://files.pythonhosted.org/packages/ba/f6/cd782923d112d296294dea4bcc7af5a7ae0f86ab79f8fefbda5526b6cfc0/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:fcce033e4021347d80ed9c66dcf1e7b1546319834b74445f561d2e2221de5659", size = 211127, upload-time = "2026-03-15T18:52:22.491Z" },
{ url = "https://files.pythonhosted.org/packages/0e/c5/0b6898950627af7d6103a449b22320372c24c6feda91aa24e201a478d161/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:ca0276464d148c72defa8bb4390cce01b4a0e425f3b50d1435aa6d7a18107602", size = 222840, upload-time = "2026-03-15T18:52:24.113Z" },
{ url = "https://files.pythonhosted.org/packages/7d/25/c4bba773bef442cbdc06111d40daa3de5050a676fa26e85090fc54dd12f0/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:197c1a244a274bb016dd8b79204850144ef77fe81c5b797dc389327adb552407", size = 216890, upload-time = "2026-03-15T18:52:25.541Z" },
{ url = "https://files.pythonhosted.org/packages/35/1a/05dacadb0978da72ee287b0143097db12f2e7e8d3ffc4647da07a383b0b7/charset_normalizer-3.4.6-cp314-cp314t-win32.whl", hash = "sha256:2a24157fa36980478dd1770b585c0f30d19e18f4fb0c47c13aa568f871718579", size = 155379, upload-time = "2026-03-15T18:52:27.05Z" },
{ url = "https://files.pythonhosted.org/packages/5d/7a/d269d834cb3a76291651256f3b9a5945e81d0a49ab9f4a498964e83c0416/charset_normalizer-3.4.6-cp314-cp314t-win_amd64.whl", hash = "sha256:cd5e2801c89992ed8c0a3f0293ae83c159a60d9a5d685005383ef4caca77f2c4", size = 169043, upload-time = "2026-03-15T18:52:28.502Z" },
{ url = "https://files.pythonhosted.org/packages/23/06/28b29fba521a37a8932c6a84192175c34d49f84a6d4773fa63d05f9aff22/charset_normalizer-3.4.6-cp314-cp314t-win_arm64.whl", hash = "sha256:47955475ac79cc504ef2704b192364e51d0d473ad452caedd0002605f780101c", size = 148523, upload-time = "2026-03-15T18:52:29.956Z" },
{ url = "https://files.pythonhosted.org/packages/2a/68/687187c7e26cb24ccbd88e5069f5ef00eba804d36dde11d99aad0838ab45/charset_normalizer-3.4.6-py3-none-any.whl", hash = "sha256:947cf925bc916d90adba35a64c82aace04fa39b46b52d4630ece166655905a69", size = 61455, upload-time = "2026-03-15T18:53:23.833Z" },
]
[[package]]
name = "click"
version = "8.3.1"
@ -147,6 +268,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/5c/7c/d2ba86b0b3e1e2830bd94163d047de122c69a8df03c5c7c36326c456ad82/curl_cffi-0.14.0-cp39-abi3-win_arm64.whl", hash = "sha256:2eed50a969201605c863c4c31269dfc3e0da52916086ac54553cfa353022425c", size = 1425067, upload-time = "2025-12-16T03:25:06.454Z" },
]
[[package]]
name = "cython"
version = "3.2.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/91/85/7574c9cd44b69a27210444b6650f6477f56c75fee1b70d7672d3e4166167/cython-3.2.4.tar.gz", hash = "sha256:84226ecd313b233da27dc2eb3601b4f222b8209c3a7216d8733b031da1dc64e6", size = 3280291, upload-time = "2026-01-04T14:14:14.473Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/91/4d/1eb0c7c196a136b1926f4d7f0492a96c6fabd604d77e6cd43b56a3a16d83/cython-3.2.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:64d7f71be3dd6d6d4a4c575bb3a4674ea06d1e1e5e4cd1b9882a2bc40ed3c4c9", size = 2970064, upload-time = "2026-01-04T14:15:08.567Z" },
{ url = "https://files.pythonhosted.org/packages/18/b5/1cfca43b7d20a0fdb1eac67313d6bb6b18d18897f82dd0f17436bdd2ba7f/cython-3.2.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:28e8075087a59756f2d059273184b8b639fe0f16cf17470bd91c39921bc154e0", size = 2960506, upload-time = "2026-01-04T14:15:16.733Z" },
{ url = "https://files.pythonhosted.org/packages/ee/d7/3bda3efce0c5c6ce79cc21285dbe6f60369c20364e112f5a506ee8a1b067/cython-3.2.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d4b4fd5332ab093131fa6172e8362f16adef3eac3179fd24bbdc392531cb82fa", size = 2971496, upload-time = "2026-01-04T14:15:25.038Z" },
{ url = "https://files.pythonhosted.org/packages/0a/8b/fd393f0923c82be4ec0db712fffb2ff0a7a131707b842c99bf24b549274d/cython-3.2.4-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:36bf3f5eb56d5281aafabecbaa6ed288bc11db87547bba4e1e52943ae6961ccf", size = 2875622, upload-time = "2026-01-04T14:15:39.749Z" },
{ url = "https://files.pythonhosted.org/packages/ff/fa/d3c15189f7c52aaefbaea76fb012119b04b9013f4bf446cb4eb4c26c4e6b/cython-3.2.4-py3-none-any.whl", hash = "sha256:732fc93bc33ae4b14f6afaca663b916c2fdd5dcbfad7114e17fb2434eeaea45c", size = 1257078, upload-time = "2026-01-04T14:14:12.373Z" },
]
[[package]]
name = "fake-useragent"
version = "2.2.0"
@ -162,11 +296,13 @@ version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "beautifulsoup4" },
{ name = "camoufox" },
{ name = "curl-cffi" },
{ name = "fake-useragent" },
{ name = "flask" },
{ name = "httpx" },
{ name = "playwright" },
{ name = "playwright-stealth" },
{ name = "polars" },
{ name = "prometheus-client" },
]
@ -174,11 +310,13 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "beautifulsoup4" },
{ name = "camoufox", specifier = ">=0.4.11" },
{ name = "curl-cffi" },
{ name = "fake-useragent", specifier = ">=2.2.0" },
{ name = "flask" },
{ name = "httpx" },
{ name = "playwright", specifier = ">=1.58.0" },
{ name = "playwright-stealth", specifier = ">=2.0.2" },
{ name = "polars" },
{ name = "prometheus-client" },
]
@ -310,6 +448,95 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
]
[[package]]
name = "language-tags"
version = "1.2.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e0/7e/b6a0efe4fee11e9742c1baaedf7c574084238a70b03c1d8eb2761383848f/language_tags-1.2.0.tar.gz", hash = "sha256:e934acba3e3dc85f867703eca421847a9ab7b7679b11b5d5cfd096febbf8bde6", size = 207901, upload-time = "2023-01-11T18:38:07.893Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b0/42/327554649ed2dd5ce59d3f5da176c7be20f9352c7c6c51597293660b7b08/language_tags-1.2.0-py3-none-any.whl", hash = "sha256:d815604622242fdfbbfd747b40c31213617fd03734a267f2e39ee4bd73c88722", size = 213449, upload-time = "2023-01-11T18:38:05.692Z" },
]
[[package]]
name = "lxml"
version = "6.0.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/aa/88/262177de60548e5a2bfc46ad28232c9e9cbde697bd94132aeb80364675cb/lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62", size = 4073426, upload-time = "2025-09-22T04:04:59.287Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f3/c8/8ff2bc6b920c84355146cd1ab7d181bc543b89241cfb1ebee824a7c81457/lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456", size = 8661887, upload-time = "2025-09-22T04:01:17.265Z" },
{ url = "https://files.pythonhosted.org/packages/37/6f/9aae1008083bb501ef63284220ce81638332f9ccbfa53765b2b7502203cf/lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924", size = 4667818, upload-time = "2025-09-22T04:01:19.688Z" },
{ url = "https://files.pythonhosted.org/packages/f1/ca/31fb37f99f37f1536c133476674c10b577e409c0a624384147653e38baf2/lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f", size = 4950807, upload-time = "2025-09-22T04:01:21.487Z" },
{ url = "https://files.pythonhosted.org/packages/da/87/f6cb9442e4bada8aab5ae7e1046264f62fdbeaa6e3f6211b93f4c0dd97f1/lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534", size = 5109179, upload-time = "2025-09-22T04:01:23.32Z" },
{ url = "https://files.pythonhosted.org/packages/c8/20/a7760713e65888db79bbae4f6146a6ae5c04e4a204a3c48896c408cd6ed2/lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564", size = 5023044, upload-time = "2025-09-22T04:01:25.118Z" },
{ url = "https://files.pythonhosted.org/packages/a2/b0/7e64e0460fcb36471899f75831509098f3fd7cd02a3833ac517433cb4f8f/lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f", size = 5359685, upload-time = "2025-09-22T04:01:27.398Z" },
{ url = "https://files.pythonhosted.org/packages/b9/e1/e5df362e9ca4e2f48ed6411bd4b3a0ae737cc842e96877f5bf9428055ab4/lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0", size = 5654127, upload-time = "2025-09-22T04:01:29.629Z" },
{ url = "https://files.pythonhosted.org/packages/c6/d1/232b3309a02d60f11e71857778bfcd4acbdb86c07db8260caf7d008b08f8/lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192", size = 5253958, upload-time = "2025-09-22T04:01:31.535Z" },
{ url = "https://files.pythonhosted.org/packages/35/35/d955a070994725c4f7d80583a96cab9c107c57a125b20bb5f708fe941011/lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0", size = 4711541, upload-time = "2025-09-22T04:01:33.801Z" },
{ url = "https://files.pythonhosted.org/packages/1e/be/667d17363b38a78c4bd63cfd4b4632029fd68d2c2dc81f25ce9eb5224dd5/lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092", size = 5267426, upload-time = "2025-09-22T04:01:35.639Z" },
{ url = "https://files.pythonhosted.org/packages/ea/47/62c70aa4a1c26569bc958c9ca86af2bb4e1f614e8c04fb2989833874f7ae/lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f", size = 5064917, upload-time = "2025-09-22T04:01:37.448Z" },
{ url = "https://files.pythonhosted.org/packages/bd/55/6ceddaca353ebd0f1908ef712c597f8570cc9c58130dbb89903198e441fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8", size = 4788795, upload-time = "2025-09-22T04:01:39.165Z" },
{ url = "https://files.pythonhosted.org/packages/cf/e8/fd63e15da5e3fd4c2146f8bbb3c14e94ab850589beab88e547b2dbce22e1/lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f", size = 5676759, upload-time = "2025-09-22T04:01:41.506Z" },
{ url = "https://files.pythonhosted.org/packages/76/47/b3ec58dc5c374697f5ba37412cd2728f427d056315d124dd4b61da381877/lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6", size = 5255666, upload-time = "2025-09-22T04:01:43.363Z" },
{ url = "https://files.pythonhosted.org/packages/19/93/03ba725df4c3d72afd9596eef4a37a837ce8e4806010569bedfcd2cb68fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322", size = 5277989, upload-time = "2025-09-22T04:01:45.215Z" },
{ url = "https://files.pythonhosted.org/packages/c6/80/c06de80bfce881d0ad738576f243911fccf992687ae09fd80b734712b39c/lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849", size = 3611456, upload-time = "2025-09-22T04:01:48.243Z" },
{ url = "https://files.pythonhosted.org/packages/f7/d7/0cdfb6c3e30893463fb3d1e52bc5f5f99684a03c29a0b6b605cfae879cd5/lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f", size = 4011793, upload-time = "2025-09-22T04:01:50.042Z" },
{ url = "https://files.pythonhosted.org/packages/ea/7b/93c73c67db235931527301ed3785f849c78991e2e34f3fd9a6663ffda4c5/lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6", size = 3672836, upload-time = "2025-09-22T04:01:52.145Z" },
{ url = "https://files.pythonhosted.org/packages/53/fd/4e8f0540608977aea078bf6d79f128e0e2c2bba8af1acf775c30baa70460/lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77", size = 8648494, upload-time = "2025-09-22T04:01:54.242Z" },
{ url = "https://files.pythonhosted.org/packages/5d/f4/2a94a3d3dfd6c6b433501b8d470a1960a20ecce93245cf2db1706adf6c19/lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f", size = 4661146, upload-time = "2025-09-22T04:01:56.282Z" },
{ url = "https://files.pythonhosted.org/packages/25/2e/4efa677fa6b322013035d38016f6ae859d06cac67437ca7dc708a6af7028/lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452", size = 4946932, upload-time = "2025-09-22T04:01:58.989Z" },
{ url = "https://files.pythonhosted.org/packages/ce/0f/526e78a6d38d109fdbaa5049c62e1d32fdd70c75fb61c4eadf3045d3d124/lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048", size = 5100060, upload-time = "2025-09-22T04:02:00.812Z" },
{ url = "https://files.pythonhosted.org/packages/81/76/99de58d81fa702cc0ea7edae4f4640416c2062813a00ff24bd70ac1d9c9b/lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df", size = 5019000, upload-time = "2025-09-22T04:02:02.671Z" },
{ url = "https://files.pythonhosted.org/packages/b5/35/9e57d25482bc9a9882cb0037fdb9cc18f4b79d85df94fa9d2a89562f1d25/lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1", size = 5348496, upload-time = "2025-09-22T04:02:04.904Z" },
{ url = "https://files.pythonhosted.org/packages/a6/8e/cb99bd0b83ccc3e8f0f528e9aa1f7a9965dfec08c617070c5db8d63a87ce/lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916", size = 5643779, upload-time = "2025-09-22T04:02:06.689Z" },
{ url = "https://files.pythonhosted.org/packages/d0/34/9e591954939276bb679b73773836c6684c22e56d05980e31d52a9a8deb18/lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd", size = 5244072, upload-time = "2025-09-22T04:02:08.587Z" },
{ url = "https://files.pythonhosted.org/packages/8d/27/b29ff065f9aaca443ee377aff699714fcbffb371b4fce5ac4ca759e436d5/lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6", size = 4718675, upload-time = "2025-09-22T04:02:10.783Z" },
{ url = "https://files.pythonhosted.org/packages/2b/9f/f756f9c2cd27caa1a6ef8c32ae47aadea697f5c2c6d07b0dae133c244fbe/lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a", size = 5255171, upload-time = "2025-09-22T04:02:12.631Z" },
{ url = "https://files.pythonhosted.org/packages/61/46/bb85ea42d2cb1bd8395484fd72f38e3389611aa496ac7772da9205bbda0e/lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679", size = 5057175, upload-time = "2025-09-22T04:02:14.718Z" },
{ url = "https://files.pythonhosted.org/packages/95/0c/443fc476dcc8e41577f0af70458c50fe299a97bb6b7505bb1ae09aa7f9ac/lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659", size = 4785688, upload-time = "2025-09-22T04:02:16.957Z" },
{ url = "https://files.pythonhosted.org/packages/48/78/6ef0b359d45bb9697bc5a626e1992fa5d27aa3f8004b137b2314793b50a0/lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484", size = 5660655, upload-time = "2025-09-22T04:02:18.815Z" },
{ url = "https://files.pythonhosted.org/packages/ff/ea/e1d33808f386bc1339d08c0dcada6e4712d4ed8e93fcad5f057070b7988a/lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2", size = 5247695, upload-time = "2025-09-22T04:02:20.593Z" },
{ url = "https://files.pythonhosted.org/packages/4f/47/eba75dfd8183673725255247a603b4ad606f4ae657b60c6c145b381697da/lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314", size = 5269841, upload-time = "2025-09-22T04:02:22.489Z" },
{ url = "https://files.pythonhosted.org/packages/76/04/5c5e2b8577bc936e219becb2e98cdb1aca14a4921a12995b9d0c523502ae/lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2", size = 3610700, upload-time = "2025-09-22T04:02:24.465Z" },
{ url = "https://files.pythonhosted.org/packages/fe/0a/4643ccc6bb8b143e9f9640aa54e38255f9d3b45feb2cbe7ae2ca47e8782e/lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7", size = 4010347, upload-time = "2025-09-22T04:02:26.286Z" },
{ url = "https://files.pythonhosted.org/packages/31/ef/dcf1d29c3f530577f61e5fe2f1bd72929acf779953668a8a47a479ae6f26/lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf", size = 3671248, upload-time = "2025-09-22T04:02:27.918Z" },
{ url = "https://files.pythonhosted.org/packages/03/15/d4a377b385ab693ce97b472fe0c77c2b16ec79590e688b3ccc71fba19884/lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe", size = 8659801, upload-time = "2025-09-22T04:02:30.113Z" },
{ url = "https://files.pythonhosted.org/packages/c8/e8/c128e37589463668794d503afaeb003987373c5f94d667124ffd8078bbd9/lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d", size = 4659403, upload-time = "2025-09-22T04:02:32.119Z" },
{ url = "https://files.pythonhosted.org/packages/00/ce/74903904339decdf7da7847bb5741fc98a5451b42fc419a86c0c13d26fe2/lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d", size = 4966974, upload-time = "2025-09-22T04:02:34.155Z" },
{ url = "https://files.pythonhosted.org/packages/1f/d3/131dec79ce61c5567fecf82515bd9bc36395df42501b50f7f7f3bd065df0/lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5", size = 5102953, upload-time = "2025-09-22T04:02:36.054Z" },
{ url = "https://files.pythonhosted.org/packages/3a/ea/a43ba9bb750d4ffdd885f2cd333572f5bb900cd2408b67fdda07e85978a0/lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0", size = 5055054, upload-time = "2025-09-22T04:02:38.154Z" },
{ url = "https://files.pythonhosted.org/packages/60/23/6885b451636ae286c34628f70a7ed1fcc759f8d9ad382d132e1c8d3d9bfd/lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba", size = 5352421, upload-time = "2025-09-22T04:02:40.413Z" },
{ url = "https://files.pythonhosted.org/packages/48/5b/fc2ddfc94ddbe3eebb8e9af6e3fd65e2feba4967f6a4e9683875c394c2d8/lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0", size = 5673684, upload-time = "2025-09-22T04:02:42.288Z" },
{ url = "https://files.pythonhosted.org/packages/29/9c/47293c58cc91769130fbf85531280e8cc7868f7fbb6d92f4670071b9cb3e/lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d", size = 5252463, upload-time = "2025-09-22T04:02:44.165Z" },
{ url = "https://files.pythonhosted.org/packages/9b/da/ba6eceb830c762b48e711ded880d7e3e89fc6c7323e587c36540b6b23c6b/lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37", size = 4698437, upload-time = "2025-09-22T04:02:46.524Z" },
{ url = "https://files.pythonhosted.org/packages/a5/24/7be3f82cb7990b89118d944b619e53c656c97dc89c28cfb143fdb7cd6f4d/lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9", size = 5269890, upload-time = "2025-09-22T04:02:48.812Z" },
{ url = "https://files.pythonhosted.org/packages/1b/bd/dcfb9ea1e16c665efd7538fc5d5c34071276ce9220e234217682e7d2c4a5/lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917", size = 5097185, upload-time = "2025-09-22T04:02:50.746Z" },
{ url = "https://files.pythonhosted.org/packages/21/04/a60b0ff9314736316f28316b694bccbbabe100f8483ad83852d77fc7468e/lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f", size = 4745895, upload-time = "2025-09-22T04:02:52.968Z" },
{ url = "https://files.pythonhosted.org/packages/d6/bd/7d54bd1846e5a310d9c715921c5faa71cf5c0853372adf78aee70c8d7aa2/lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8", size = 5695246, upload-time = "2025-09-22T04:02:54.798Z" },
{ url = "https://files.pythonhosted.org/packages/fd/32/5643d6ab947bc371da21323acb2a6e603cedbe71cb4c99c8254289ab6f4e/lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a", size = 5260797, upload-time = "2025-09-22T04:02:57.058Z" },
{ url = "https://files.pythonhosted.org/packages/33/da/34c1ec4cff1eea7d0b4cd44af8411806ed943141804ac9c5d565302afb78/lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c", size = 5277404, upload-time = "2025-09-22T04:02:58.966Z" },
{ url = "https://files.pythonhosted.org/packages/82/57/4eca3e31e54dc89e2c3507e1cd411074a17565fa5ffc437c4ae0a00d439e/lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b", size = 3670072, upload-time = "2025-09-22T04:03:38.05Z" },
{ url = "https://files.pythonhosted.org/packages/e3/e0/c96cf13eccd20c9421ba910304dae0f619724dcf1702864fd59dd386404d/lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed", size = 4080617, upload-time = "2025-09-22T04:03:39.835Z" },
{ url = "https://files.pythonhosted.org/packages/d5/5d/b3f03e22b3d38d6f188ef044900a9b29b2fe0aebb94625ce9fe244011d34/lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8", size = 3754930, upload-time = "2025-09-22T04:03:41.565Z" },
{ url = "https://files.pythonhosted.org/packages/5e/5c/42c2c4c03554580708fc738d13414801f340c04c3eff90d8d2d227145275/lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d", size = 8910380, upload-time = "2025-09-22T04:03:01.645Z" },
{ url = "https://files.pythonhosted.org/packages/bf/4f/12df843e3e10d18d468a7557058f8d3733e8b6e12401f30b1ef29360740f/lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba", size = 4775632, upload-time = "2025-09-22T04:03:03.814Z" },
{ url = "https://files.pythonhosted.org/packages/e4/0c/9dc31e6c2d0d418483cbcb469d1f5a582a1cd00a1f4081953d44051f3c50/lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601", size = 4975171, upload-time = "2025-09-22T04:03:05.651Z" },
{ url = "https://files.pythonhosted.org/packages/e7/2b/9b870c6ca24c841bdd887504808f0417aa9d8d564114689266f19ddf29c8/lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed", size = 5110109, upload-time = "2025-09-22T04:03:07.452Z" },
{ url = "https://files.pythonhosted.org/packages/bf/0c/4f5f2a4dd319a178912751564471355d9019e220c20d7db3fb8307ed8582/lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37", size = 5041061, upload-time = "2025-09-22T04:03:09.297Z" },
{ url = "https://files.pythonhosted.org/packages/12/64/554eed290365267671fe001a20d72d14f468ae4e6acef1e179b039436967/lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338", size = 5306233, upload-time = "2025-09-22T04:03:11.651Z" },
{ url = "https://files.pythonhosted.org/packages/7a/31/1d748aa275e71802ad9722df32a7a35034246b42c0ecdd8235412c3396ef/lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9", size = 5604739, upload-time = "2025-09-22T04:03:13.592Z" },
{ url = "https://files.pythonhosted.org/packages/8f/41/2c11916bcac09ed561adccacceaedd2bf0e0b25b297ea92aab99fd03d0fa/lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd", size = 5225119, upload-time = "2025-09-22T04:03:15.408Z" },
{ url = "https://files.pythonhosted.org/packages/99/05/4e5c2873d8f17aa018e6afde417c80cc5d0c33be4854cce3ef5670c49367/lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d", size = 4633665, upload-time = "2025-09-22T04:03:17.262Z" },
{ url = "https://files.pythonhosted.org/packages/0f/c9/dcc2da1bebd6275cdc723b515f93edf548b82f36a5458cca3578bc899332/lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9", size = 5234997, upload-time = "2025-09-22T04:03:19.14Z" },
{ url = "https://files.pythonhosted.org/packages/9c/e2/5172e4e7468afca64a37b81dba152fc5d90e30f9c83c7c3213d6a02a5ce4/lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e", size = 5090957, upload-time = "2025-09-22T04:03:21.436Z" },
{ url = "https://files.pythonhosted.org/packages/a5/b3/15461fd3e5cd4ddcb7938b87fc20b14ab113b92312fc97afe65cd7c85de1/lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d", size = 4764372, upload-time = "2025-09-22T04:03:23.27Z" },
{ url = "https://files.pythonhosted.org/packages/05/33/f310b987c8bf9e61c4dd8e8035c416bd3230098f5e3cfa69fc4232de7059/lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec", size = 5634653, upload-time = "2025-09-22T04:03:25.767Z" },
{ url = "https://files.pythonhosted.org/packages/70/ff/51c80e75e0bc9382158133bdcf4e339b5886c6ee2418b5199b3f1a61ed6d/lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272", size = 5233795, upload-time = "2025-09-22T04:03:27.62Z" },
{ url = "https://files.pythonhosted.org/packages/56/4d/4856e897df0d588789dd844dbed9d91782c4ef0b327f96ce53c807e13128/lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f", size = 5257023, upload-time = "2025-09-22T04:03:30.056Z" },
{ url = "https://files.pythonhosted.org/packages/0f/85/86766dfebfa87bea0ab78e9ff7a4b4b45225df4b4d3b8cc3c03c5cd68464/lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312", size = 3911420, upload-time = "2025-09-22T04:03:32.198Z" },
{ url = "https://files.pythonhosted.org/packages/fe/1a/b248b355834c8e32614650b8008c69ffeb0ceb149c793961dd8c0b991bb3/lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca", size = 4406837, upload-time = "2025-09-22T04:03:34.027Z" },
{ url = "https://files.pythonhosted.org/packages/92/aa/df863bcc39c5e0946263454aba394de8a9084dbaff8ad143846b0d844739/lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c", size = 3822205, upload-time = "2025-09-22T04:03:36.249Z" },
]
[[package]]
name = "markupsafe"
version = "3.0.3"
@ -373,6 +600,129 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
]
[[package]]
name = "numpy"
version = "2.4.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/10/8b/c265f4823726ab832de836cdd184d0986dcf94480f81e8739692a7ac7af2/numpy-2.4.3.tar.gz", hash = "sha256:483a201202b73495f00dbc83796c6ae63137a9bdade074f7648b3e32613412dd", size = 20727743, upload-time = "2026-03-09T07:58:53.426Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a9/ed/6388632536f9788cea23a3a1b629f25b43eaacd7d7377e5d6bc7b9deb69b/numpy-2.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:61b0cbabbb6126c8df63b9a3a0c4b1f44ebca5e12ff6997b80fcf267fb3150ef", size = 16669628, upload-time = "2026-03-09T07:56:24.252Z" },
{ url = "https://files.pythonhosted.org/packages/74/1b/ee2abfc68e1ce728b2958b6ba831d65c62e1b13ce3017c13943f8f9b5b2e/numpy-2.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7395e69ff32526710748f92cd8c9849b361830968ea3e24a676f272653e8983e", size = 14696872, upload-time = "2026-03-09T07:56:26.991Z" },
{ url = "https://files.pythonhosted.org/packages/ba/d1/780400e915ff5638166f11ca9dc2c5815189f3d7cf6f8759a1685e586413/numpy-2.4.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:abdce0f71dcb4a00e4e77f3faf05e4616ceccfe72ccaa07f47ee79cda3b7b0f4", size = 5203489, upload-time = "2026-03-09T07:56:29.414Z" },
{ url = "https://files.pythonhosted.org/packages/0b/bb/baffa907e9da4cc34a6e556d6d90e032f6d7a75ea47968ea92b4858826c4/numpy-2.4.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:48da3a4ee1336454b07497ff7ec83903efa5505792c4e6d9bf83d99dc07a1e18", size = 6550814, upload-time = "2026-03-09T07:56:32.225Z" },
{ url = "https://files.pythonhosted.org/packages/7b/12/8c9f0c6c95f76aeb20fc4a699c33e9f827fa0d0f857747c73bb7b17af945/numpy-2.4.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32e3bef222ad6b052280311d1d60db8e259e4947052c3ae7dd6817451fc8a4c5", size = 15666601, upload-time = "2026-03-09T07:56:34.461Z" },
{ url = "https://files.pythonhosted.org/packages/bd/79/cc665495e4d57d0aa6fbcc0aa57aa82671dfc78fbf95fe733ed86d98f52a/numpy-2.4.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e7dd01a46700b1967487141a66ac1a3cf0dd8ebf1f08db37d46389401512ca97", size = 16621358, upload-time = "2026-03-09T07:56:36.852Z" },
{ url = "https://files.pythonhosted.org/packages/a8/40/b4ecb7224af1065c3539f5ecfff879d090de09608ad1008f02c05c770cb3/numpy-2.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:76f0f283506c28b12bba319c0fab98217e9f9b54e6160e9c79e9f7348ba32e9c", size = 17016135, upload-time = "2026-03-09T07:56:39.337Z" },
{ url = "https://files.pythonhosted.org/packages/f7/b1/6a88e888052eed951afed7a142dcdf3b149a030ca59b4c71eef085858e43/numpy-2.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:737f630a337364665aba3b5a77e56a68cc42d350edd010c345d65a3efa3addcc", size = 18345816, upload-time = "2026-03-09T07:56:42.31Z" },
{ url = "https://files.pythonhosted.org/packages/f3/8f/103a60c5f8c3d7fc678c19cd7b2476110da689ccb80bc18050efbaeae183/numpy-2.4.3-cp312-cp312-win32.whl", hash = "sha256:26952e18d82a1dbbc2f008d402021baa8d6fc8e84347a2072a25e08b46d698b9", size = 5960132, upload-time = "2026-03-09T07:56:44.851Z" },
{ url = "https://files.pythonhosted.org/packages/d7/7c/f5ee1bf6ed888494978046a809df2882aad35d414b622893322df7286879/numpy-2.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:65f3c2455188f09678355f5cae1f959a06b778bc66d535da07bf2ef20cd319d5", size = 12316144, upload-time = "2026-03-09T07:56:47.057Z" },
{ url = "https://files.pythonhosted.org/packages/71/46/8d1cb3f7a00f2fb6394140e7e6623696e54c6318a9d9691bb4904672cf42/numpy-2.4.3-cp312-cp312-win_arm64.whl", hash = "sha256:2abad5c7fef172b3377502bde47892439bae394a71bc329f31df0fd829b41a9e", size = 10220364, upload-time = "2026-03-09T07:56:49.849Z" },
{ url = "https://files.pythonhosted.org/packages/b6/d0/1fe47a98ce0df229238b77611340aff92d52691bcbc10583303181abf7fc/numpy-2.4.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b346845443716c8e542d54112966383b448f4a3ba5c66409771b8c0889485dd3", size = 16665297, upload-time = "2026-03-09T07:56:52.296Z" },
{ url = "https://files.pythonhosted.org/packages/27/d9/4e7c3f0e68dfa91f21c6fb6cf839bc829ec920688b1ce7ec722b1a6202fb/numpy-2.4.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2629289168f4897a3c4e23dc98d6f1731f0fc0fe52fb9db19f974041e4cc12b9", size = 14691853, upload-time = "2026-03-09T07:56:54.992Z" },
{ url = "https://files.pythonhosted.org/packages/3a/66/bd096b13a87549683812b53ab211e6d413497f84e794fb3c39191948da97/numpy-2.4.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:bb2e3cf95854233799013779216c57e153c1ee67a0bf92138acca0e429aefaee", size = 5198435, upload-time = "2026-03-09T07:56:57.184Z" },
{ url = "https://files.pythonhosted.org/packages/a2/2f/687722910b5a5601de2135c891108f51dfc873d8e43c8ed9f4ebb440b4a2/numpy-2.4.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:7f3408ff897f8ab07a07fbe2823d7aee6ff644c097cc1f90382511fe982f647f", size = 6546347, upload-time = "2026-03-09T07:56:59.531Z" },
{ url = "https://files.pythonhosted.org/packages/bf/ec/7971c4e98d86c564750393fab8d7d83d0a9432a9d78bb8a163a6dc59967a/numpy-2.4.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:decb0eb8a53c3b009b0962378065589685d66b23467ef5dac16cbe818afde27f", size = 15664626, upload-time = "2026-03-09T07:57:01.385Z" },
{ url = "https://files.pythonhosted.org/packages/7e/eb/7daecbea84ec935b7fc732e18f532073064a3816f0932a40a17f3349185f/numpy-2.4.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5f51900414fc9204a0e0da158ba2ac52b75656e7dce7e77fb9f84bfa343b4cc", size = 16608916, upload-time = "2026-03-09T07:57:04.008Z" },
{ url = "https://files.pythonhosted.org/packages/df/58/2a2b4a817ffd7472dca4421d9f0776898b364154e30c95f42195041dc03b/numpy-2.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6bd06731541f89cdc01b261ba2c9e037f1543df7472517836b78dfb15bd6e476", size = 17015824, upload-time = "2026-03-09T07:57:06.347Z" },
{ url = "https://files.pythonhosted.org/packages/4a/ca/627a828d44e78a418c55f82dd4caea8ea4a8ef24e5144d9e71016e52fb40/numpy-2.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22654fe6be0e5206f553a9250762c653d3698e46686eee53b399ab90da59bd92", size = 18334581, upload-time = "2026-03-09T07:57:09.114Z" },
{ url = "https://files.pythonhosted.org/packages/cd/c0/76f93962fc79955fcba30a429b62304332345f22d4daec1cb33653425643/numpy-2.4.3-cp313-cp313-win32.whl", hash = "sha256:d71e379452a2f670ccb689ec801b1218cd3983e253105d6e83780967e899d687", size = 5958618, upload-time = "2026-03-09T07:57:11.432Z" },
{ url = "https://files.pythonhosted.org/packages/b1/3c/88af0040119209b9b5cb59485fa48b76f372c73068dbf9254784b975ac53/numpy-2.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:0a60e17a14d640f49146cb38e3f105f571318db7826d9b6fef7e4dce758faecd", size = 12312824, upload-time = "2026-03-09T07:57:13.586Z" },
{ url = "https://files.pythonhosted.org/packages/58/ce/3d07743aced3d173f877c3ef6a454c2174ba42b584ab0b7e6d99374f51ed/numpy-2.4.3-cp313-cp313-win_arm64.whl", hash = "sha256:c9619741e9da2059cd9c3f206110b97583c7152c1dc9f8aafd4beb450ac1c89d", size = 10221218, upload-time = "2026-03-09T07:57:16.183Z" },
{ url = "https://files.pythonhosted.org/packages/62/09/d96b02a91d09e9d97862f4fc8bfebf5400f567d8eb1fe4b0cc4795679c15/numpy-2.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7aa4e54f6469300ebca1d9eb80acd5253cdfa36f2c03d79a35883687da430875", size = 14819570, upload-time = "2026-03-09T07:57:18.564Z" },
{ url = "https://files.pythonhosted.org/packages/b5/ca/0b1aba3905fdfa3373d523b2b15b19029f4f3031c87f4066bd9d20ef6c6b/numpy-2.4.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d1b90d840b25874cf5cd20c219af10bac3667db3876d9a495609273ebe679070", size = 5326113, upload-time = "2026-03-09T07:57:21.052Z" },
{ url = "https://files.pythonhosted.org/packages/c0/63/406e0fd32fcaeb94180fd6a4c41e55736d676c54346b7efbce548b94a914/numpy-2.4.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a749547700de0a20a6718293396ec237bb38218049cfce788e08fcb716e8cf73", size = 6646370, upload-time = "2026-03-09T07:57:22.804Z" },
{ url = "https://files.pythonhosted.org/packages/b6/d0/10f7dc157d4b37af92720a196be6f54f889e90dcd30dce9dc657ed92c257/numpy-2.4.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f3c4a151a2e529adf49c1d54f0f57ff8f9b233ee4d44af623a81553ab86368", size = 15723499, upload-time = "2026-03-09T07:57:24.693Z" },
{ url = "https://files.pythonhosted.org/packages/66/f1/d1c2bf1161396629701bc284d958dc1efa3a5a542aab83cf11ee6eb4cba5/numpy-2.4.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22c31dc07025123aedf7f2db9e91783df13f1776dc52c6b22c620870dc0fab22", size = 16657164, upload-time = "2026-03-09T07:57:27.676Z" },
{ url = "https://files.pythonhosted.org/packages/1a/be/cca19230b740af199ac47331a21c71e7a3d0ba59661350483c1600d28c37/numpy-2.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:148d59127ac95979d6f07e4d460f934ebdd6eed641db9c0db6c73026f2b2101a", size = 17081544, upload-time = "2026-03-09T07:57:30.664Z" },
{ url = "https://files.pythonhosted.org/packages/b9/c5/9602b0cbb703a0936fb40f8a95407e8171935b15846de2f0776e08af04c7/numpy-2.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a97cbf7e905c435865c2d939af3d93f99d18eaaa3cabe4256f4304fb51604349", size = 18380290, upload-time = "2026-03-09T07:57:33.763Z" },
{ url = "https://files.pythonhosted.org/packages/ed/81/9f24708953cd30be9ee36ec4778f4b112b45165812f2ada4cc5ea1c1f254/numpy-2.4.3-cp313-cp313t-win32.whl", hash = "sha256:be3b8487d725a77acccc9924f65fd8bce9af7fac8c9820df1049424a2115af6c", size = 6082814, upload-time = "2026-03-09T07:57:36.491Z" },
{ url = "https://files.pythonhosted.org/packages/e2/9e/52f6eaa13e1a799f0ab79066c17f7016a4a8ae0c1aefa58c82b4dab690b4/numpy-2.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1ec84fd7c8e652b0f4aaaf2e6e9cc8eaa9b1b80a537e06b2e3a2fb176eedcb26", size = 12452673, upload-time = "2026-03-09T07:57:38.281Z" },
{ url = "https://files.pythonhosted.org/packages/c4/04/b8cece6ead0b30c9fbd99bb835ad7ea0112ac5f39f069788c5558e3b1ab2/numpy-2.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:120df8c0a81ebbf5b9020c91439fccd85f5e018a927a39f624845be194a2be02", size = 10290907, upload-time = "2026-03-09T07:57:40.747Z" },
{ url = "https://files.pythonhosted.org/packages/70/ae/3936f79adebf8caf81bd7a599b90a561334a658be4dcc7b6329ebf4ee8de/numpy-2.4.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:5884ce5c7acfae1e4e1b6fde43797d10aa506074d25b531b4f54bde33c0c31d4", size = 16664563, upload-time = "2026-03-09T07:57:43.817Z" },
{ url = "https://files.pythonhosted.org/packages/9b/62/760f2b55866b496bb1fa7da2a6db076bef908110e568b02fcfc1422e2a3a/numpy-2.4.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:297837823f5bc572c5f9379b0c9f3a3365f08492cbdc33bcc3af174372ebb168", size = 14702161, upload-time = "2026-03-09T07:57:46.169Z" },
{ url = "https://files.pythonhosted.org/packages/32/af/a7a39464e2c0a21526fb4fb76e346fb172ebc92f6d1c7a07c2c139cc17b1/numpy-2.4.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:a111698b4a3f8dcbe54c64a7708f049355abd603e619013c346553c1fd4ca90b", size = 5208738, upload-time = "2026-03-09T07:57:48.506Z" },
{ url = "https://files.pythonhosted.org/packages/29/8c/2a0cf86a59558fa078d83805589c2de490f29ed4fb336c14313a161d358a/numpy-2.4.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:4bd4741a6a676770e0e97fe9ab2e51de01183df3dcbcec591d26d331a40de950", size = 6543618, upload-time = "2026-03-09T07:57:50.591Z" },
{ url = "https://files.pythonhosted.org/packages/aa/b8/612ce010c0728b1c363fa4ea3aa4c22fe1c5da1de008486f8c2f5cb92fae/numpy-2.4.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54f29b877279d51e210e0c80709ee14ccbbad647810e8f3d375561c45ef613dd", size = 15680676, upload-time = "2026-03-09T07:57:52.34Z" },
{ url = "https://files.pythonhosted.org/packages/a9/7e/4f120ecc54ba26ddf3dc348eeb9eb063f421de65c05fc961941798feea18/numpy-2.4.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:679f2a834bae9020f81534671c56fd0cc76dd7e5182f57131478e23d0dc59e24", size = 16613492, upload-time = "2026-03-09T07:57:54.91Z" },
{ url = "https://files.pythonhosted.org/packages/2c/86/1b6020db73be330c4b45d5c6ee4295d59cfeef0e3ea323959d053e5a6909/numpy-2.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d84f0f881cb2225c2dfd7f78a10a5645d487a496c6668d6cc39f0f114164f3d0", size = 17031789, upload-time = "2026-03-09T07:57:57.641Z" },
{ url = "https://files.pythonhosted.org/packages/07/3a/3b90463bf41ebc21d1b7e06079f03070334374208c0f9a1f05e4ae8455e7/numpy-2.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d213c7e6e8d211888cc359bab7199670a00f5b82c0978b9d1c75baf1eddbeac0", size = 18339941, upload-time = "2026-03-09T07:58:00.577Z" },
{ url = "https://files.pythonhosted.org/packages/a8/74/6d736c4cd962259fd8bae9be27363eb4883a2f9069763747347544c2a487/numpy-2.4.3-cp314-cp314-win32.whl", hash = "sha256:52077feedeff7c76ed7c9f1a0428558e50825347b7545bbb8523da2cd55c547a", size = 6007503, upload-time = "2026-03-09T07:58:03.331Z" },
{ url = "https://files.pythonhosted.org/packages/48/39/c56ef87af669364356bb011922ef0734fc49dad51964568634c72a009488/numpy-2.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:0448e7f9caefb34b4b7dd2b77f21e8906e5d6f0365ad525f9f4f530b13df2afc", size = 12444915, upload-time = "2026-03-09T07:58:06.353Z" },
{ url = "https://files.pythonhosted.org/packages/9d/1f/ab8528e38d295fd349310807496fabb7cf9fe2e1f70b97bc20a483ea9d4a/numpy-2.4.3-cp314-cp314-win_arm64.whl", hash = "sha256:b44fd60341c4d9783039598efadd03617fa28d041fc37d22b62d08f2027fa0e7", size = 10494875, upload-time = "2026-03-09T07:58:08.734Z" },
{ url = "https://files.pythonhosted.org/packages/e6/ef/b7c35e4d5ef141b836658ab21a66d1a573e15b335b1d111d31f26c8ef80f/numpy-2.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0a195f4216be9305a73c0e91c9b026a35f2161237cf1c6de9b681637772ea657", size = 14822225, upload-time = "2026-03-09T07:58:11.034Z" },
{ url = "https://files.pythonhosted.org/packages/cd/8d/7730fa9278cf6648639946cc816e7cc89f0d891602584697923375f801ed/numpy-2.4.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:cd32fbacb9fd1bf041bf8e89e4576b6f00b895f06d00914820ae06a616bdfef7", size = 5328769, upload-time = "2026-03-09T07:58:13.67Z" },
{ url = "https://files.pythonhosted.org/packages/47/01/d2a137317c958b074d338807c1b6a383406cdf8b8e53b075d804cc3d211d/numpy-2.4.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:2e03c05abaee1f672e9d67bc858f300b5ccba1c21397211e8d77d98350972093", size = 6649461, upload-time = "2026-03-09T07:58:15.912Z" },
{ url = "https://files.pythonhosted.org/packages/5c/34/812ce12bc0f00272a4b0ec0d713cd237cb390666eb6206323d1cc9cedbb2/numpy-2.4.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d1ce23cce91fcea443320a9d0ece9b9305d4368875bab09538f7a5b4131938a", size = 15725809, upload-time = "2026-03-09T07:58:17.787Z" },
{ url = "https://files.pythonhosted.org/packages/25/c0/2aed473a4823e905e765fee3dc2cbf504bd3e68ccb1150fbdabd5c39f527/numpy-2.4.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c59020932feb24ed49ffd03704fbab89f22aa9c0d4b180ff45542fe8918f5611", size = 16655242, upload-time = "2026-03-09T07:58:20.476Z" },
{ url = "https://files.pythonhosted.org/packages/f2/c8/7e052b2fc87aa0e86de23f20e2c42bd261c624748aa8efd2c78f7bb8d8c6/numpy-2.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9684823a78a6cd6ad7511fc5e25b07947d1d5b5e2812c93fe99d7d4195130720", size = 17080660, upload-time = "2026-03-09T07:58:23.067Z" },
{ url = "https://files.pythonhosted.org/packages/f3/3d/0876746044db2adcb11549f214d104f2e1be00f07a67edbb4e2812094847/numpy-2.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0200b25c687033316fb39f0ff4e3e690e8957a2c3c8d22499891ec58c37a3eb5", size = 18380384, upload-time = "2026-03-09T07:58:25.839Z" },
{ url = "https://files.pythonhosted.org/packages/07/12/8160bea39da3335737b10308df4f484235fd297f556745f13092aa039d3b/numpy-2.4.3-cp314-cp314t-win32.whl", hash = "sha256:5e10da9e93247e554bb1d22f8edc51847ddd7dde52d85ce31024c1b4312bfba0", size = 6154547, upload-time = "2026-03-09T07:58:28.289Z" },
{ url = "https://files.pythonhosted.org/packages/42/f3/76534f61f80d74cc9cdf2e570d3d4eeb92c2280a27c39b0aaf471eda7b48/numpy-2.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:45f003dbdffb997a03da2d1d0cb41fbd24a87507fb41605c0420a3db5bd4667b", size = 12633645, upload-time = "2026-03-09T07:58:30.384Z" },
{ url = "https://files.pythonhosted.org/packages/1f/b6/7c0d4334c15983cec7f92a69e8ce9b1e6f31857e5ee3a413ac424e6bd63d/numpy-2.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:4d382735cecd7bcf090172489a525cd7d4087bc331f7df9f60ddc9a296cf208e", size = 10565454, upload-time = "2026-03-09T07:58:33.031Z" },
]
[[package]]
name = "orjson"
version = "3.11.7"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/80/bf/76f4f1665f6983385938f0e2a5d7efa12a58171b8456c252f3bae8a4cf75/orjson-3.11.7-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bd03ea7606833655048dab1a00734a2875e3e86c276e1d772b2a02556f0d895f", size = 228545, upload-time = "2026-02-02T15:37:46.376Z" },
{ url = "https://files.pythonhosted.org/packages/79/53/6c72c002cb13b5a978a068add59b25a8bdf2800ac1c9c8ecdb26d6d97064/orjson-3.11.7-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:89e440ebc74ce8ab5c7bc4ce6757b4a6b1041becb127df818f6997b5c71aa60b", size = 125224, upload-time = "2026-02-02T15:37:47.697Z" },
{ url = "https://files.pythonhosted.org/packages/2c/83/10e48852865e5dd151bdfe652c06f7da484578ed02c5fca938e3632cb0b8/orjson-3.11.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ede977b5fe5ac91b1dffc0a517ca4542d2ec8a6a4ff7b2652d94f640796342a", size = 128154, upload-time = "2026-02-02T15:37:48.954Z" },
{ url = "https://files.pythonhosted.org/packages/6e/52/a66e22a2b9abaa374b4a081d410edab6d1e30024707b87eab7c734afe28d/orjson-3.11.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b1dae39230a393df353827c855a5f176271c23434cfd2db74e0e424e693e10", size = 123548, upload-time = "2026-02-02T15:37:50.187Z" },
{ url = "https://files.pythonhosted.org/packages/de/38/605d371417021359f4910c496f764c48ceb8997605f8c25bf1dfe58c0ebe/orjson-3.11.7-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed46f17096e28fb28d2975834836a639af7278aa87c84f68ab08fbe5b8bd75fa", size = 129000, upload-time = "2026-02-02T15:37:51.426Z" },
{ url = "https://files.pythonhosted.org/packages/44/98/af32e842b0ffd2335c89714d48ca4e3917b42f5d6ee5537832e069a4b3ac/orjson-3.11.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3726be79e36e526e3d9c1aceaadbfb4a04ee80a72ab47b3f3c17fefb9812e7b8", size = 141686, upload-time = "2026-02-02T15:37:52.607Z" },
{ url = "https://files.pythonhosted.org/packages/96/0b/fc793858dfa54be6feee940c1463370ece34b3c39c1ca0aa3845f5ba9892/orjson-3.11.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0724e265bc548af1dedebd9cb3d24b4e1c1e685a343be43e87ba922a5c5fff2f", size = 130812, upload-time = "2026-02-02T15:37:53.944Z" },
{ url = "https://files.pythonhosted.org/packages/dc/91/98a52415059db3f374757d0b7f0f16e3b5cd5976c90d1c2b56acaea039e6/orjson-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7745312efa9e11c17fbd3cb3097262d079da26930ae9ae7ba28fb738367cbad", size = 133440, upload-time = "2026-02-02T15:37:55.615Z" },
{ url = "https://files.pythonhosted.org/packages/dc/b6/cb540117bda61791f46381f8c26c8f93e802892830a6055748d3bb1925ab/orjson-3.11.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f904c24bdeabd4298f7a977ef14ca2a022ca921ed670b92ecd16ab6f3d01f867", size = 138386, upload-time = "2026-02-02T15:37:56.814Z" },
{ url = "https://files.pythonhosted.org/packages/63/1a/50a3201c334a7f17c231eee5f841342190723794e3b06293f26e7cf87d31/orjson-3.11.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b9fc4d0f81f394689e0814617aadc4f2ea0e8025f38c226cbf22d3b5ddbf025d", size = 408853, upload-time = "2026-02-02T15:37:58.291Z" },
{ url = "https://files.pythonhosted.org/packages/87/cd/8de1c67d0be44fdc22701e5989c0d015a2adf391498ad42c4dc589cd3013/orjson-3.11.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:849e38203e5be40b776ed2718e587faf204d184fc9a008ae441f9442320c0cab", size = 144130, upload-time = "2026-02-02T15:38:00.163Z" },
{ url = "https://files.pythonhosted.org/packages/0f/fe/d605d700c35dd55f51710d159fc54516a280923cd1b7e47508982fbb387d/orjson-3.11.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4682d1db3bcebd2b64757e0ddf9e87ae5f00d29d16c5cdf3a62f561d08cc3dd2", size = 134818, upload-time = "2026-02-02T15:38:01.507Z" },
{ url = "https://files.pythonhosted.org/packages/e4/e4/15ecc67edb3ddb3e2f46ae04475f2d294e8b60c1825fbe28a428b93b3fbd/orjson-3.11.7-cp312-cp312-win32.whl", hash = "sha256:f4f7c956b5215d949a1f65334cf9d7612dde38f20a95f2315deef167def91a6f", size = 127923, upload-time = "2026-02-02T15:38:02.75Z" },
{ url = "https://files.pythonhosted.org/packages/34/70/2e0855361f76198a3965273048c8e50a9695d88cd75811a5b46444895845/orjson-3.11.7-cp312-cp312-win_amd64.whl", hash = "sha256:bf742e149121dc5648ba0a08ea0871e87b660467ef168a3a5e53bc1fbd64bb74", size = 125007, upload-time = "2026-02-02T15:38:04.032Z" },
{ url = "https://files.pythonhosted.org/packages/68/40/c2051bd19fc467610fed469dc29e43ac65891571138f476834ca192bc290/orjson-3.11.7-cp312-cp312-win_arm64.whl", hash = "sha256:26c3b9132f783b7d7903bf1efb095fed8d4a3a85ec0d334ee8beff3d7a4749d5", size = 126089, upload-time = "2026-02-02T15:38:05.297Z" },
{ url = "https://files.pythonhosted.org/packages/89/25/6e0e52cac5aab51d7b6dcd257e855e1dec1c2060f6b28566c509b4665f62/orjson-3.11.7-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1d98b30cc1313d52d4af17d9c3d307b08389752ec5f2e5febdfada70b0f8c733", size = 228390, upload-time = "2026-02-02T15:38:06.8Z" },
{ url = "https://files.pythonhosted.org/packages/a5/29/a77f48d2fc8a05bbc529e5ff481fb43d914f9e383ea2469d4f3d51df3d00/orjson-3.11.7-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:d897e81f8d0cbd2abb82226d1860ad2e1ab3ff16d7b08c96ca00df9d45409ef4", size = 125189, upload-time = "2026-02-02T15:38:08.181Z" },
{ url = "https://files.pythonhosted.org/packages/89/25/0a16e0729a0e6a1504f9d1a13cdd365f030068aab64cec6958396b9969d7/orjson-3.11.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814be4b49b228cfc0b3c565acf642dd7d13538f966e3ccde61f4f55be3e20785", size = 128106, upload-time = "2026-02-02T15:38:09.41Z" },
{ url = "https://files.pythonhosted.org/packages/66/da/a2e505469d60666a05ab373f1a6322eb671cb2ba3a0ccfc7d4bc97196787/orjson-3.11.7-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d06e5c5fed5caedd2e540d62e5b1c25e8c82431b9e577c33537e5fa4aa909539", size = 123363, upload-time = "2026-02-02T15:38:10.73Z" },
{ url = "https://files.pythonhosted.org/packages/23/bf/ed73f88396ea35c71b38961734ea4a4746f7ca0768bf28fd551d37e48dd0/orjson-3.11.7-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31c80ce534ac4ea3739c5ee751270646cbc46e45aea7576a38ffec040b4029a1", size = 129007, upload-time = "2026-02-02T15:38:12.138Z" },
{ url = "https://files.pythonhosted.org/packages/73/3c/b05d80716f0225fc9008fbf8ab22841dcc268a626aa550561743714ce3bf/orjson-3.11.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f50979824bde13d32b4320eedd513431c921102796d86be3eee0b58e58a3ecd1", size = 141667, upload-time = "2026-02-02T15:38:13.398Z" },
{ url = "https://files.pythonhosted.org/packages/61/e8/0be9b0addd9bf86abfc938e97441dcd0375d494594b1c8ad10fe57479617/orjson-3.11.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e54f3808e2b6b945078c41aa8d9b5834b28c50843846e97807e5adb75fa9705", size = 130832, upload-time = "2026-02-02T15:38:14.698Z" },
{ url = "https://files.pythonhosted.org/packages/c9/ec/c68e3b9021a31d9ec15a94931db1410136af862955854ed5dd7e7e4f5bff/orjson-3.11.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12b80df61aab7b98b490fe9e4879925ba666fccdfcd175252ce4d9035865ace", size = 133373, upload-time = "2026-02-02T15:38:16.109Z" },
{ url = "https://files.pythonhosted.org/packages/d2/45/f3466739aaafa570cc8e77c6dbb853c48bf56e3b43738020e2661e08b0ac/orjson-3.11.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:996b65230271f1a97026fd0e6a753f51fbc0c335d2ad0c6201f711b0da32693b", size = 138307, upload-time = "2026-02-02T15:38:17.453Z" },
{ url = "https://files.pythonhosted.org/packages/e1/84/9f7f02288da1ffb31405c1be07657afd1eecbcb4b64ee2817b6fe0f785fa/orjson-3.11.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ab49d4b2a6a1d415ddb9f37a21e02e0d5dbfe10b7870b21bf779fc21e9156157", size = 408695, upload-time = "2026-02-02T15:38:18.831Z" },
{ url = "https://files.pythonhosted.org/packages/18/07/9dd2f0c0104f1a0295ffbe912bc8d63307a539b900dd9e2c48ef7810d971/orjson-3.11.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:390a1dce0c055ddf8adb6aa94a73b45a4a7d7177b5c584b8d1c1947f2ba60fb3", size = 144099, upload-time = "2026-02-02T15:38:20.28Z" },
{ url = "https://files.pythonhosted.org/packages/a5/66/857a8e4a3292e1f7b1b202883bcdeb43a91566cf59a93f97c53b44bd6801/orjson-3.11.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1eb80451a9c351a71dfaf5b7ccc13ad065405217726b59fdbeadbcc544f9d223", size = 134806, upload-time = "2026-02-02T15:38:22.186Z" },
{ url = "https://files.pythonhosted.org/packages/0a/5b/6ebcf3defc1aab3a338ca777214966851e92efb1f30dc7fc8285216e6d1b/orjson-3.11.7-cp313-cp313-win32.whl", hash = "sha256:7477aa6a6ec6139c5cb1cc7b214643592169a5494d200397c7fc95d740d5fcf3", size = 127914, upload-time = "2026-02-02T15:38:23.511Z" },
{ url = "https://files.pythonhosted.org/packages/00/04/c6f72daca5092e3117840a1b1e88dfc809cc1470cf0734890d0366b684a1/orjson-3.11.7-cp313-cp313-win_amd64.whl", hash = "sha256:b9f95dcdea9d4f805daa9ddf02617a89e484c6985fa03055459f90e87d7a0757", size = 124986, upload-time = "2026-02-02T15:38:24.836Z" },
{ url = "https://files.pythonhosted.org/packages/03/ba/077a0f6f1085d6b806937246860fafbd5b17f3919c70ee3f3d8d9c713f38/orjson-3.11.7-cp313-cp313-win_arm64.whl", hash = "sha256:800988273a014a0541483dc81021247d7eacb0c845a9d1a34a422bc718f41539", size = 126045, upload-time = "2026-02-02T15:38:26.216Z" },
{ url = "https://files.pythonhosted.org/packages/e9/1e/745565dca749813db9a093c5ebc4bac1a9475c64d54b95654336ac3ed961/orjson-3.11.7-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:de0a37f21d0d364954ad5de1970491d7fbd0fb1ef7417d4d56a36dc01ba0c0a0", size = 228391, upload-time = "2026-02-02T15:38:27.757Z" },
{ url = "https://files.pythonhosted.org/packages/46/19/e40f6225da4d3aa0c8dc6e5219c5e87c2063a560fe0d72a88deb59776794/orjson-3.11.7-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:c2428d358d85e8da9d37cba18b8c4047c55222007a84f97156a5b22028dfbfc0", size = 125188, upload-time = "2026-02-02T15:38:29.241Z" },
{ url = "https://files.pythonhosted.org/packages/9d/7e/c4de2babef2c0817fd1f048fd176aa48c37bec8aef53d2fa932983032cce/orjson-3.11.7-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c4bc6c6ac52cdaa267552544c73e486fecbd710b7ac09bc024d5a78555a22f6", size = 128097, upload-time = "2026-02-02T15:38:30.618Z" },
{ url = "https://files.pythonhosted.org/packages/eb/74/233d360632bafd2197f217eee7fb9c9d0229eac0c18128aee5b35b0014fe/orjson-3.11.7-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd0d68edd7dfca1b2eca9361a44ac9f24b078de3481003159929a0573f21a6bf", size = 123364, upload-time = "2026-02-02T15:38:32.363Z" },
{ url = "https://files.pythonhosted.org/packages/79/51/af79504981dd31efe20a9e360eb49c15f06df2b40e7f25a0a52d9ae888e8/orjson-3.11.7-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:623ad1b9548ef63886319c16fa317848e465a21513b31a6ad7b57443c3e0dcf5", size = 129076, upload-time = "2026-02-02T15:38:33.68Z" },
{ url = "https://files.pythonhosted.org/packages/67/e2/da898eb68b72304f8de05ca6715870d09d603ee98d30a27e8a9629abc64b/orjson-3.11.7-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6e776b998ac37c0396093d10290e60283f59cfe0fc3fccbd0ccc4bd04dd19892", size = 141705, upload-time = "2026-02-02T15:38:34.989Z" },
{ url = "https://files.pythonhosted.org/packages/c5/89/15364d92acb3d903b029e28d834edb8780c2b97404cbf7929aa6b9abdb24/orjson-3.11.7-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:652c6c3af76716f4a9c290371ba2e390ede06f6603edb277b481daf37f6f464e", size = 130855, upload-time = "2026-02-02T15:38:36.379Z" },
{ url = "https://files.pythonhosted.org/packages/c2/8b/ecdad52d0b38d4b8f514be603e69ccd5eacf4e7241f972e37e79792212ec/orjson-3.11.7-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a56df3239294ea5964adf074c54bcc4f0ccd21636049a2cf3ca9cf03b5d03cf1", size = 133386, upload-time = "2026-02-02T15:38:37.704Z" },
{ url = "https://files.pythonhosted.org/packages/b9/0e/45e1dcf10e17d0924b7c9162f87ec7b4ca79e28a0548acf6a71788d3e108/orjson-3.11.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bda117c4148e81f746655d5a3239ae9bd00cb7bc3ca178b5fc5a5997e9744183", size = 138295, upload-time = "2026-02-02T15:38:39.096Z" },
{ url = "https://files.pythonhosted.org/packages/63/d7/4d2e8b03561257af0450f2845b91fbd111d7e526ccdf737267108075e0ba/orjson-3.11.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:23d6c20517a97a9daf1d48b580fcdc6f0516c6f4b5038823426033690b4d2650", size = 408720, upload-time = "2026-02-02T15:38:40.634Z" },
{ url = "https://files.pythonhosted.org/packages/78/cf/d45343518282108b29c12a65892445fc51f9319dc3c552ceb51bb5905ed2/orjson-3.11.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:8ff206156006da5b847c9304b6308a01e8cdbc8cce824e2779a5ba71c3def141", size = 144152, upload-time = "2026-02-02T15:38:42.262Z" },
{ url = "https://files.pythonhosted.org/packages/a9/3a/d6001f51a7275aacd342e77b735c71fa04125a3f93c36fee4526bc8c654e/orjson-3.11.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:962d046ee1765f74a1da723f4b33e3b228fe3a48bd307acce5021dfefe0e29b2", size = 134814, upload-time = "2026-02-02T15:38:43.627Z" },
{ url = "https://files.pythonhosted.org/packages/1d/d3/f19b47ce16820cc2c480f7f1723e17f6d411b3a295c60c8ad3aa9ff1c96a/orjson-3.11.7-cp314-cp314-win32.whl", hash = "sha256:89e13dd3f89f1c38a9c9eba5fbf7cdc2d1feca82f5f290864b4b7a6aac704576", size = 127997, upload-time = "2026-02-02T15:38:45.06Z" },
{ url = "https://files.pythonhosted.org/packages/12/df/172771902943af54bf661a8d102bdf2e7f932127968080632bda6054b62c/orjson-3.11.7-cp314-cp314-win_amd64.whl", hash = "sha256:845c3e0d8ded9c9271cd79596b9b552448b885b97110f628fb687aee2eed11c1", size = 124985, upload-time = "2026-02-02T15:38:46.388Z" },
{ url = "https://files.pythonhosted.org/packages/6f/1c/f2a8d8a1b17514660a614ce5f7aac74b934e69f5abc2700cc7ced882a009/orjson-3.11.7-cp314-cp314-win_arm64.whl", hash = "sha256:4a2e9c5be347b937a2e0203866f12bba36082e89b402ddb9e927d5822e43088d", size = 126038, upload-time = "2026-02-02T15:38:47.703Z" },
]
[[package]]
name = "platformdirs"
version = "4.9.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/19/56/8d4c30c8a1d07013911a8fdbd8f89440ef9f08d07a1b50ab8ca8be5a20f9/platformdirs-4.9.4.tar.gz", hash = "sha256:1ec356301b7dc906d83f371c8f487070e99d3ccf9e501686456394622a01a934", size = 28737, upload-time = "2026-03-05T18:34:13.271Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/63/d7/97f7e3a6abb67d8080dd406fd4df842c2be0efaf712d1c899c32a075027c/platformdirs-4.9.4-py3-none-any.whl", hash = "sha256:68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868", size = 21216, upload-time = "2026-03-05T18:34:12.172Z" },
]
[[package]]
name = "playwright"
version = "1.58.0"
@ -392,6 +742,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c8/c4/cc0229fea55c87d6c9c67fe44a21e2cd28d1d558a5478ed4d617e9fb0c93/playwright-1.58.0-py3-none-win_arm64.whl", hash = "sha256:32ffe5c303901a13a0ecab91d1c3f74baf73b84f4bedbb6b935f5bc11cc98e1b", size = 33085919, upload-time = "2026-01-30T15:09:45.71Z" },
]
[[package]]
name = "playwright-stealth"
version = "2.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "playwright" },
]
sdist = { url = "https://files.pythonhosted.org/packages/61/ee/871901103c7b2a12070011fd4d978191f8f962837bf8bb51847274f528fa/playwright_stealth-2.0.2.tar.gz", hash = "sha256:ac57e51873190da5e653e03720e948c8f0a3d06b098f1d56763103d23ee48143", size = 24902, upload-time = "2026-02-13T02:36:25.137Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f1/30/f95f087f4b071611a7f63a2a0c9af4df3ac046dae2a693bfdacd70512867/playwright_stealth-2.0.2-py3-none-any.whl", hash = "sha256:37a5733f481b9c0ad602cf71491aa5a7c96c2a2fe4fa1e7ab764d2cd35520f2f", size = 33209, upload-time = "2026-02-13T02:36:26.334Z" },
]
[[package]]
name = "polars"
version = "1.39.0"
@ -450,6 +812,118 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a0/c4/b4d4827c93ef43c01f599ef31453ccc1c132b353284fc6c87d535c233129/pyee-13.0.1-py3-none-any.whl", hash = "sha256:af2f8fede4171ef667dfded53f96e2ed0d6e6bd7ee3bb46437f77e3b57689228", size = 15659, upload-time = "2026-02-14T21:12:26.263Z" },
]
[[package]]
name = "pyobjc-core"
version = "12.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b8/b6/d5612eb40be4fd5ef88c259339e6313f46ba67577a95d86c3470b951fce0/pyobjc_core-12.1.tar.gz", hash = "sha256:2bb3903f5387f72422145e1466b3ac3f7f0ef2e9960afa9bcd8961c5cbf8bd21", size = 1000532, upload-time = "2025-11-14T10:08:28.292Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/64/5a/6b15e499de73050f4a2c88fff664ae154307d25dc04da8fb38998a428358/pyobjc_core-12.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:818bcc6723561f207e5b5453efe9703f34bc8781d11ce9b8be286bb415eb4962", size = 678335, upload-time = "2025-11-14T09:32:20.107Z" },
{ url = "https://files.pythonhosted.org/packages/f4/d2/29e5e536adc07bc3d33dd09f3f7cf844bf7b4981820dc2a91dd810f3c782/pyobjc_core-12.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:01c0cf500596f03e21c23aef9b5f326b9fb1f8f118cf0d8b66749b6cf4cbb37a", size = 677370, upload-time = "2025-11-14T09:33:05.273Z" },
{ url = "https://files.pythonhosted.org/packages/1b/f0/4b4ed8924cd04e425f2a07269943018d43949afad1c348c3ed4d9d032787/pyobjc_core-12.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:177aaca84bb369a483e4961186704f64b2697708046745f8167e818d968c88fc", size = 719586, upload-time = "2025-11-14T09:33:53.302Z" },
{ url = "https://files.pythonhosted.org/packages/25/98/9f4ed07162de69603144ff480be35cd021808faa7f730d082b92f7ebf2b5/pyobjc_core-12.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:844515f5d86395b979d02152576e7dee9cc679acc0b32dc626ef5bda315eaa43", size = 670164, upload-time = "2025-11-14T09:34:37.458Z" },
{ url = "https://files.pythonhosted.org/packages/62/50/dc076965c96c7f0de25c0a32b7f8aa98133ed244deaeeacfc758783f1f30/pyobjc_core-12.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:453b191df1a4b80e756445b935491b974714456ae2cbae816840bd96f86db882", size = 712204, upload-time = "2025-11-14T09:35:24.148Z" },
]
[[package]]
name = "pyobjc-framework-cocoa"
version = "12.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pyobjc-core" },
]
sdist = { url = "https://files.pythonhosted.org/packages/02/a3/16ca9a15e77c061a9250afbae2eae26f2e1579eb8ca9462ae2d2c71e1169/pyobjc_framework_cocoa-12.1.tar.gz", hash = "sha256:5556c87db95711b985d5efdaaf01c917ddd41d148b1e52a0c66b1a2e2c5c1640", size = 2772191, upload-time = "2025-11-14T10:13:02.069Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/95/bf/ee4f27ec3920d5c6fc63c63e797c5b2cc4e20fe439217085d01ea5b63856/pyobjc_framework_cocoa-12.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:547c182837214b7ec4796dac5aee3aa25abc665757b75d7f44f83c994bcb0858", size = 384590, upload-time = "2025-11-14T09:41:17.336Z" },
{ url = "https://files.pythonhosted.org/packages/ad/31/0c2e734165abb46215797bd830c4bdcb780b699854b15f2b6240515edcc6/pyobjc_framework_cocoa-12.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5a3dcd491cacc2f5a197142b3c556d8aafa3963011110102a093349017705118", size = 384689, upload-time = "2025-11-14T09:41:41.478Z" },
{ url = "https://files.pythonhosted.org/packages/23/3b/b9f61be7b9f9b4e0a6db18b3c35c4c4d589f2d04e963e2174d38c6555a92/pyobjc_framework_cocoa-12.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:914b74328c22d8ca261d78c23ef2befc29776e0b85555973927b338c5734ca44", size = 388843, upload-time = "2025-11-14T09:42:05.719Z" },
{ url = "https://files.pythonhosted.org/packages/59/bb/f777cc9e775fc7dae77b569254570fe46eb842516b3e4fe383ab49eab598/pyobjc_framework_cocoa-12.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:03342a60fc0015bcdf9b93ac0b4f457d3938e9ef761b28df9564c91a14f0129a", size = 384932, upload-time = "2025-11-14T09:42:29.771Z" },
{ url = "https://files.pythonhosted.org/packages/58/27/b457b7b37089cad692c8aada90119162dfb4c4a16f513b79a8b2b022b33b/pyobjc_framework_cocoa-12.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:6ba1dc1bfa4da42d04e93d2363491275fb2e2be5c20790e561c8a9e09b8cf2cc", size = 388970, upload-time = "2025-11-14T09:42:53.964Z" },
]
[[package]]
name = "pysocks"
version = "1.7.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/bd/11/293dd436aea955d45fc4e8a35b6ae7270f5b8e00b53cf6c024c83b657a11/PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0", size = 284429, upload-time = "2019-09-20T02:07:35.714Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8d/59/b4572118e098ac8e46e399a1dd0f2d85403ce8bbaad9ec79373ed6badaf9/PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", size = 16725, upload-time = "2019-09-20T02:06:22.938Z" },
]
[[package]]
name = "pyyaml"
version = "6.0.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" },
{ url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" },
{ url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" },
{ url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" },
{ url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" },
{ url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" },
{ url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" },
{ url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" },
{ url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" },
{ url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" },
{ url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" },
{ url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" },
{ url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" },
{ url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" },
{ url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" },
{ url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" },
{ url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" },
{ url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" },
{ url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" },
{ url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" },
{ url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" },
{ url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" },
{ url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" },
{ url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" },
{ url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" },
{ url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" },
{ url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" },
{ url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" },
{ url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" },
{ url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" },
{ url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" },
{ url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" },
{ url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" },
{ url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" },
{ url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" },
{ url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" },
{ url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" },
{ url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
]
[[package]]
name = "requests"
version = "2.32.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "charset-normalizer" },
{ name = "idna" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
]
[[package]]
name = "screeninfo"
version = "0.8.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cython", marker = "sys_platform == 'darwin'" },
{ name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ec/bb/e69e5e628d43f118e0af4fc063c20058faa8635c95a1296764acc8167e27/screeninfo-0.8.1.tar.gz", hash = "sha256:9983076bcc7e34402a1a9e4d7dabf3729411fd2abb3f3b4be7eba73519cd2ed1", size = 10666, upload-time = "2022-09-09T11:35:23.419Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6e/bf/c5205d480307bef660e56544b9e3d7ff687da776abb30c9cb3f330887570/screeninfo-0.8.1-py3-none-any.whl", hash = "sha256:e97d6b173856edcfa3bd282f81deb528188aff14b11ec3e195584e7641be733c", size = 12907, upload-time = "2022-09-09T11:35:21.351Z" },
]
[[package]]
name = "soupsieve"
version = "2.8.3"
@ -459,6 +933,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" },
]
[[package]]
name = "tqdm"
version = "4.67.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" },
]
[[package]]
name = "typing-extensions"
version = "4.15.0"
@ -468,6 +954,35 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
]
[[package]]
name = "ua-parser"
version = "1.0.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "ua-parser-builtins" },
]
sdist = { url = "https://files.pythonhosted.org/packages/70/0e/ed98be735bc89d5040e0c60f5620d0b8c04e9e7da99ed1459e8050e90a77/ua_parser-1.0.1.tar.gz", hash = "sha256:f9d92bf19d4329019cef91707aecc23c6d65143ad7e29a233f0580fb0d15547d", size = 728106, upload-time = "2025-02-01T14:13:32.508Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/94/37/be6dfbfa45719aa82c008fb4772cfe5c46db765a2ca4b6f524a1fdfee4d7/ua_parser-1.0.1-py3-none-any.whl", hash = "sha256:b059f2cb0935addea7e551251cbbf42e9a8872f86134163bc1a4f79e0945ffea", size = 31410, upload-time = "2025-02-01T14:13:28.458Z" },
]
[[package]]
name = "ua-parser-builtins"
version = "202603"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3e/6f/73a4d37deefb159556d39d654b5bad67b6874d1ad0b20b96fb5a04de3949/ua_parser_builtins-202603-py3-none-any.whl", hash = "sha256:67478397a68fac1a98fd0a31c416ea7c65a719141fc151d0211316f2cd337cc9", size = 89573, upload-time = "2026-03-01T20:50:02.491Z" },
]
[[package]]
name = "urllib3"
version = "2.6.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
]
[[package]]
name = "werkzeug"
version = "3.1.6"

520
finder/zoopla.py Normal file
View file

@ -0,0 +1,520 @@
"""Zoopla (zoopla.co.uk) scraper — buy and rental properties.
Zoopla is behind Cloudflare Turnstile (managed interactive challenge), which
blocks all HTTP clients (curl_cffi, httpx) and even Playwright with stealth
patches. Only Camoufox (an anti-fingerprinting Firefox fork) passes reliably.
Zoopla uses Next.js App Router with React Server Components (RSC). Search
result data is server-rendered in an RSC stream, not available via
__NEXT_DATA__ or a JSON API. URL-based location slugs return 0 results, so
the working flow requires typing into the autocomplete input, selecting a
suggestion, and clicking Search.
Architecture:
Unlike the other scrapers which use HTTP clients per outcode, Zoopla keeps
a single Camoufox browser alive for the entire scrape. For each outcode, it:
1. Clears and types the outcode into the search input
2. Selects the first autocomplete suggestion
3. Clicks Search
4. Extracts listing data from the rendered DOM
5. Handles pagination via ?pn=N parameter
The browser session replaces the cookie/client pattern used by other scrapers.
"""
import logging
import re
import time
from constants import DELAY_BETWEEN_PAGES, PROPERTY_TYPE_MAP, ZOOPLA_BASE
from metrics import zoopla_errors_total, zoopla_pages_scraped, zoopla_properties_scraped
from spatial import PostcodeSpatialIndex
log = logging.getLogger("zoopla")
class TurnstileError(Exception):
    """Signals that a Cloudflare Turnstile challenge could not be cleared.

    Raised when the "Just a moment" interstitial is still showing after the
    polling window, either at browser launch or mid-session.
    """
# Maximum search result pages to scrape per outcode (25 listings/page).
# _paginate() stops requesting further pages once the page number would
# exceed this value, so pages 1..MAX_PAGES_PER_OUTCODE are the most it visits.
MAX_PAGES_PER_OUTCODE = 10
# JavaScript evaluated in the page (see _extract_listings) to extract listings
# from the rendered search-results DOM.
#
# For every anchor whose href contains a for-sale / new-homes / to-rent
# "/details/<id>/" path it:
#   * de-duplicates on the numeric listing id taken from the href,
#   * walks up at most 10 ancestors, stopping at the first one whose text
#     contains a pound sign plus "bed" or "sq ft" (treated as the listing card),
#   * regex-parses price, beds, baths, receptions, floor area, address and
#     tenure from that element's innerText.
#
# Returns an array of plain objects with the keys id, url, price, beds, baths,
# receptions, floor_area_sqft, address and tenure. Numeric fields are null when
# nothing matched; address and tenure default to ''.
#
# NOTE(review): when no ancestor matches within 10 levels the loop falls
# through and the 10th ancestor is used as the card, so fields may be parsed
# from unrelated page text — confirm whether such links should be skipped.
_EXTRACT_LISTINGS_JS = r"""() => {
const links = Array.from(document.querySelectorAll(
'a[href*="/for-sale/details/"], a[href*="/new-homes/details/"], a[href*="/to-rent/details/"]'
));
const seen = new Set();
const results = [];
for (const link of links) {
const href = link.href;
const match = href.match(/\/details\/(\d+)\//);
if (!match) continue;
const id = match[1];
if (seen.has(id)) continue;
seen.add(id);
// Walk up to the listing card container
let card = link;
for (let j = 0; j < 10; j++) {
card = card.parentElement;
if (!card) break;
const text = card.innerText || '';
if (text.includes('\u00a3') && (text.includes('bed') || text.includes('sq ft'))) {
break;
}
}
if (!card) continue;
const text = card.innerText || '';
const lines = text.split('\n').map(l => l.trim()).filter(Boolean);
const priceMatch = text.match(/\u00a3([\d,]+)/);
const bedsMatch = text.match(/(\d+)\s*beds?/i);
const bathsMatch = text.match(/(\d+)\s*baths?/i);
const recMatch = text.match(/(\d+)\s*reception/i);
const areaMatch = text.match(/([\d,]+)\s*sq\s*ft/i);
let address = '';
for (const line of lines) {
if (/[A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2}/i.test(line) ||
(line.includes(',') && !line.includes('\u00a3') && !/^\d+ beds?/i.test(line))) {
address = line;
break;
}
}
let tenure = '';
if (/freehold/i.test(text)) tenure = 'Freehold';
else if (/leasehold/i.test(text)) tenure = 'Leasehold';
results.push({
id: id,
url: href.replace(window.location.origin, ''),
price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null,
beds: bedsMatch ? parseInt(bedsMatch[1]) : null,
baths: bathsMatch ? parseInt(bathsMatch[1]) : null,
receptions: recMatch ? parseInt(recMatch[1]) : null,
floor_area_sqft: areaMatch ? parseInt(areaMatch[1].replace(/,/g, '')) : null,
address: address,
tenure: tenure,
});
}
return results;
}"""
# JavaScript evaluated in the page to dismiss the Usercentrics cookie consent
# overlay (element #usercentrics-cmp-ui, whose buttons live in a shadow DOM).
# It clicks the first shadow-root button whose text includes "Accept"; when
# there is no such button (or no shadow root) the overlay element itself is
# removed. Evaluates to true if the overlay was handled, false if it was absent.
_DISMISS_COOKIES_JS = """() => {
const aside = document.querySelector('#usercentrics-cmp-ui');
if (aside && aside.shadowRoot) {
const btns = aside.shadowRoot.querySelectorAll('button');
for (const btn of btns) {
if (btn.innerText.includes('Accept')) { btn.click(); return true; }
}
}
if (aside) { aside.remove(); return true; }
return false;
}"""
# ---------------------------------------------------------------------------
# Browser lifecycle
# ---------------------------------------------------------------------------
def launch_browser():
    """Start a Camoufox session that has cleared Cloudflare on the Zoopla homepage.

    Launches headless Camoufox, opens the Zoopla homepage, waits for the
    Cloudflare Turnstile interstitial (page title "Just a moment") to go away
    and then dismisses the cookie-consent overlay.

    Returns:
        (browser, page) tuple. The caller must close ``browser`` when done.

    Raises:
        TurnstileError: if the interstitial is still showing after 20 polls
            spaced 3 seconds apart (about 60 seconds).
        Exception: any error raised while launching or navigating is
            propagated after the browser session has been torn down, so a
            failed launch does not leave a browser process behind.
    """
    from camoufox.pkgman import camoufox_path

    # Verify camoufox is pre-installed — never download at runtime
    camoufox_path(download_if_missing=False)

    from camoufox.sync_api import Camoufox

    log.info("Launching Camoufox browser for Zoopla...")
    # The context manager is entered by hand because the session has to outlive
    # this function; keep the manager so it can be exited if setup fails.
    manager = Camoufox(headless=True)
    browser = manager.__enter__()
    try:
        page = browser.new_page()

        log.info("Navigating to Zoopla homepage...")
        page.goto(f"{ZOOPLA_BASE}/", wait_until="domcontentloaded", timeout=60000)

        # Wait for Cloudflare Turnstile to resolve.
        # Try clicking the Turnstile checkbox if present (helps in some cases).
        for _ in range(20):
            if "Just a moment" not in page.title():
                break
            # Attempt to click the Turnstile checkbox in the challenge iframe
            for frame in page.frames:
                if "challenges.cloudflare.com" in frame.url:
                    try:
                        iframe_el = page.query_selector('iframe[src*="challenges.cloudflare"]')
                        if iframe_el:
                            box = iframe_el.bounding_box()
                            if box:
                                page.mouse.click(box["x"] + 30, box["y"] + box["height"] / 2)
                    except Exception as exc:
                        # Best effort only: the challenge may still resolve on its own.
                        log.debug("Turnstile checkbox click failed: %s", exc)
                    break
            time.sleep(3)
        else:
            raise TurnstileError("Cloudflare Turnstile did not resolve after 60s")

        log.info("Cloudflare passed — title: %s", page.title())
        time.sleep(2)

        # Dismiss cookie consent
        page.evaluate(_DISMISS_COOKIES_JS)
        time.sleep(1)
    except BaseException as exc:
        # Undo the manual __enter__ on every failure path (timeouts, crashes,
        # Turnstile, Ctrl-C), not just the Turnstile one.
        try:
            manager.__exit__(type(exc), exc, exc.__traceback__)
        except Exception as cleanup_exc:
            log.debug("Browser cleanup after failed launch also failed: %s", cleanup_exc)
        raise

    return browser, page
def _ensure_not_challenged(page) -> None:
"""Check if current page is a Cloudflare challenge and wait/raise."""
if "Just a moment" not in page.title():
return
log.warning("Cloudflare challenge detected mid-session, waiting...")
for i in range(20):
time.sleep(3)
if "Just a moment" not in page.title():
log.info("Cloudflare challenge resolved")
return
raise TurnstileError("Cloudflare re-challenge did not resolve")
# ---------------------------------------------------------------------------
# Search navigation
# ---------------------------------------------------------------------------
def _navigate_search(page, outcode: str, channel: str) -> bool:
    """Drive the homepage search form to open the results for ``outcode``.

    The homepage is reloaded first so each search starts from a clean form.
    The outcode is then typed into the autocomplete box, the first suggestion
    is picked and the search is submitted (the Rent tab is selected first when
    ``channel`` is "RENT").

    Returns True once the search has been submitted, False when the search
    input or an autocomplete suggestion could not be found.
    Raises TurnstileError if a Cloudflare challenge does not clear.
    """
    # Fresh homepage load resets any previous search state
    page.goto(f"{ZOOPLA_BASE}/", wait_until="domcontentloaded", timeout=30000)
    time.sleep(2)
    _ensure_not_challenged(page)

    # The consent overlay can come back after a navigation
    page.evaluate(_DISMISS_COOKIES_JS)
    time.sleep(1)

    # Switch to the Rent tab when scraping rentals
    rent_toggle = None
    if channel == "RENT":
        rent_toggle = page.query_selector(
            'button:has-text("Rent"), [role="tab"]:has-text("Rent")'
        )
    if rent_toggle:
        rent_toggle.click()
        time.sleep(0.5)

    # Locate the search box, preferring the named autosuggest input
    query_box = page.query_selector('input[name="autosuggest-input"]')
    if not query_box:
        query_box = page.query_selector('input[type="text"]')
    if not query_box:
        log.warning("Could not find search input on homepage")
        return False

    # Clear the box and type the outcode key by key so suggestions appear
    query_box.click()
    time.sleep(0.3)
    query_box.fill("")
    query_box.type(outcode, delay=60)
    time.sleep(2)

    # Pick the top autocomplete suggestion
    suggestion = page.query_selector('[role="option"]')
    if not suggestion:
        log.debug("No autocomplete suggestions for outcode %s", outcode)
        return False
    suggestion.click()
    time.sleep(0.5)

    # Submit via the Search button, falling back to the Enter key
    submit_button = page.query_selector('button:has-text("Search")')
    if not submit_button:
        query_box.press("Enter")
    else:
        submit_button.click()

    # Give the results page time to render
    time.sleep(6)
    _ensure_not_challenged(page)
    return True
def _get_result_count(page) -> int:
"""Extract the total results count from the page body text."""
try:
body = page.inner_text("body")
match = re.search(r"([\d,]+)\s+results?", body)
if match:
return int(match.group(1).replace(",", ""))
except Exception:
pass
return 0
# ---------------------------------------------------------------------------
# Extraction and pagination
# ---------------------------------------------------------------------------
def _extract_listings(page) -> list[dict]:
"""Extract listing data from the current search results page DOM."""
try:
return page.evaluate(_EXTRACT_LISTINGS_JS)
except Exception as e:
log.warning("Failed to extract listings from DOM: %s", e)
zoopla_errors_total.labels(type="extract_failed").inc()
return []
def _paginate(page, total_results: int, channel: str) -> list[dict]:
"""Extract listings from all pages of search results.
Page 1 is already loaded. For subsequent pages, clicks the Next button
or navigates via URL parameter ?pn=N."""
all_listings = _extract_listings(page)
channel_label = "buy" if channel == "BUY" else "rent"
zoopla_pages_scraped.labels(channel=channel_label).inc()
if not all_listings or total_results <= len(all_listings):
return all_listings
seen_ids = {listing["id"] for listing in all_listings}
current_url = page.url
page_num = 2
while len(all_listings) < total_results and page_num <= MAX_PAGES_PER_OUTCODE:
time.sleep(DELAY_BETWEEN_PAGES)
# Try navigating via URL parameter
if "?" in current_url:
next_url = re.sub(r"[?&]pn=\d+", "", current_url)
separator = "&" if "?" in next_url else "?"
next_url = f"{next_url}{separator}pn={page_num}"
else:
next_url = f"{current_url}?pn={page_num}"
try:
page.goto(next_url, wait_until="domcontentloaded", timeout=30000)
time.sleep(4)
_ensure_not_challenged(page)
except TurnstileError:
raise
except Exception as e:
log.debug("Pagination navigation failed at page %d: %s", page_num, e)
break
page_listings = _extract_listings(page)
if not page_listings:
break
# Deduplicate within this outcode
new_count = 0
for listing in page_listings:
if listing["id"] not in seen_ids:
seen_ids.add(listing["id"])
all_listings.append(listing)
new_count += 1
zoopla_pages_scraped.labels(channel=channel_label).inc()
if new_count == 0:
break # No new listings on this page
page_num += 1
return all_listings
# ---------------------------------------------------------------------------
# Property transformation
# ---------------------------------------------------------------------------
def _extract_postcode(text: str) -> str | None:
"""Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'."""
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
if match:
return match.group(1).upper().strip()
return None
def _extract_outcode(text: str) -> str | None:
"""Extract a UK outcode from address text like 'Whitechapel Road, London E1'."""
# Look for outcode at end of string or after last comma
match = re.search(r"\b([A-Z]{1,2}\d[A-Z0-9]?)\s*$", text.strip(), re.IGNORECASE)
if match:
return match.group(1).upper()
# Try after comma
parts = text.split(",")
if len(parts) > 1:
last = parts[-1].strip()
match = re.match(r"^([A-Z]{1,2}\d[A-Z0-9]?)$", last, re.IGNORECASE)
if match:
return match.group(1).upper()
return None
def _map_property_type(raw_type: str | None) -> str:
"""Map Zoopla property type text to canonical type."""
if not raw_type:
return "Other"
canonical = PROPERTY_TYPE_MAP.get(raw_type)
if canonical:
return canonical
lower = raw_type.lower()
if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower:
return "Flats/Maisonettes"
if "detached" in lower and "semi" not in lower:
return "Detached"
if "semi" in lower:
return "Semi-Detached"
if "terrace" in lower or "mews" in lower:
return "Terraced"
if "house" in lower:
return "Detached"
return "Other"
def transform_property(
    raw: dict,
    channel: str,
    pc_index: PostcodeSpatialIndex,
    pc_coords: dict[str, tuple[float, float]],
) -> dict | None:
    """Convert one raw Zoopla listing dict into the standard output record.

    Search cards carry no coordinates, so the position is resolved from the
    address text: first via the full postcode looked up in ``pc_coords``,
    then, failing that, via the first ``pc_coords`` entry whose key starts
    with the address's outcode followed by a space (in which case that
    entry's postcode is reported as the listing's postcode).

    Returns None when the listing has no price, cannot be located, or the
    resolved coordinates fall outside the 49..56 N / -7..2 E bounding box.
    ``pc_index`` is accepted but not used by this function.
    """
    price = raw.get("price")
    if not price:
        return None

    address = raw.get("address", "")

    # --- Locate the listing -------------------------------------------------
    lat = lng = None
    postcode = _extract_postcode(address)
    exact = pc_coords.get(postcode) if postcode else None
    if exact:
        lat, lng = exact

    if lat is None:
        # Outcode-level fallback: borrow the first known postcode in the outcode.
        outcode = _extract_outcode(address)
        if outcode:
            prefix = outcode + " "
            fallback = next(
                ((pcd, xy) for pcd, xy in pc_coords.items() if pcd.startswith(prefix)),
                None,
            )
            if fallback is not None:
                postcode, (lat, lng) = fallback

    if lat is None or lng is None or not postcode:
        return None

    # Bounding-box sanity check on the resolved position.
    inside_box = 49 <= lat <= 56 and -7 <= lng <= 2
    if not inside_box:
        return None

    # --- Numeric attributes (missing values count as 0) ----------------------
    bedrooms = raw.get("beds") or 0
    bathrooms = raw.get("baths") or 0
    receptions = raw.get("receptions") or 0

    # Floor area arrives in square feet; report square metres to one decimal.
    sqft = raw.get("floor_area_sqft")
    floor_area_sqm = round(sqft * 0.092903, 1) if sqft else None

    # Site-relative listing URLs are made absolute.
    listing_url = raw.get("url", "")
    if listing_url and not listing_url.startswith("http"):
        listing_url = ZOOPLA_BASE + listing_url

    return {
        "id": f"zp_{raw.get('id', '')}",
        "Bedrooms": bedrooms,
        "Bathrooms": bathrooms,
        "Number of bedrooms & living rooms": bedrooms + receptions,
        "lon": lng,
        "lat": lat,
        "Postcode": postcode,
        "Address per Property Register": address,
        "Leasehold/Freehold": raw.get("tenure") or None,
        "Property type": "Other",  # Not reliably extractable from Zoopla search cards
        "Property sub-type": "",
        "price": int(price),
        "price_frequency": "monthly" if channel != "BUY" else "",
        "Price qualifier": "",
        "Total floor area (sqm)": floor_area_sqm,
        "Listing URL": listing_url,
        "Listing features": [],
        "first_visible_date": "",
    }
# ---------------------------------------------------------------------------
# Top-level search function (called by scraper.py)
# ---------------------------------------------------------------------------
def search_outcode(
    page,
    outcode: str,
    channel: str,
    pc_index: PostcodeSpatialIndex,
    pc_coords: dict[str, tuple[float, float]],
) -> list[dict]:
    """Scrape every Zoopla listing for one outcode and return standard records.

    ``page`` is a live browser page (a Camoufox Page from launch_browser,
    per the original documentation). The function runs the homepage search
    flow, reads the result count, walks the result pages, and converts each
    raw listing with ``transform_property``; listings that cannot be
    transformed are dropped. One ``zoopla_properties_scraped`` increment is
    recorded per kept listing.

    Returns an empty list when the search could not be run, the reported
    result count is 0, or no listings were extracted.

    Raises TurnstileError if Cloudflare blocks us mid-session.
    """
    searched = _navigate_search(page, outcode, channel)
    if not searched:
        return []

    total_results = _get_result_count(page)
    if total_results == 0:
        return []

    raw_listings = _paginate(page, total_results, channel)
    if not raw_listings:
        return []

    channel_label = "rent" if channel != "BUY" else "buy"
    properties = []
    # Lazy generator: each listing is transformed and counted before the next.
    converted = (
        transform_property(raw, channel, pc_index, pc_coords) for raw in raw_listings
    )
    for record in converted:
        if not record:
            continue
        properties.append(record)
        zoopla_properties_scraped.labels(channel=channel_label).inc()
    return properties

View file

@ -9,7 +9,6 @@ import Header, { type Page } from './components/ui/Header';
import AuthModal from './components/ui/AuthModal';
import SaveSearchModal from './components/ui/SaveSearchModal';
import LicenseSuccessModal from './components/ui/LicenseSuccessModal';
import VerificationBanner from './components/ui/VerificationBanner';
import type { FeatureMeta, FeatureGroup, POICategoriesResponse, POICategoryGroup } from './types';
import { fetchWithRetry, apiUrl } from './lib/api';
import { trackEvent } from './lib/analytics';
@ -118,15 +117,12 @@ export default function App() {
loginWithOAuth,
logout,
requestPasswordReset,
requestVerification,
refreshAuth,
clearError,
} = useAuth();
const [showAuthModal, setShowAuthModal] = useState(false);
const [authModalTab, setAuthModalTab] = useState<'login' | 'register'>('login');
const [showLicenseSuccess, setShowLicenseSuccess] = useState(false);
const [verificationDismissed, setVerificationDismissed] = useState(false);
useEffect(() => {
const params = new URLSearchParams(window.location.search);
if (params.get('license_success') === '1') {
@ -304,19 +300,11 @@ export default function App() {
onLogout={logout}
isMobile={isMobile}
/>
{user && !user.verified && !verificationDismissed && isAuthRequiredPage && (
<VerificationBanner
email={user.email}
onRequestVerification={requestVerification}
onDismiss={() => setVerificationDismissed(true)}
/>
)}
{activePage === 'home' ? (
<HomePage
onOpenDashboard={() => navigateTo('dashboard')}
onOpenPricing={() => navigateTo('pricing')}
theme={theme}
features={features}
hidePricing={user?.subscription === 'licensed' || user?.isAdmin}
/>
) : activePage === 'pricing' && !(user?.subscription === 'licensed' || user?.isAdmin) ? (
@ -340,6 +328,7 @@ export default function App() {
searchesLoading={savedSearches.loading}
onDeleteSearch={savedSearches.deleteSearch}
onUpdateSearchNotes={savedSearches.updateSearchNotes}
onUpdateSearchName={savedSearches.updateSearchName}
onOpenSearch={(params) => {
window.location.href = `/dashboard?${params}`;
}}
@ -354,11 +343,7 @@ export default function App() {
) : activePage === 'invites' && user ? (
<InvitesPage user={user} />
) : activePage === 'account' && user ? (
<AccountPage
user={user}
onRefreshAuth={refreshAuth}
onRequestVerification={requestVerification}
/>
<AccountPage user={user} onRefreshAuth={refreshAuth} />
) : activePage === 'invite' && inviteCode ? (
<InvitePage
code={inviteCode}
@ -407,6 +392,7 @@ export default function App() {
onUnsaveProperty={user ? savedProperties.deleteProperty : undefined}
isPropertySaved={user ? savedProperties.isPropertySaved : undefined}
getSavedPropertyId={user ? savedProperties.getSavedPropertyId : undefined}
deferTutorial={showLicenseSuccess}
/>
)}
{showAuthModal && (

View file

@ -2,7 +2,7 @@ import { useState, useCallback, useEffect, useRef } from 'react';
import type { AuthUser } from '../../hooks/useAuth';
import type { SavedSearch } from '../../hooks/useSavedSearches';
import type { SavedProperty, SavedPropertyData } from '../../hooks/useSavedProperties';
import { apiUrl, authHeaders, assertOk, shortenUrl } from '../../lib/api';
import { apiUrl, authHeaders, assertOk, shortenUrl, prewarmScreenshot } from '../../lib/api';
import { copyToClipboard } from '../../lib/clipboard';
import { formatRelativeTime, formatNumber } from '../../lib/format';
import { summarizeParams } from '../../lib/url-state';
@ -71,13 +71,7 @@ function DeleteDialog({
);
}
function NotesInput({
value,
onSave,
}: {
value: string;
onSave: (notes: string) => void;
}) {
function NotesInput({ value, onSave }: { value: string; onSave: (notes: string) => void }) {
const [text, setText] = useState(value);
const textareaRef = useRef<HTMLTextAreaElement>(null);
const timerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
@ -146,17 +140,72 @@ function formatPropertyDetails(data: SavedPropertyData): string {
return parts.join(' · ');
}
/**
 * Heading that turns into a text field when clicked so the name can be edited
 * in place.
 *
 * - Enter or leaving the field commits: the trimmed text is passed to `onSave`
 *   only when it is non-empty and differs from `value`; otherwise the draft is
 *   reset to `value`.
 * - Escape discards the draft and leaves edit mode without saving.
 */
function EditableName({ value, onSave }: { value: string; onSave: (name: string) => void }) {
  const [editing, setEditing] = useState(false);
  const [draft, setDraft] = useState(value);
  const fieldRef = useRef<HTMLInputElement>(null);

  // Follow external changes to the name.
  useEffect(() => {
    setDraft(value);
  }, [value]);

  // When edit mode starts, focus the field and select its whole content.
  useEffect(() => {
    if (!editing) return;
    fieldRef.current?.focus();
    fieldRef.current?.select();
  }, [editing]);

  const revert = () => setDraft(value);

  const commit = () => {
    setEditing(false);
    const cleaned = draft.trim();
    if (cleaned === '' || cleaned === value) {
      revert();
      return;
    }
    onSave(cleaned);
  };

  if (!editing) {
    return (
      <h3
        onClick={() => setEditing(true)}
        className="font-medium text-navy-950 dark:text-warm-100 truncate cursor-pointer hover:text-teal-600 dark:hover:text-teal-400 border-b border-dotted border-transparent hover:border-warm-400 dark:hover:border-warm-500"
        title="Click to rename"
      >
        {value}
      </h3>
    );
  }

  return (
    <input
      ref={fieldRef}
      value={draft}
      onChange={(event) => setDraft(event.target.value)}
      onKeyDown={(event) => {
        switch (event.key) {
          case 'Enter':
            commit();
            break;
          case 'Escape':
            revert();
            setEditing(false);
            break;
        }
      }}
      onBlur={commit}
      className="w-full font-medium text-navy-950 dark:text-warm-100 bg-warm-50 dark:bg-warm-900 border border-warm-200 dark:border-warm-700 rounded px-1.5 py-0.5 text-sm focus:outline-none focus:ring-1 focus:ring-teal-400"
    />
  );
}
function SavedSearchesTab({
searches,
loading,
onDelete,
onUpdateNotes,
onUpdateName,
onOpen,
}: {
searches: SavedSearch[];
loading: boolean;
onDelete: (id: string) => Promise<void>;
onUpdateNotes: (id: string, notes: string) => void;
onUpdateName: (id: string, name: string) => void;
onOpen: (params: string) => void;
}) {
const [deleteConfirmId, setDeleteConfirmId] = useState<string | null>(null);
@ -178,6 +227,7 @@ function SavedSearchesTab({
const handleShare = useCallback(
async (params: string, id: string) => {
prewarmScreenshot(params);
setSharingId(id);
try {
const shortUrl = await shortenUrl(params);
@ -207,7 +257,7 @@ function SavedSearchesTab({
No saved searches yet
</p>
<p className="text-sm text-warm-500 dark:text-warm-500">
Save your dashboard filters and view to quickly return to them later.
Save your filters and map view so you can pick up exactly where you left off.
</p>
</div>
);
@ -219,7 +269,7 @@ function SavedSearchesTab({
{searches.map((search) => (
<div
key={search.id}
className="bg-white dark:bg-warm-800 border border-warm-200 dark:border-warm-700 rounded-lg overflow-hidden"
className="flex flex-col bg-white dark:bg-warm-800 border border-warm-200 dark:border-warm-700 rounded-lg overflow-hidden"
>
{search.screenshotUrl ? (
<img
@ -233,10 +283,13 @@ function SavedSearchesTab({
</div>
)}
<div className="p-4">
<h3 className="font-medium text-navy-950 dark:text-warm-100 truncate mb-1">
{search.name}
</h3>
<div className="p-4 flex flex-col flex-1">
<div className="mb-1">
<EditableName
value={search.name}
onSave={(name) => onUpdateName(search.id, name)}
/>
</div>
<p className="text-xs text-warm-500 dark:text-warm-400 mb-1">
{formatRelativeTime(search.created)}
</p>
@ -244,14 +297,14 @@ function SavedSearchesTab({
{summarizeParams(search.params)}
</p>
<div className="mb-3">
<div className="mb-3 flex-1">
<NotesInput
value={search.notes}
onSave={(notes) => onUpdateNotes(search.id, notes)}
/>
</div>
<div className="flex gap-2">
<div className="flex gap-2 mt-auto">
<button
onClick={() => onOpen(search.params)}
className="flex-1 px-3 py-1.5 text-sm font-medium rounded bg-teal-600 text-white hover:bg-teal-700"
@ -333,7 +386,7 @@ function SavedPropertiesTab({
No saved properties yet
</p>
<p className="text-sm text-warm-500 dark:text-warm-500">
Click the bookmark icon on any property in the dashboard to save it here.
Bookmark properties as you explore and build your shortlist without losing track.
</p>
</div>
);
@ -348,7 +401,7 @@ function SavedPropertiesTab({
return (
<div
key={prop.id}
className="bg-white dark:bg-warm-800 border border-warm-200 dark:border-warm-700 rounded-lg overflow-hidden p-4"
className="flex flex-col bg-white dark:bg-warm-800 border border-warm-200 dark:border-warm-700 rounded-lg overflow-hidden p-4"
>
<div className="mb-1">
<h3 className="font-medium text-navy-950 dark:text-warm-100 leading-tight">
@ -366,13 +419,11 @@ function SavedPropertiesTab({
{formatRelativeTime(prop.created)}
</p>
<div className="mb-3">
<NotesInput
value={prop.notes}
onSave={(notes) => onUpdateNotes(prop.id, notes)}
/>
<div className="mb-3 flex-1">
<NotesInput value={prop.notes} onSave={(notes) => onUpdateNotes(prop.id, notes)} />
</div>
<div className="mt-auto">
<div className="flex gap-2">
<button
onClick={() => onOpen(prop.postcode)}
@ -399,6 +450,7 @@ function SavedPropertiesTab({
</a>
)}
</div>
</div>
);
})}
</div>
@ -420,6 +472,7 @@ export function SavedPage({
searchesLoading,
onDeleteSearch,
onUpdateSearchNotes,
onUpdateSearchName,
onOpenSearch,
savedProperties,
propertiesLoading,
@ -431,6 +484,7 @@ export function SavedPage({
searchesLoading: boolean;
onDeleteSearch: (id: string) => Promise<void>;
onUpdateSearchNotes: (id: string, notes: string) => void;
onUpdateSearchName: (id: string, name: string) => void;
onOpenSearch: (params: string) => void;
savedProperties: SavedProperty[];
propertiesLoading: boolean;
@ -476,6 +530,7 @@ export function SavedPage({
loading={searchesLoading}
onDelete={onDeleteSearch}
onUpdateNotes={onUpdateSearchNotes}
onUpdateName={onUpdateSearchName}
onOpen={onOpenSearch}
/>
) : (
@ -755,18 +810,13 @@ export function InvitesPage({ user }: { user: AuthUser }) {
export default function AccountPage({
user,
onRefreshAuth,
onRequestVerification,
}: {
user: AuthUser;
onRefreshAuth: () => Promise<void>;
onRequestVerification: (email: string) => Promise<void>;
}) {
const [newsletterSaving, setNewsletterSaving] = useState(false);
const [newsletterError, setNewsletterError] = useState<string | null>(null);
const [verificationSending, setVerificationSending] = useState(false);
const [verificationSent, setVerificationSent] = useState(false);
const badgeColor =
user.subscription === 'licensed'
? 'bg-teal-100 text-teal-700 dark:bg-teal-900/30 dark:text-teal-400'
@ -782,38 +832,6 @@ export default function AccountPage({
<p className="text-sm text-warm-500 dark:text-warm-400">Email</p>
<p className="text-navy-950 dark:text-warm-100 font-medium">{user.email}</p>
</div>
<div className="flex items-center gap-2">
{!user.verified && (
<button
onClick={async () => {
setVerificationSending(true);
try {
await onRequestVerification(user.email);
setVerificationSent(true);
setTimeout(() => setVerificationSent(false), 3000);
} catch {
// Error handled by hook
} finally {
setVerificationSending(false);
}
}}
disabled={verificationSending || verificationSent}
className="text-xs text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 disabled:opacity-50 flex items-center gap-1"
>
{verificationSending && <SpinnerIcon className="w-3 h-3 animate-spin" />}
{verificationSent ? 'Sent!' : 'Resend verification'}
</button>
)}
<span
className={`text-xs font-medium px-2 py-0.5 rounded-full ${
user.verified
? 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400'
: 'bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-400'
}`}
>
{user.verified ? 'Verified' : 'Unverified'}
</span>
</div>
</div>
{/* Subscription */}
@ -823,7 +841,7 @@ export default function AccountPage({
<span
className={`inline-block text-sm font-medium px-2.5 py-0.5 rounded-full mt-1 ${badgeColor}`}
>
{user.subscription === 'licensed' ? 'Licensed' : 'Free'}
{user.subscription === 'licensed' ? 'Full Access' : 'Inner London'}
</span>
</div>
</div>

View file

@ -1,25 +1,20 @@
import { useState, useEffect, useRef } from 'react';
import { useFadeInRef } from '../../hooks/useFadeIn';
import HexCanvas from './HexCanvas';
import ScrollStory from './ScrollStory';
import BottomIllustration from './BottomIllustration';
import { TickerValue } from '../ui/TickerValue';
import { ChevronIcon } from '../ui/icons/ChevronIcon';
import { LogoIcon } from '../ui/icons/LogoIcon';
import { trackEvent } from '../../lib/analytics';
import type { FeatureMeta } from '../../types';
export default function HomePage({
onOpenDashboard,
onOpenPricing: _onOpenPricing,
theme = 'light',
features = [],
hidePricing: _hidePricing,
}: {
onOpenDashboard: () => void;
onOpenPricing: () => void;
theme?: 'light' | 'dark';
features?: FeatureMeta[];
hidePricing?: boolean;
}) {
const [statsActive, setStatsActive] = useState(false);
@ -34,7 +29,7 @@ export default function HomePage({
// Scroll depth tracking
const scrolledSections = useRef(new Set<string>());
useEffect(() => {
const ids = ['how-it-works', 'demo'];
const ids = ['how-it-works'];
const observers: IntersectionObserver[] = [];
ids.forEach((id) => {
const el = document.getElementById(id);
@ -142,35 +137,6 @@ export default function HomePage({
</div>
</div>
<div className="flex-1" />
<button
onClick={() => {
trackEvent('CTA Click', { location: 'hero', label: 'see_it_in_action' });
const target = document.getElementById('demo');
if (!target) return;
const scroller = target.closest('.overflow-y-auto') as HTMLElement | null;
if (!scroller) return;
const start = scroller.scrollTop;
const end =
start + target.getBoundingClientRect().top - scroller.getBoundingClientRect().top;
const distance = end - start;
const duration = 1200;
let startTime: number;
const step = (time: number) => {
if (!startTime) startTime = time;
const t = Math.min((time - startTime) / duration, 1);
const ease = t < 0.5 ? 4 * t * t * t : 1 - Math.pow(-2 * t + 2, 3) / 2;
scroller.scrollTop = start + distance * ease;
if (t < 1) requestAnimationFrame(step);
};
requestAnimationFrame(step);
}}
className="flex flex-col items-center pb-8 mt-10 md:mt-0 animate-[bounce_3s_ease-in-out_infinite] cursor-pointer"
>
<p className="text-lg md:text-xl font-semibold text-warm-300 mb-2">
See It in Action
</p>
<ChevronIcon direction="down" className="w-6 h-6 text-warm-400" />
</button>
</div>
</div>
@ -181,13 +147,13 @@ export default function HomePage({
</h2>
<div className="space-y-4 text-lg md:text-xl leading-relaxed text-warm-700 dark:text-warm-300">
<p>
Listings show what&apos;s available, not what&apos;s possible &mdash; fragments
without context. Traditional tools force you to begin with a location, separating area
insight from property detail. You search, cross-reference, and repeat per location.
On Rightmove, you pick an area first, then hope it&apos;s good. You end up
cross-referencing crime stats, school reports, and broadband checkers across a dozen
tabs, one postcode at a time.
</p>
<p>
We take a different approach. Start with what matters to you, and the right places
reveal themselves. No context lost. No property missed.
We flip that. Tell us what you need (budget, commute, schools, safety) and we show you
every area in England that qualifies. No guesswork. No wasted viewings.
</p>
</div>
</div>
@ -286,15 +252,6 @@ export default function HomePage({
</div>
</div>
{/* Scrollytelling: Problem + Solution + Demo map */}
<h2
id="demo"
className="text-3xl font-bold text-navy-950 dark:text-warm-100 text-center pt-16 mb-8"
>
See It in Action
</h2>
<ScrollStory features={features} theme={theme} />
{/* The real cost CTA */}
<div className="max-w-4xl mx-auto px-6 pt-20 pb-12">
<div ref={ctaRef} className="fade-in-section text-center">
@ -302,7 +259,7 @@ export default function HomePage({
Make your biggest investment your smartest&nbsp;move.
</h2>
<p className="text-warm-600 dark:text-warm-400 mb-8 max-w-xl mx-auto leading-relaxed">
This deserves proper tools behind it &mdash; don&apos;t leave it to luck.
This deserves proper tools behind it, don&apos;t leave it to luck.
</p>
<button
onClick={() => {

View file

@ -1,466 +0,0 @@
import { useState, useEffect, useRef, useMemo } from 'react';
import MapComponent from '../map/Map';
import { apiUrl, assertOk, authHeaders, isAbortError, logNonAbortError } from '../../lib/api';
import { formatValue } from '../../lib/format';
import { zoomToResolution } from '../../lib/map-utils';
import { DENSITY_GRADIENT, DENSITY_GRADIENT_DARK } from '../../lib/consts';
import { gradientToCss } from '../../lib/utils';
import { SpinnerIcon } from '../ui/icons/SpinnerIcon';
import type { FeatureMeta, HexagonData } from '../../types';
// Fixed camera for the demo map. ScrollStory passes it to MapComponent as
// initialViewState and also derives the H3 resolution and the request bounds
// from it.
const DEMO_VIEW = { longitude: -0.12, latitude: 51.51, zoom: 5.5, pitch: 0 };
// Names of the features shown in the demo's filter panel. Each is looked up by
// exact name in the `features` prop; names that are not found are dropped.
const DEMO_FEATURE_NAMES = [
'Estimated current price',
'Good+ primary schools within 5km',
'Number of restaurants within 2km',
];
// Shared do-nothing callback for the MapComponent handler props the demo does
// not use.
const noop = () => {};
// Filter fractions per stage: featureName -> [minFrac, maxFrac]
// 0 = feature.min, 1 = feature.max
interface StageDef {
filters: Record<string, [number, number]>;
// Optional travel-time constraint, sent to the hexagons endpoint as
// `mode:slug:min:max`. min/max are presumably minutes (the UI labels this
// stage "0-45 min") - confirm against the API.
travel?: { mode: string; slug: string; min: number; max: number };
}
// Building blocks for the demo stages. Each builder returns a fresh object so
// no stage shares filter tuples or travel settings with another.
const priceFilter = (): StageDef['filters'] => ({ 'Estimated current price': [0, 0.4] });
const schoolsFilter = (): StageDef['filters'] => ({
  'Good+ primary schools within 5km': [0.3, 1],
});
const restaurantsFilter = (): StageDef['filters'] => ({
  'Number of restaurants within 2km': [0.15, 1],
});
const allFeatureFilters = (): StageDef['filters'] => ({
  ...priceFilter(),
  ...schoolsFilter(),
  ...restaurantsFilter(),
});
const manchesterCommute = (): NonNullable<StageDef['travel']> => ({
  mode: 'transit',
  slug: 'manchester',
  min: 0,
  max: 45,
});

// One entry per scroll stage; every stage adds one constraint to the previous
// one (key order inside `filters` is price, schools, restaurants).
const STAGES: StageDef[] = [
  // 0: No filters — the problem
  { filters: {} },
  // 1: Price filter
  { filters: priceFilter() },
  // 2: Price + schools
  { filters: { ...priceFilter(), ...schoolsFilter() } },
  // 3: Price + schools + restaurants
  { filters: allFeatureFilters() },
  // 4: Price + schools + restaurants + commute to Manchester
  { filters: allFeatureFilters(), travel: manchesterCommute() },
  // 5: Summary — same filters
  { filters: allFeatureFilters(), travel: manchesterCommute() },
];
// Narrative copy for the demo, one entry per step. There are six entries, the
// same number as STAGES, and ScrollStory treats `STEPS.length - 1` as the last
// stage. Presumably STEPS[i] is displayed while STAGES[i] is applied - the
// rendering code is outside this view, so confirm there. Every entry currently
// has `heading: null`; headings, where wanted, are written inside `body`.
const STEPS: { heading: string | null; body: React.ReactNode }[] = [
// Step 0: introduce the example (no filters yet).
{
heading: null,
body: (
<>
<p className="text-base md:text-lg leading-snug md:leading-relaxed mb-2 md:mb-4">
Let&apos;s look at an example:
</p>
<p className="text-base md:text-lg leading-snug md:leading-relaxed">
You&apos;re about to spend{' '}
<strong className="text-navy-950 dark:text-warm-100">up to &pound;500k</strong> on a home.
</p>
</>
),
},
// Step 1: numbered "Set your must-haves" heading plus the price criterion.
{
heading: null,
body: (
<>
<div className="flex items-center gap-2 md:gap-3 mb-2 md:mb-3">
<div className="shrink-0 w-7 h-7 md:w-8 md:h-8 rounded-full bg-teal-600 text-white flex items-center justify-center font-bold text-xs md:text-sm">
1
</div>
<h3 className="text-lg md:text-xl font-bold text-navy-950 dark:text-warm-100">
Set your must-haves
</h3>
</div>
<p className="text-base md:text-lg leading-snug md:leading-relaxed">
Say you want a home{' '}
<strong className="text-navy-950 dark:text-warm-100">under &pound;500k</strong>&hellip;
</p>
</>
),
},
// Step 2: schools criterion.
{
heading: null,
body: (
<p className="text-base md:text-lg leading-snug md:leading-relaxed">
&hellip;with{' '}
<strong className="text-navy-950 dark:text-warm-100">good primary schools</strong>{' '}
nearby&hellip;
</p>
),
},
// Step 3: restaurants criterion.
{
heading: null,
body: (
<p className="text-base md:text-lg leading-snug md:leading-relaxed">
&hellip;and{' '}
<strong className="text-navy-950 dark:text-warm-100">
restaurants within walking distance
</strong>
&hellip;
</p>
),
},
// Step 4: commute criterion.
{
heading: null,
body: (
<p className="text-base md:text-lg leading-snug md:leading-relaxed">
&hellip;all within{' '}
<strong className="text-navy-950 dark:text-warm-100">45 minutes of Manchester</strong> by
public transport.
</p>
),
},
// Step 5: summary and pointer to the full filter set.
{
heading: null,
body: (
<>
<p className="text-base md:text-lg leading-snug md:leading-relaxed mb-2 md:mb-4 font-semibold text-navy-950 dark:text-warm-100">
No area chosen. No listings browsed. Yet you already know exactly where your needs are
met.
</p>
<p className="text-base md:text-lg leading-snug md:leading-relaxed">
That&apos;s just 4 filters. We&apos;ve built{' '}
<strong className="text-navy-950 dark:text-warm-100">56</strong> &mdash; covering commute
times, crime, broadband, noise, schools, amenities, and more.
</p>
</>
),
},
];
// Props for the ScrollStory demo section.
interface ScrollStoryProps {
// Feature metadata. The demo's features are looked up in it by name, and no
// hexagon data is requested while this array is empty.
features: FeatureMeta[];
// Colour theme, forwarded to the embedded MapComponent.
theme: 'light' | 'dark';
}
export default function ScrollStory({ features, theme }: ScrollStoryProps) {
const [stage, setStage] = useState(0);
const [hexData, setHexData] = useState<HexagonData[]>([]);
const [loading, setLoading] = useState(true);
const abortRef = useRef<AbortController>();
const demoFeatures = useMemo(
() =>
DEMO_FEATURE_NAMES.map((name) => features.find((f) => f.name === name)).filter(
Boolean
) as FeatureMeta[],
[features]
);
// Compute actual filter values from stage fractions + feature metadata
const stageFilters = useMemo(() => {
const stageDef = STAGES[stage];
const result: Record<string, [number, number]> = {};
for (const [name, [minFrac, maxFrac]] of Object.entries(stageDef.filters)) {
const meta = demoFeatures.find((f) => f.name === name);
if (meta?.min != null && meta?.max != null) {
const range = meta.max - meta.min;
result[name] = [meta.min + range * minFrac, meta.min + range * maxFrac];
}
}
return result;
}, [stage, demoFeatures]);
const demoView = useMemo(() => DEMO_VIEW, []);
// Derive H3 resolution from current zoom (discrete — only changes at thresholds)
const resolution = zoomToResolution(demoView.zoom);
// Compute bounds string from current view, rounded to 0.5° for stability
const demoBounds = useMemo(() => {
const { longitude, latitude, zoom } = demoView;
const scale = Math.pow(2, zoom);
const degreesPerPixelLng = 360 / (512 * scale);
const halfW = (1200 / 2) * degreesPerPixelLng * 1.3;
const latRad = (latitude * Math.PI) / 180;
const mercY = (1 - Math.log(Math.tan(latRad) + 1 / Math.cos(latRad)) / Math.PI) / 2;
const worldSize = 512 * scale;
const halfH = (800 / 2) * 1.3;
const topY = mercY * worldSize - halfH;
const botY = mercY * worldSize + halfH;
const toLat = (py: number) => {
const my = Math.max(0.001, Math.min(0.999, py / worldSize));
return (Math.atan(Math.sinh(Math.PI * (1 - 2 * my))) * 180) / Math.PI;
};
const snap = (v: number) => Math.round(v * 2) / 2;
const south = snap(Math.max(-85, toLat(botY)));
const west = snap(Math.max(-180, longitude - halfW));
const north = snap(Math.min(85, toLat(topY)));
const east = snap(Math.min(180, longitude + halfW));
return `${south},${west},${north},${east}`;
}, [demoView]);
// Fetch hex data when stage filters change
useEffect(() => {
if (features.length === 0) return;
// Clear stale data and show loading spinner immediately
setLoading(true);
setHexData([]);
const params = new URLSearchParams({
resolution: String(resolution),
bounds: demoBounds,
});
const filterParts: string[] = [];
for (const [name, [min, max]] of Object.entries(stageFilters)) {
filterParts.push(`${name}:${min}:${max}`);
}
if (filterParts.length > 0) params.set('filters', filterParts.join(','));
const stageDef = STAGES[stage];
if (stageDef.travel) {
const { mode, slug, min, max } = stageDef.travel;
params.set('travel', `${mode}:${slug}:${min}:${max}`);
}
const controller = new AbortController();
abortRef.current?.abort();
abortRef.current = controller;
fetch(apiUrl('hexagons', params), authHeaders({ signal: controller.signal }))
.then((res) => {
assertOk(res, 'hexagons');
return res.json();
})
.then((data: { features: HexagonData[] }) => {
setHexData(data.features);
setLoading(false);
})
.catch((err) => {
if (!isAbortError(err)) {
logNonAbortError('Failed to fetch story hexagons', err);
setLoading(false);
}
});
return () => controller.abort();
}, [features, stageFilters, stage, resolution, demoBounds]);
const isLastStage = stage === STEPS.length - 1;
return (
<section className="relative h-[calc(100dvh-3rem)]">
{/* Map background */}
<div className="absolute inset-0 z-0">
<MapComponent
data={stage === 0 ? [] : hexData}
postcodeData={[]}
usePostcodeView={false}
pois={[]}
onViewChange={noop}
viewFeature={null}
colorRange={null}
filterRange={null}
viewSource={null}
onCancelPin={noop}
features={features}
selectedHexagonId={null}
hoveredHexagonId={null}
onHexagonClick={noop}
onHexagonHover={noop}
initialViewState={demoView}
theme={theme}
screenshotMode={true}
hideLegend={true}
/>
</div>
{/* Interaction blocker */}
<div className="absolute inset-0 z-30" />
{/* Loading */}
{loading && (
<div className="absolute inset-0 z-50 flex items-center justify-center bg-warm-50/80 dark:bg-navy-950/80 backdrop-blur-sm">
<SpinnerIcon className="w-10 h-10 text-teal-600 dark:text-teal-400 animate-spin" />
</div>
)}
{/* Filter indicators — top left */}
<div className="absolute top-3 left-3 z-40 pointer-events-none w-[200px] md:w-[340px]">
<div className="bg-white/85 dark:bg-warm-800/85 rounded-xl p-3 md:p-6 backdrop-blur-sm shadow-lg space-y-2.5 md:space-y-5 w-full">
<div className="text-[10px] md:text-xs font-semibold uppercase tracking-wider text-warm-500 dark:text-warm-400">
Filters
</div>
{demoFeatures.map((feature) => {
const filterVal = stageFilters[feature.name];
const isActive = !!filterVal;
const min = feature.min ?? 0;
const max = feature.max ?? 1;
const range = max - min || 1;
const leftPct = filterVal ? ((filterVal[0] - min) / range) * 100 : 0;
const widthPct = filterVal ? ((filterVal[1] - filterVal[0]) / range) * 100 : 100;
return (
<div
key={feature.name}
className={`transition-opacity duration-700 ${isActive ? 'opacity-100' : 'opacity-30'}`}
>
<div className="flex justify-between items-baseline text-xs md:text-sm mb-1 md:mb-1.5 gap-1.5 md:gap-2">
<span
className={`font-medium truncate ${isActive ? 'text-navy-950 dark:text-warm-100' : 'text-warm-400 dark:text-warm-500'}`}
>
{feature.name}
</span>
{isActive && filterVal && (
<span className="text-teal-600 dark:text-teal-400 font-semibold whitespace-nowrap">
{formatValue(filterVal[0], feature)}&ndash;
{formatValue(filterVal[1], feature)}
</span>
)}
</div>
<div className="relative h-1.5 md:h-2.5 bg-warm-200 dark:bg-warm-700 rounded-full overflow-hidden">
<div
className="absolute h-full bg-teal-500 dark:bg-teal-400 rounded-full transition-all duration-700 ease-out"
style={{ left: `${leftPct}%`, width: `${widthPct}%` }}
/>
</div>
</div>
);
})}
{/* Travel time indicator */}
<div
className={`transition-opacity duration-700 ${STAGES[stage].travel ? 'opacity-100' : 'opacity-30'}`}
>
<div className="flex justify-between items-baseline text-xs md:text-sm mb-1 md:mb-1.5 gap-1.5 md:gap-2">
<span
className={`font-medium truncate ${STAGES[stage].travel ? 'text-navy-950 dark:text-warm-100' : 'text-warm-400 dark:text-warm-500'}`}
>
Commute to Manchester
</span>
{STAGES[stage].travel && (
<span className="text-teal-600 dark:text-teal-400 font-semibold whitespace-nowrap">
0&ndash;45 min
</span>
)}
</div>
<div className="relative h-1.5 md:h-2.5 bg-warm-200 dark:bg-warm-700 rounded-full overflow-hidden">
<div
className="absolute h-full bg-teal-500 dark:bg-teal-400 rounded-full transition-all duration-700 ease-out"
style={{ left: '0%', width: STAGES[stage].travel ? '45%' : '100%' }}
/>
</div>
</div>
</div>
</div>
{/* Density legend — top right */}
<div className="absolute top-3 right-3 z-40 pointer-events-none w-[180px] md:w-[220px]">
<div className="bg-white/85 dark:bg-warm-800/85 rounded-xl p-3 md:p-4 backdrop-blur-sm shadow-lg">
<div className="text-[10px] md:text-xs font-semibold uppercase tracking-wider text-warm-500 dark:text-warm-400 mb-2">
Colour
</div>
<div className="text-xs md:text-sm font-medium text-navy-950 dark:text-warm-100 mb-1.5">
Number of properties
</div>
<div
className="h-1.5 md:h-2.5 rounded-full overflow-hidden"
style={{
background: gradientToCss(
theme === 'dark' ? DENSITY_GRADIENT_DARK : DENSITY_GRADIENT
),
}}
/>
<div className="flex justify-between mt-1 text-[10px] md:text-xs text-warm-500 dark:text-warm-400">
<span>Fewer</span>
<span>More</span>
</div>
</div>
</div>
{/* Card stack overlay — bottom on mobile, right-center on desktop */}
<div className="absolute left-0 right-0 bottom-4 md:top-0 md:bottom-0 md:left-auto z-40 flex items-end md:items-center pointer-events-none mx-4 md:mr-[4%] md:ml-auto md:max-w-md md:w-full">
<div className="grid grid-cols-1 grid-rows-1 items-end w-full pointer-events-auto">
{STEPS.map((step, i) => (
<div
key={i}
className={`col-start-1 row-start-1 transition-all duration-500 ease-out ${
i === stage
? 'opacity-100 translate-y-0'
: i < stage
? 'opacity-0 -translate-y-4 pointer-events-none'
: 'opacity-0 translate-y-4 pointer-events-none'
}`}
>
<div className="bg-white/90 dark:bg-warm-800/90 rounded-xl p-4 md:p-6 backdrop-blur-sm shadow-lg border border-warm-200/40 dark:border-warm-700/40">
{step.heading && (
<h3 className="text-lg md:text-xl font-bold text-navy-950 dark:text-warm-100 mb-2 md:mb-3 leading-snug">
{step.heading}
</h3>
)}
<div className="text-warm-700 dark:text-warm-300">{step.body}</div>
{/* Navigation */}
<div className="flex items-center justify-between mt-3 md:mt-5">
{/* Step dots */}
<div className="flex gap-1.5">
{STEPS.map((_, dotIdx) => (
<button
key={dotIdx}
onClick={() => setStage(dotIdx)}
className={`w-2 h-2 rounded-full transition-all duration-300 ${
dotIdx === stage
? 'bg-teal-600 dark:bg-teal-400 w-4'
: dotIdx < stage
? 'bg-teal-600/40 dark:bg-teal-400/40'
: 'bg-warm-300 dark:bg-warm-600'
}`}
aria-label={`Go to step ${dotIdx + 1}`}
/>
))}
</div>
{/* Prev / Next / CTA buttons */}
<div className="flex items-center gap-2">
{stage > 0 && (
<button
onClick={() => setStage(stage - 1)}
className="inline-flex items-center gap-1 px-4 py-2 rounded-lg border border-warm-200 dark:border-warm-700 text-warm-600 dark:text-warm-300 font-semibold text-sm hover:bg-warm-50 dark:hover:bg-warm-800 transition-colors"
>
&larr; Back
</button>
)}
{isLastStage ? (
<a
href="/dashboard"
className="inline-flex items-center gap-2 px-4 py-2 rounded-lg bg-teal-600 text-white font-semibold text-sm hover:bg-teal-700 transition-colors"
>
Start exploring &rarr;
</a>
) : (
<button
onClick={() => setStage(stage + 1)}
className="inline-flex items-center gap-1 px-4 py-2 rounded-lg bg-teal-600 text-white font-semibold text-sm hover:bg-teal-700 transition-colors"
>
Next &rarr;
</button>
)}
</div>
</div>
</div>
</div>
))}
</div>
</div>
</section>
);
}

View file

@ -94,10 +94,14 @@ export default function InvitePage({
const isDark = theme === 'dark';
// Signal screenshot readiness once loading completes
// Signal screenshot readiness once loading completes and a frame has painted
useEffect(() => {
if (screenshotMode && !loading) {
requestAnimationFrame(() => {
requestAnimationFrame(() => {
window.__screenshot_ready = true;
});
});
}
}, [screenshotMode, loading]);
@ -313,7 +317,7 @@ export default function InvitePage({
<button
onClick={handleRedeem}
disabled={redeeming}
className="w-full px-6 py-3 bg-coral-500 text-white rounded-lg font-semibold hover:bg-coral-600 transition-colors text-lg shadow-lg shadow-coral-500/25 disabled:opacity-50 disabled:cursor-wait flex items-center justify-center gap-2"
className="w-full px-6 py-3 bg-teal-600 text-white rounded-lg font-semibold hover:bg-teal-700 transition-colors text-lg shadow-lg shadow-teal-600/25 disabled:opacity-50 disabled:cursor-wait flex items-center justify-center gap-2"
>
{redeeming && <SpinnerIcon className="w-5 h-5 animate-spin" />}
{isAdminInvite

View file

@ -142,19 +142,19 @@ const FAQ_SECTIONS: FAQSection[] = [
title: 'Finding Your Area',
items: [
{
question: "I don't even know which areas to look at \u2014 can this help with that?",
question: "I don't even know which areas to look at. Can this help?",
answer:
"That's exactly what it's for. Set your filters (budget, commute time, low crime, good schools \u2014 whatever matters) and the map lights up to show you where ticks every box. No more Googling \"best areas to live near Manchester\" at 1am.",
'That\'s exactly what it\'s for. Set your filters (budget, commute time, low crime, good schools, whatever matters) and the map lights up to show you where ticks every box. No more Googling "best areas to live near Manchester" at 1am.',
},
{
question: "I'm moving somewhere I've never been \u2014 how do I even start?",
question: "I'm moving somewhere I've never been. How do I even start?",
answer:
"Set your filters for what matters and the map instantly highlights the areas that qualify. You go from \"I don't know a single street\" to a shortlist in minutes \u2014 it's like having a local's knowledge of every neighbourhood in England.",
"Set your filters for what matters and the map instantly highlights the areas that qualify. You go from \"I don't know a single street\" to a shortlist in minutes. It's like having a local's knowledge of every neighbourhood in England.",
},
{
question: 'How do I find areas that tick all my boxes at once?',
answer:
'Stack multiple filters \u2014 say, crime below average, good schools, and commute under 40 minutes \u2014 then colour the map by price to spot the affordable sweet spots. The map updates live as you drag sliders, so you can watch neighbourhoods light up or drop off in real time.',
'Stack multiple filters (crime below average, good schools, commute under 40 minutes) then colour the map by price to spot the affordable sweet spots. The map updates live as you drag sliders, so you can watch neighbourhoods light up or drop off in real time.',
},
],
},
@ -164,7 +164,7 @@ const FAQ_SECTIONS: FAQSection[] = [
{
question: 'Can I see how long my commute would actually be from different areas?',
answer:
"Set your workplace as a destination and we'll colour every postcode by journey time \u2014 by car, bike, or public transport. Filter to your max commute and the rest disappears, so you're only looking at areas that actually work.",
"Set your workplace as a destination and we'll colour every postcode by journey time, whether that's by car, bike, or public transport. Filter to your max commute and the rest disappears.",
},
{
question: 'How is that better than checking Google Maps?',
@ -179,12 +179,12 @@ const FAQ_SECTIONS: FAQSection[] = [
{
question: 'How do I find areas where I get the most space for my money?',
answer:
"Filter by price per sqm \u2014 you'll instantly see which postcodes give you the most square footage per pound. Pair it with the energy rating filter to avoid cheap-but-freezing money pits.",
"Filter by price per sqm and you'll instantly see which postcodes give you the most square footage per pound. Pair it with the energy rating filter to avoid cheap-but-freezing money pits.",
},
{
question: "How do I make sure a cheap area isn't cheap for a reason?",
answer:
"Layer deprivation scores, crime stats, school ratings, and broadband speeds alongside price. If a postcode is affordable AND scores well on the stuff that matters, that's your hidden gem \u2014 not just a cheap postcode with a catch.",
"Layer deprivation scores, crime stats, school ratings, and broadband speeds alongside price. If a postcode is affordable AND scores well on the stuff that matters, that's your hidden gem, not just a cheap postcode with a catch.",
},
],
},
@ -194,12 +194,13 @@ const FAQ_SECTIONS: FAQSection[] = [
{
question: 'How can I check if an area is safe before I move there?',
answer:
"We overlay real police-recorded crime data \u2014 broken down by type \u2014 onto every neighbourhood in England. Filter by violent crime, burglary, or antisocial behaviour and instantly see which postcodes have the lowest numbers, so you're not relying on gut feeling.",
'We overlay real police-recorded crime data, broken down by type, onto every neighbourhood in England. Filter by violent crime, burglary, or antisocial behaviour and instantly see which postcodes have the lowest numbers.',
},
{
question: 'I keep finding flats that look great online, then the area turns out to be grim.',
question:
'I keep finding flats that look great online, then the area turns out to be grim.',
answer:
"That's why we built this. Stack crime rates, noise levels, deprivation scores, nearby pubs and parks, and broadband speeds all on one map \u2014 so you know what a neighbourhood is actually like before you waste a Saturday viewing.",
"That's why we built this. Stack crime rates, noise levels, deprivation scores, nearby pubs and parks, and broadband speeds all on one map so you know what a neighbourhood is actually like before you waste a Saturday viewing.",
},
],
},
@ -209,7 +210,7 @@ const FAQ_SECTIONS: FAQSection[] = [
{
question: 'Can I find areas with good schools AND low crime in one search?',
answer:
'Yes \u2014 stack filters for Ofsted ratings, crime rates, parks, and whatever else matters to your family, then watch the map highlight only the areas that tick every box. No more cross-referencing five different websites with a spreadsheet.',
'Absolutely. Stack filters for Ofsted ratings, crime rates, parks, and whatever else matters to your family, then watch the map highlight only the areas that tick every box. No more cross-referencing five different websites with a spreadsheet.',
},
{
question: 'How do I know if a neighbourhood has parks and playgrounds nearby?',
@ -222,9 +223,9 @@ const FAQ_SECTIONS: FAQSection[] = [
title: 'Environment & Quality of Life',
items: [
{
question: 'Can I find energy-efficient homes that aren\'t on a noisy road?',
question: "Can I find energy-efficient homes that aren't on a noisy road?",
answer:
'Filter by EPC rating (A\u2013C), then layer on road noise data to rule out anything above your threshold. Colour-code by either feature to spot quiet, efficient streets at a glance.',
'Filter by EPC rating (A to C), then layer on road noise data to rule out anything above your threshold. Colour-code by either feature to spot quiet, efficient streets at a glance.',
},
{
question: 'Does it show flood or subsidence risk?',
@ -242,9 +243,9 @@ const FAQ_SECTIONS: FAQSection[] = [
title: 'Why Perfect Postcode',
items: [
{
question: 'I already use Rightmove \u2014 what does this add?',
question: 'I already use Rightmove. What does this add?',
answer:
"Rightmove shows you houses. We show you areas. You'll see 56 layers of data \u2014 crime rates, school ratings, broadband speeds, noise levels, deprivation scores \u2014 all on one map, so you can judge a neighbourhood before you even look at listings.",
"Rightmove shows you houses. We show you areas. You'll see 56 layers of data (crime rates, school ratings, broadband speeds, noise levels, deprivation scores) all on one map, so you can judge a neighbourhood before you even look at listings.",
},
{
question: "Can't I just research all this myself for free?",
@ -254,7 +255,7 @@ const FAQ_SECTIONS: FAQSection[] = [
{
question: 'Where does the data actually come from?',
answer:
"Every dataset comes from official UK government sources \u2014 Land Registry, the EPC register, ONS, Ofsted, Ofcom, data.police.uk, and Defra. We don't scrape estate agents or make anything up \u2014 you can verify any record against the original source.",
"Every dataset comes from official UK government sources: Land Registry, the EPC register, ONS, Ofsted, Ofcom, data.police.uk, and Defra. We don't scrape estate agents or make anything up. You can verify any record against the original source.",
},
],
},
@ -264,7 +265,7 @@ const FAQ_SECTIONS: FAQSection[] = [
{
question: 'Is it really worth paying for a property search tool?',
answer:
"You're making a decision worth \u00a3200k\u2013\u00a3500k or more. Even spotting one red flag \u2014 a noisy road, poor broadband, rising crime \u2014 that changes your mind could save you years of regret. This costs less than a single viewing trip in petrol.",
"You're making a decision worth \u00a3200k to \u00a3500k or more. Even spotting one red flag (a noisy road, poor broadband, rising crime) that changes your mind could save you years of regret. This costs less than a single viewing trip in petrol.",
},
{
question: "Is this another subscription that'll drain my account?",
@ -274,12 +275,12 @@ const FAQ_SECTIONS: FAQSection[] = [
{
question: 'What can I access on the free tier?',
answer:
'Free users can explore all features within inner London (roughly zones 1\u20132). To access data for the rest of England, you need lifetime access.',
'Free users can explore all features within inner London (roughly zones 1 to 2). To access data for the rest of England, you need lifetime access.',
},
{
question: 'Can I get a refund?',
answer:
'Yes \u2014 we offer a 30-day money-back guarantee. If you\u2019re not satisfied, email support@perfect-postcode.co.uk within 30 days for a full refund.',
'Absolutely. We offer a 30-day money-back guarantee. If you\u2019re not satisfied, email support@perfect-postcode.co.uk within 30 days for a full refund.',
},
],
},
@ -289,12 +290,12 @@ const FAQ_SECTIONS: FAQSection[] = [
{
question: 'How do I use the AI filter instead of adding filters one by one?',
answer:
'Type what you want in plain English \u2014 something like "quiet area near good schools with fast broadband under \u00a3400k" \u2014 and it\'ll set up all the relevant filters in one go. Tweak any of them manually afterwards.',
'Type what you want in plain English, something like "quiet area near good schools with fast broadband under \u00a3400k", and it\'ll set up all the relevant filters in one go. Tweak any of them manually afterwards.',
},
{
question: 'Can I save a search and come back to it later?',
answer:
'Hit the save button and everything is captured \u2014 your filters, zoom level, and which data layer you\u2019re colouring by. Pick up exactly where you left off or share the link with your partner.',
'Hit the save button and everything is captured: your filters, zoom level, and which data layer you\u2019re colouring by. Pick up exactly where you left off or share the link with your partner.',
},
{
question: "Can I export the data I'm looking at?",
@ -484,7 +485,7 @@ export default function LearnPage() {
) : tab === 'faq' ? (
<div className="max-w-3xl mx-auto px-6 py-6 w-full">
<p className="text-warm-600 dark:text-warm-400 mb-6">
Whether you&apos;re buying, renting, or just exploring &mdash; here&apos;s how Perfect
Whether you&apos;re buying, renting, or just exploring, here&apos;s how Perfect
Postcode helps you find the right area.
</p>
<div className="space-y-8">

View file

@ -127,7 +127,7 @@ export default memo(function AiFilterInput({
<SparklesIcon className="w-3.5 h-3.5 text-teal-500 dark:text-teal-400 shrink-0" />
<span className="text-xs font-medium text-teal-700 dark:text-teal-300">AI Search</span>
<span className="text-xs text-warm-400 dark:text-warm-500">
&mdash; describe what you&apos;re looking for
describe what you&apos;re looking for
</span>
</div>
<form onSubmit={handleSubmit} className="flex items-center gap-1.5">
@ -170,12 +170,6 @@ export default memo(function AiFilterInput({
))}
</div>
)}
{error && errorType === 'verification' && (
<p className="mt-1.5 text-xs text-amber-600 dark:text-amber-400">
Please verify your email address to use AI-powered search. Check your inbox for a
verification link.
</p>
)}
{error && errorType === 'limit' && (
<p className="mt-1.5 text-xs text-amber-600 dark:text-amber-400">
You&apos;ve reached the weekly AI usage limit. It will reset automatically next week.

View file

@ -78,7 +78,7 @@ export default function AreaPane({
<EmptyState
icon={<InfoIcon className="w-8 h-8 text-warm-300 dark:text-warm-600" />}
title="No area selected"
description="Click a hexagon or postcode to view area statistics"
description="Click any coloured area on the map to see crime, schools, prices, and more"
centered
/>
);

View file

@ -10,23 +10,18 @@ import { groupFeaturesByCategory } from '../../lib/features';
import { FeatureInfoPopup } from '../ui/FeatureInfoPopup';
import { FeatureActions } from '../ui/FeatureIcons';
import { FeatureLabel } from '../ui/FeatureLabel';
import { CarIcon, BicycleIcon, WalkingIcon, TransitIcon, PlusIcon } from '../ui/icons';
import type { ComponentType } from 'react';
import { PlusIcon, InfoIcon } from '../ui/icons';
import { IconButton } from '../ui/IconButton';
import { TravelTimeInfoPopup } from '../ui/TravelTimeInfoPopup';
import {
TRANSPORT_MODES,
MODE_LABELS,
MODE_DESCRIPTIONS,
MODE_ICONS,
type TransportMode,
type TravelTimeEntry,
} from '../../hooks/useTravelTime';
const MODE_ICONS: Record<TransportMode, ComponentType<{ className?: string }>> = {
car: CarIcon,
bicycle: BicycleIcon,
walking: WalkingIcon,
transit: TransitIcon,
};
interface FeatureBrowserProps {
availableFeatures: FeatureMeta[];
allFeatures: FeatureMeta[];
@ -58,6 +53,7 @@ export default function FeatureBrowser({
}: FeatureBrowserProps) {
const [search, setSearch] = useState('');
const [infoFeature, setInfoFeature] = useState<FeatureMeta | null>(null);
const [travelInfoMode, setTravelInfoMode] = useState<TransportMode | null>(null);
const [expandedGroups, toggleGroup] = useCollapsibleGroups();
const availableTravelModes = useTravelModes();
@ -89,7 +85,19 @@ export default function FeatureBrowser({
const showTravelModes =
visibleModes.length > 0 &&
(!search ||
'travel time journey commute car bicycle walking transit'.includes(search.toLowerCase()));
'travel time journey commute car bicycle walking transit transport station tube train'.includes(
search.toLowerCase()
));
// Ensure "Transport" group exists when travel modes should be shown
const mergedGrouped = useMemo(() => {
if (!showTravelModes) return grouped;
if (grouped.some((g) => g.name === 'Transport')) return grouped;
const groups = [...grouped];
const propsIdx = groups.findIndex((g) => g.name === 'Properties in the area');
groups.splice(propsIdx === -1 ? 0 : propsIdx + 1, 0, { name: 'Transport', features: [] });
return groups;
}, [grouped, showTravelModes]);
return (
<>
@ -97,19 +105,50 @@ export default function FeatureBrowser({
<SearchInput value={search} onChange={setSearch} placeholder="Search features..." />
</div>
<div className="md:min-h-0 md:flex-1 md:overflow-y-auto flex flex-col">
{showTravelModes && (
<div className="shrink-0">
{mergedGrouped.map((group) => {
const isExpanded = isSearching || expandedGroups.has(group.name);
return (
<div key={group.name} className="shrink-0">
<CollapsibleGroupHeader
name="Travel Time"
expanded={isSearching || expandedGroups.has('Travel Time')}
onToggle={() => toggleGroup('Travel Time')}
name={group.name}
expanded={isExpanded}
onToggle={() => toggleGroup(group.name)}
className="px-3 py-2.5 text-sm font-bold text-navy-950 bg-warm-200 dark:bg-navy-900 dark:text-warm-100 sticky top-0 z-10 hover:bg-warm-200 dark:hover:bg-warm-800"
>
<span className="text-xs font-medium text-warm-400 dark:text-warm-500">
{visibleModes.length}
{group.features.length +
(group.name === 'Transport' && showTravelModes ? visibleModes.length : 0)}
</span>
</CollapsibleGroupHeader>
{(isSearching || expandedGroups.has('Travel Time')) &&
{isExpanded && (
<>
{group.features.map((f) => {
const isPinned = pinnedFeature === f.name;
return (
<div
key={f.name}
className="flex items-center justify-between px-3 py-1.5 hover:bg-teal-50 dark:hover:bg-teal-900/30 dark:text-warm-300"
>
<div className="min-w-0 mr-2">
<FeatureLabel feature={f} size="sm" />
{f.description && (
<span className="text-xs text-warm-400 dark:text-warm-500 truncate block">
{f.description}
</span>
)}
</div>
<FeatureActions
feature={f}
isPinned={isPinned}
onTogglePin={onTogglePin}
onShowInfo={setInfoFeature}
onAdd={onAddFilter}
/>
</div>
);
})}
{group.name === 'Transport' &&
showTravelModes &&
visibleModes.map((mode) => {
const ModeIcon = MODE_ICONS[mode];
return (
@ -132,6 +171,12 @@ export default function FeatureBrowser({
</div>
</div>
<div className="flex items-center gap-0.5 shrink-0">
<IconButton
onClick={() => setTravelInfoMode(mode)}
title="Feature info"
>
<InfoIcon className="w-3.5 h-3.5" />
</IconButton>
<button
onClick={() => onAddTravelTimeEntry(mode)}
title={`Add ${MODE_LABELS[mode]} travel time`}
@ -143,51 +188,12 @@ export default function FeatureBrowser({
</div>
);
})}
</div>
</>
)}
{grouped.map((group) => {
const isExpanded = isSearching || expandedGroups.has(group.name);
return (
<div key={group.name} className="shrink-0">
<CollapsibleGroupHeader
name={group.name}
expanded={isExpanded}
onToggle={() => toggleGroup(group.name)}
className="px-3 py-2.5 text-sm font-bold text-navy-950 bg-warm-200 dark:bg-navy-900 dark:text-warm-100 sticky top-0 z-10 hover:bg-warm-200 dark:hover:bg-warm-800"
>
<span className="text-xs font-medium text-warm-400 dark:text-warm-500">
{group.features.length}
</span>
</CollapsibleGroupHeader>
{isExpanded &&
group.features.map((f) => {
const isPinned = pinnedFeature === f.name;
return (
<div
key={f.name}
className="flex items-center justify-between px-3 py-1.5 hover:bg-teal-50 dark:hover:bg-teal-900/30 dark:text-warm-300"
>
<div className="min-w-0 mr-2">
<FeatureLabel feature={f} onShowInfo={setInfoFeature} size="sm" />
{f.description && (
<span className="text-xs text-warm-400 dark:text-warm-500 truncate block">
{f.description}
</span>
)}
</div>
<FeatureActions
feature={f}
isPinned={isPinned}
onTogglePin={onTogglePin}
onAdd={onAddFilter}
/>
</div>
);
})}
</div>
);
})}
{grouped.length === 0 ? (
{mergedGrouped.length === 0 ? (
<EmptyState
icon={<FilterIcon className="w-8 h-8 text-warm-300 dark:text-warm-600" />}
title={search ? 'No matching features' : 'All features are active'}
@ -237,6 +243,9 @@ export default function FeatureBrowser({
onNavigateToSource={onNavigateToSource}
/>
)}
{travelInfoMode && (
<TravelTimeInfoPopup mode={travelInfoMode} onClose={() => setTravelInfoMode(null)} />
)}
</>
);
}

View file

@ -1,15 +1,12 @@
import { memo, useState, useMemo, useRef, useCallback, useEffect } from 'react';
import { Slider } from '../ui/Slider';
import { LightbulbIcon } from '../ui/icons';
import { ChevronIcon, LightbulbIcon } from '../ui/icons';
import { CollapsibleGroupHeader } from '../ui/CollapsibleGroupHeader';
import { PillToggle } from '../ui/PillToggle';
import { PillGroup } from '../ui/PillGroup';
import type { FeatureMeta, FeatureFilters } from '../../types';
import { formatFilterValue, buildPercentileScale } from '../../lib/format';
import { formatFilterValue, parseInputValue, buildPercentileScale } from '../../lib/format';
import type { PercentileScale } from '../../lib/format';
import { groupFeaturesByCategory } from '../../lib/features';
import { useCollapsibleGroups } from '../../hooks/useCollapsibleGroups';
import InfoPopup from '../ui/InfoPopup';
import { FeatureInfoPopup } from '../ui/FeatureInfoPopup';
import { FeatureActions } from '../ui/FeatureIcons';
@ -26,6 +23,73 @@ import {
type ListingType = 'historical' | 'buy' | 'rent';
/**
 * Slider value label that turns into a small text input when activated, so a
 * bound can be typed instead of dragged.
 *
 * Props:
 * - value: current numeric value; used (rounded) to seed the input text.
 * - formatted: display text shown while not editing.
 * - onCommit: called with the parsed number when the user confirms an edit.
 * - prefix / suffix: forwarded to parseInputValue as parsing options.
 * - className: extra classes for the read-only label only.
 * - style: inline style applied to both the label and the input (the caller
 *   uses it for positioning).
 *
 * Enter or blur confirms, Escape cancels. An edit is committed only when the
 * text differs from what the input was seeded with and parseInputValue
 * returns a non-null result.
 */
function EditableLabel({
  value,
  formatted,
  onCommit,
  prefix,
  suffix,
  className,
  style,
}: {
  value: number;
  formatted: string;
  onCommit: (v: number) => void;
  prefix?: string;
  suffix?: string;
  className?: string;
  style?: React.CSSProperties;
}) {
  const [editing, setEditing] = useState(false);
  const [text, setText] = useState('');
  const inputRef = useRef<HTMLInputElement>(null);
  // Text the input was seeded with when editing started; lets commit() tell a
  // real edit apart from a click-then-blur with no typing.
  const seedTextRef = useRef('');

  const startEdit = () => {
    const seed = String(Math.round(value));
    seedTextRef.current = seed;
    setText(seed);
    setEditing(true);
  };

  const commit = () => {
    // The seed is a rounded copy of `value`. Committing it untouched would
    // silently replace a fractional value with its rounded form, so an
    // unchanged input is treated as "no edit".
    if (text !== seedTextRef.current) {
      const parsed = parseInputValue(text, { prefix, suffix });
      if (parsed != null) onCommit(parsed);
    }
    setEditing(false);
  };

  // Once the input is mounted, focus it and select its contents so typing
  // replaces the seeded value.
  useEffect(() => {
    if (editing) {
      inputRef.current?.focus();
      inputRef.current?.select();
    }
  }, [editing]);

  if (editing) {
    return (
      <input
        ref={inputRef}
        value={text}
        onChange={(e) => setText(e.target.value)}
        onKeyDown={(e) => {
          if (e.key === 'Enter') commit();
          if (e.key === 'Escape') setEditing(false);
        }}
        onBlur={commit}
        className="absolute w-16 text-[10px] text-center rounded border border-warm-300 dark:border-warm-600 bg-white dark:bg-warm-800 text-warm-700 dark:text-warm-200 px-0.5 focus:outline-none focus:ring-1 focus:ring-teal-400"
        style={style}
      />
    );
  }

  return (
    <span
      role="button"
      tabIndex={0}
      className={`absolute cursor-pointer hover:text-teal-600 dark:hover:text-teal-400 border-b border-dotted border-warm-400 dark:border-warm-500 ${className ?? ''}`}
      style={style}
      onClick={startEdit}
      onKeyDown={(e) => {
        // Keyboard activation, mirroring a native button.
        if (e.key === 'Enter' || e.key === ' ') {
          e.preventDefault(); // keep Space from scrolling the page
          startEdit();
        }
      }}
    >
      {formatted}
    </span>
  );
}
function SliderLabels({
min,
max,
@ -34,6 +98,8 @@ function SliderLabels({
isAtMin,
isAtMax,
raw,
feature,
onValueChange,
}: {
min: number;
max: number;
@ -42,18 +108,55 @@ function SliderLabels({
isAtMin?: boolean;
isAtMax?: boolean;
raw?: boolean;
feature?: FeatureMeta;
onValueChange?: (v: [number, number]) => void;
}) {
const range = max - min || 1;
const leftPct = ((value[0] - min) / range) * 100;
const rightPct = ((value[1] - min) / range) * 100;
const labels = displayValues || value;
const minLabel = isAtMin ? 'min' : formatFilterValue(labels[0], raw);
const maxLabel = isAtMax ? 'max' : formatFilterValue(labels[1], raw);
// Smoothly spread labels apart as thumbs get close to prevent overlap.
// t=1 (centered) when far apart, t=0 (split) when touching.
const SPREAD_THRESHOLD = 20; // percentage gap below which labels start separating
const gapPct = rightPct - leftPct;
const t = Math.min(1, Math.max(0, gapPct / SPREAD_THRESHOLD));
const leftTranslate = `translateX(${-100 + t * 50}%)`;
const rightTranslate = `translateX(${-t * 50}%)`;
if (feature && onValueChange) {
return (
<div className="relative h-4 mt-2 mx-2.5 text-[10px] text-warm-500 dark:text-warm-400 leading-tight">
<span className="absolute -translate-x-1/2" style={{ left: `${leftPct}%` }}>
{isAtMin ? 'min' : formatFilterValue(labels[0], raw)}
<EditableLabel
value={labels[0]}
formatted={minLabel}
onCommit={(v) => onValueChange([Math.min(v, labels[1]), labels[1]])}
prefix={feature.prefix}
suffix={feature.suffix}
style={{ left: `${leftPct}%`, transform: leftTranslate }}
/>
<EditableLabel
value={labels[1]}
formatted={maxLabel}
onCommit={(v) => onValueChange([labels[0], Math.max(v, labels[0])])}
prefix={feature.prefix}
suffix={feature.suffix}
style={{ left: `${rightPct}%`, transform: rightTranslate }}
/>
</div>
);
}
return (
<div className="relative h-4 mt-2 mx-2.5 text-[10px] text-warm-500 dark:text-warm-400 leading-tight">
<span className="absolute" style={{ left: `${leftPct}%`, transform: leftTranslate }}>
{minLabel}
</span>
<span className="absolute -translate-x-1/2" style={{ left: `${rightPct}%` }}>
{isAtMax ? 'max' : formatFilterValue(labels[1], raw)}
<span className="absolute" style={{ left: `${rightPct}%`, transform: rightTranslate }}>
{maxLabel}
</span>
</div>
);
@ -249,29 +352,25 @@ export default memo(function Filters({
const scrollRef = useRef<HTMLDivElement>(null);
const [showPhilosophy, setShowPhilosophy] = useState(false);
const [activeInfoFeature, setActiveInfoFeature] = useState<FeatureMeta | null>(null);
const [collapsedGroups, toggleGroup, expandGroup] = useCollapsibleGroups();
const [addFilterCollapsed, setAddFilterCollapsed] = useState(false);
const activeEntryCount = travelTimeEntries.length;
const pendingScrollRef = useRef<string | null>(null);
const handleAddAndScroll = useCallback(
(name: string) => {
const feature = features.find((f) => f.name === name);
if (feature?.group) expandGroup(feature.group);
pendingScrollRef.current = name;
onAddFilter(name);
},
[onAddFilter, features, expandGroup]
[onAddFilter]
);
const handleAddTravelTimeAndScroll = useCallback(
(mode: TransportMode) => {
expandGroup('Transport');
pendingScrollRef.current = `tt_${travelTimeEntries.length}`;
onTravelTimeAddEntry(mode);
},
[onTravelTimeAddEntry, travelTimeEntries.length, expandGroup]
[onTravelTimeAddEntry, travelTimeEntries.length]
);
useEffect(() => {
@ -283,21 +382,6 @@ export default memo(function Filters({
el.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
}, [enabledFeatureList, travelTimeEntries]);
const enabledGroups = useMemo(
() => groupFeaturesByCategory(enabledFeatureList),
[enabledFeatureList]
);
// Ensure "Transport" group exists in active filters when travel time entries are present
const mergedGroups = useMemo(() => {
if (travelTimeEntries.length === 0) return enabledGroups;
if (enabledGroups.some((g) => g.name === 'Transport')) return enabledGroups;
const groups = [...enabledGroups];
const propsIdx = groups.findIndex((g) => g.name === 'Properties in the area');
groups.splice(propsIdx === -1 ? 0 : propsIdx + 1, 0, { name: 'Transport', features: [] });
return groups;
}, [enabledGroups, travelTimeEntries.length]);
const percentileScales = useMemo(() => {
const scales = new Map<string, PercentileScale>();
for (const f of features) {
@ -313,12 +397,14 @@ export default memo(function Filters({
return (
<div
ref={containerRef}
className="flex flex-col bg-white dark:bg-navy-950 overflow-y-auto md:overflow-hidden h-full"
className="flex flex-col bg-white dark:bg-navy-950 overflow-y-auto md:overflow-hidden h-full touch-pan-y"
>
<div className="shrink-0 md:shrink md:min-h-0 flex flex-col md:basis-[40%]">
<div className="shrink-0 flex items-center justify-between px-3 py-2 border-b border-warm-200 dark:border-navy-700">
<div
className={`shrink-0 md:shrink md:min-h-0 flex flex-col ${addFilterCollapsed ? '' : 'md:basis-[40%]'}`}
>
<div className="shrink-0 flex items-center justify-between px-3 py-2 border-b border-warm-200 dark:border-navy-700 bg-teal-50 dark:bg-teal-900/30">
<div className="flex items-center gap-2">
<span className="text-sm font-semibold text-navy-950 dark:text-warm-100">
<span className="text-sm font-semibold text-teal-700 dark:text-teal-400">
Active Filters
</span>
{badgeCount > 0 && (
@ -368,22 +454,15 @@ export default memo(function Filters({
Finding the Perfect Postcode
</button>
</div>
{travelTimeEntries.length > 0 && (
<div>
<CollapsibleGroupHeader
name="Travel Time"
expanded={!collapsedGroups.has('Travel Time')}
onToggle={() => toggleGroup('Travel Time')}
className="px-3 py-2.5 text-sm font-bold text-navy-950 bg-warm-200 dark:bg-navy-900 dark:text-warm-100 sticky top-0 z-10 hover:bg-warm-200 dark:hover:bg-warm-800"
>
<span className="text-xs font-medium text-warm-400 dark:text-warm-500">
{travelTimeEntries.length}
</span>
</CollapsibleGroupHeader>
{!collapsedGroups.has('Travel Time') && (
{enabledFeatureList.length === 0 && activeEntryCount === 0 && (
<p className="px-3 py-1.5 text-xs text-warm-400 dark:text-warm-500">
Add filters below to narrow the map to areas that match
</p>
)}
<div className="px-2 py-1 space-y-1">
{travelTimeEntries.map((entry, index) => (
<div key={index} data-filter-name={`tt_${index}`} className="scroll-mt-10">
<div key={`tt_${index}`} data-filter-name={`tt_${index}`}>
<TravelTimeCard
mode={entry.mode}
slug={entry.slug}
@ -392,43 +471,14 @@ export default memo(function Filters({
useBest={entry.useBest}
isPinned={pinnedFeature === travelFieldKey(entry)}
onTogglePin={() => onTogglePin(travelFieldKey(entry))}
onSetDestination={(slug, label) =>
onTravelTimeSetDestination(index, slug, label)
}
onSetDestination={(slug, label) => onTravelTimeSetDestination(index, slug, label)}
onTimeRangeChange={(range) => onTravelTimeRangeChange(index, range)}
onToggleBest={() => onTravelTimeToggleBest(index)}
onRemove={() => onTravelTimeRemoveEntry(index)}
/>
</div>
))}
</div>
)}
</div>
)}
{enabledFeatureList.length === 0 && activeEntryCount === 0 && (
<p className="px-3 py-1.5 text-xs text-warm-400 dark:text-warm-500">
Browse features below and click + to add a filter
</p>
)}
{enabledGroups.map((group) => {
const isExpanded = !collapsedGroups.has(group.name);
return (
<div key={group.name}>
<CollapsibleGroupHeader
name={group.name}
expanded={isExpanded}
onToggle={() => toggleGroup(group.name)}
className="px-3 py-2.5 text-sm font-bold text-navy-950 bg-warm-200 dark:bg-navy-900 dark:text-warm-100 sticky top-0 z-10 hover:bg-warm-200 dark:hover:bg-warm-800"
>
<span className="text-xs font-medium text-warm-400 dark:text-warm-500">
{group.features.length}
</span>
</CollapsibleGroupHeader>
{isExpanded && (
<div className="px-2 py-1 space-y-1">
{group.features.map((feature) => {
{enabledFeatureList.map((feature) => {
if (feature.type === 'enum') {
const selectedValues = (filters[feature.name] as string[]) || [];
const allValues = feature.values || [];
@ -436,13 +486,10 @@ export default memo(function Filters({
<div
key={feature.name}
data-filter-name={feature.name}
className={`scroll-mt-10 space-y-0.5 px-2 py-1.5 rounded ${pinnedFeature === feature.name ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}
className={`space-y-0.5 px-2 py-1.5 rounded ${pinnedFeature === feature.name ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}
>
<div className="flex items-center justify-between">
<FeatureLabel
feature={feature}
size="sm"
/>
<FeatureLabel feature={feature} size="sm" />
<FeatureActions
feature={feature}
isPinned={pinnedFeature === feature.name}
@ -484,30 +531,28 @@ export default memo(function Filters({
const scale = percentileScales.get(feature.name);
const dataMin = hist?.min ?? feature.min!;
const dataMax = hist?.max ?? feature.max!;
const isAtMin = displayValue[0] <= dataMin;
const isAtMax = displayValue[1] >= dataMax;
const clampMin = displayValue[0] <= dataMin;
const clampMax = displayValue[1] >= dataMax;
const isAtMin = displayValue[0] === dataMin;
const isAtMax = displayValue[1] === dataMax;
const sliderValue: [number, number] = scale
? [
isAtMin ? 0 : Math.round(scale.toPercentile(displayValue[0])),
isAtMax ? 100 : Math.round(scale.toPercentile(displayValue[1])),
clampMin ? 0 : Math.round(scale.toPercentile(displayValue[0])),
clampMax ? 100 : Math.round(scale.toPercentile(displayValue[1])),
]
: [
isAtMin ? feature.min! : displayValue[0],
isAtMax ? feature.max! : displayValue[1],
clampMin ? feature.min! : displayValue[0],
clampMax ? feature.max! : displayValue[1],
];
return (
<div
key={feature.name}
data-filter-name={feature.name}
className={`scroll-mt-10 space-y-0.5 px-2 py-1.5 rounded ${isActive ? 'ring-2 ring-teal-400 bg-teal-50 dark:bg-teal-900/30' : isPinned ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}
className={`space-y-0.5 px-2 py-1.5 rounded ${isActive ? 'ring-2 ring-teal-400 bg-teal-50 dark:bg-teal-900/30' : isPinned ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}
>
<div className="flex items-center justify-between gap-1">
<FeatureLabel
feature={feature}
size="sm"
className="min-w-0 shrink"
/>
<FeatureLabel feature={feature} size="sm" className="min-w-0 shrink" />
<FeatureActions
feature={feature}
isPinned={isPinned}
@ -520,9 +565,7 @@ export default memo(function Filters({
<Slider
min={scale ? 0 : feature.min!}
max={scale ? 100 : feature.max!}
step={
scale ? 1 : (feature.step ?? (feature.max! - feature.min!) / 100)
}
step={scale ? 1 : (feature.step ?? (feature.max! - feature.min!) / 100)}
value={sliderValue}
onValueChange={
scale
@ -530,9 +573,7 @@ export default memo(function Filters({
const step = feature.step ?? 1;
const snap = (v: number) => Math.round(v / step) * step;
onDragChange([
pMin <= 0
? (hist?.min ?? feature.min!)
: snap(scale.toValue(pMin)),
pMin <= 0 ? (hist?.min ?? feature.min!) : snap(scale.toValue(pMin)),
pMax >= 100
? (hist?.max ?? feature.max!)
: snap(scale.toValue(pMax)),
@ -555,23 +596,31 @@ export default memo(function Filters({
isAtMin={isAtMin}
isAtMax={isAtMax}
raw={feature.raw}
feature={feature}
onValueChange={(v) => onFilterChange(feature.name, v)}
/>
</div>
</div>
);
})}
</div>
)}
</div>
);
})}
</div>
</div>
<div className="shrink-0 md:shrink md:min-h-0 flex flex-col md:basis-[60%] border-t border-warm-200 dark:border-warm-700">
<div className="shrink-0 px-3 py-2 border-b border-warm-200 dark:border-navy-700">
<span className="text-sm font-semibold text-navy-950 dark:text-warm-100">Add Filter</span>
</div>
<div
className={`shrink-0 md:shrink md:min-h-0 flex flex-col border-t border-warm-200 dark:border-warm-700 ${addFilterCollapsed ? '' : 'md:basis-[60%]'}`}
>
<button
onClick={() => setAddFilterCollapsed((v) => !v)}
className="shrink-0 flex items-center justify-between px-3 py-2 border-b border-warm-200 dark:border-navy-700 bg-teal-50 dark:bg-teal-900/30 cursor-pointer hover:bg-teal-100 dark:hover:bg-teal-900/50"
>
<span className="text-sm font-semibold text-teal-700 dark:text-teal-400">Add Filter</span>
<ChevronIcon
direction={addFilterCollapsed ? 'down' : 'up'}
className="w-4 h-4 text-warm-400 dark:text-warm-500"
/>
</button>
{!addFilterCollapsed && (
<div className="md:min-h-0 md:flex-1 flex flex-col">
<FeatureBrowser
availableFeatures={availableFeatures}
@ -588,6 +637,7 @@ export default memo(function Filters({
onUpgradeClick={onUpgradeClick}
/>
</div>
)}
</div>
{showPhilosophy && (
@ -595,79 +645,72 @@ export default memo(function Filters({
<div className="space-y-4 text-sm">
<p className="text-warm-600 dark:text-warm-300">
Start with your must-haves, then layer on nice-to-haves. The map narrows down as you
add filters &mdash; the areas that survive are your best matches.
add filters. The areas that survive are your best matches.
</p>
<div>
<h4 className="font-semibold text-navy-950 dark:text-warm-100 mb-1">
1. Budget &amp; property basics
</h4>
<div className="space-y-2">
<div className="flex gap-2">
<span className="shrink-0 w-5 h-5 rounded-full bg-teal-600 text-white flex items-center justify-center text-xs font-bold">
1
</span>
<p className="text-warm-600 dark:text-warm-300">
Set your price range, minimum floor area, and property type. If you need a lease
over freehold (or vice versa), filter for that too. This eliminates most of the map
immediately.
<span className="font-semibold text-navy-950 dark:text-warm-100">
Budget &amp; basics
</span>{' '}
(price range, floor area, property type)
</p>
</div>
<div>
<h4 className="font-semibold text-navy-950 dark:text-warm-100 mb-1">
2. Commute &amp; transport
</h4>
<div className="flex gap-2">
<span className="shrink-0 w-5 h-5 rounded-full bg-teal-600 text-white flex items-center justify-center text-xs font-bold">
2
</span>
<p className="text-warm-600 dark:text-warm-300">
Add a travel time filter to your workplace &mdash; choose public transport or
cycling and set your maximum tolerable commute. You can also filter by how many
stations are within walking distance.
<span className="font-semibold text-navy-950 dark:text-warm-100">Commute</span>{' '}
(travel time to your workplace by car, bike, or transit)
</p>
</div>
<div>
<h4 className="font-semibold text-navy-950 dark:text-warm-100 mb-1">
3. Safety &amp; environment
</h4>
<div className="flex gap-2">
<span className="shrink-0 w-5 h-5 rounded-full bg-teal-600 text-white flex items-center justify-center text-xs font-bold">
3
</span>
<p className="text-warm-600 dark:text-warm-300">
Use the crime filters to cap serious or minor crime rates. Check road noise levels
if you&apos;re a light sleeper, and environmental risk filters for ground stability
concerns.
<span className="font-semibold text-navy-950 dark:text-warm-100">Safety</span>{' '}
(crime rates, noise levels, ground stability)
</p>
</div>
<div>
<h4 className="font-semibold text-navy-950 dark:text-warm-100 mb-1">
4. Schools &amp; education
</h4>
<div className="flex gap-2">
<span className="shrink-0 w-5 h-5 rounded-full bg-teal-600 text-white flex items-center justify-center text-xs font-bold">
4
</span>
<p className="text-warm-600 dark:text-warm-300">
Filter by the number of Ofsted-rated Good or Outstanding primary and secondary
schools nearby. The education deprivation score captures broader area-level
attainment.
<span className="font-semibold text-navy-950 dark:text-warm-100">Schools</span>{' '}
(nearby Ofsted-rated Good or Outstanding schools)
</p>
</div>
<div>
<h4 className="font-semibold text-navy-950 dark:text-warm-100 mb-1">
5. Lifestyle &amp; amenities
</h4>
<div className="flex gap-2">
<span className="shrink-0 w-5 h-5 rounded-full bg-teal-600 text-white flex items-center justify-center text-xs font-bold">
5
</span>
<p className="text-warm-600 dark:text-warm-300">
Want restaurants, parks, or grocery shops within walking distance? Filter by nearby
amenity counts. Broadband speed filters help if you work from home.
<span className="font-semibold text-navy-950 dark:text-warm-100">Lifestyle</span>{' '}
(restaurants, parks, broadband speed)
</p>
</div>
<div>
<h4 className="font-semibold text-navy-950 dark:text-warm-100 mb-1">
6. Energy &amp; running costs
</h4>
<div className="flex gap-2">
<span className="shrink-0 w-5 h-5 rounded-full bg-teal-600 text-white flex items-center justify-center text-xs font-bold">
6
</span>
<p className="text-warm-600 dark:text-warm-300">
EPC ratings from A to G indicate energy efficiency. Filter for better ratings to
find homes with lower bills and fewer upgrade headaches.
<span className="font-semibold text-navy-950 dark:text-warm-100">Energy</span>{' '}
(EPC ratings for lower bills and fewer surprises)
</p>
</div>
</div>
<div className="pt-1 border-t border-warm-200 dark:border-warm-700">
<p className="text-warm-500 dark:text-warm-400 italic">
Tip: if nothing survives your filters, relax one constraint at a time to see which
compromise unlocks the most options.
<p className="text-warm-500 dark:text-warm-400 italic text-xs">
Tip: if nothing survives, relax one constraint at a time to see which compromise
unlocks the most options.
</p>
</div>
{onResetTutorial && (
<button

View file

@ -20,7 +20,7 @@ export default function HistogramLegend() {
<div className="w-3 h-px border-t border-dashed border-warm-500 dark:border-warm-400" />
<span className="text-warm-700 dark:text-warm-300">
<span className="font-medium text-warm-900 dark:text-warm-100">Dashed line</span>{' '}
indicates the global average
indicates the national average
</span>
</div>
</div>

View file

@ -111,7 +111,7 @@ export default memo(function HoverCard({
)}
{/* Hint */}
<div className="text-[10px] text-warm-400 dark:text-warm-400 mt-2 text-center">
<div className="text-[10px] text-warm-400 dark:text-warm-500 mt-2 text-center">
Click for details
</div>
</div>

View file

@ -64,6 +64,28 @@ function getRouteDisplay(mode: string): { label: string; color: string; darkText
return { label: clean, color: '#6b7280', darkText: false };
}
/**
 * Unix timestamp (whole seconds) of the upcoming Monday at 07:30 local time.
 * When today is already a Monday, the result is the Monday one week ahead.
 */
function nextMondayAt730(): number {
  const target = new Date();
  const weekday = target.getDay(); // 0=Sun … 6=Sat
  // Sunday → +1, Monday → +7, Tuesday…Saturday → +6…+2.
  const offsetDays = (8 - weekday) % 7 || 7;
  target.setDate(target.getDate() + offsetDays);
  target.setHours(7, 30, 0, 0);
  const millis = target.getTime();
  return Math.floor(millis / 1000);
}
/**
 * Builds a Google Maps directions URL from `postcode` to `destination`,
 * with the departure set to the timestamp returned by `nextMondayAt730()`.
 * Both endpoints are percent-encoded before being placed in the URL path.
 */
function googleMapsUrl(postcode: string, destination: string): string {
  const from = encodeURIComponent(postcode);
  const to = encodeURIComponent(destination);
  const departAt = nextMondayAt730();
  // The official api=1 URL scheme doesn't support departure_time, so the
  // departure is passed via the undocumented data= path parameter, which uses
  // a protobuf-like encoding:
  //   !3e3 = transit, !6e0 = "depart at", !7e2 = local time, !8j = timestamp
  const dataParam = `!4m6!4m5!2m3!6e0!7e2!8j${departAt}!3e3`;
  return `https://www.google.com/maps/dir/${from}/${to}/data=${dataParam}`;
}
function invertLegs(legs: JourneyLeg[]): JourneyLeg[] {
return [...legs]
.reverse()
@ -219,7 +241,7 @@ export default function JourneyInstructions({
<span className="text-xs font-medium text-warm-700 dark:text-warm-300">
To {j.label || j.slug}
</span>
{displayLegs && displayLegs.length > 0 && (
{!j.loading && totalMin > 0 && (
<span className="text-xs font-semibold text-teal-700 dark:text-teal-400">
{totalMin} min
</span>
@ -235,6 +257,57 @@ export default function JourneyInstructions({
{displayLegs.map((leg, i) => (
<TimelineLeg key={i} leg={leg} isLast={i === displayLegs.length - 1} />
))}
<a
href={googleMapsUrl(postcode, j.label || j.slug)}
target="_blank"
rel="noopener noreferrer"
className="mt-2 flex items-center justify-center gap-1.5 w-full text-[11px] font-medium text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 bg-white dark:bg-warm-900 border border-warm-200 dark:border-warm-700 rounded-md py-1.5 transition-colors"
>
View on Google Maps
<svg
className="w-3 h-3"
viewBox="0 0 12 12"
fill="none"
stroke="currentColor"
strokeWidth="1.5"
>
<path
d="M4.5 1.5H2a.5.5 0 00-.5.5v8a.5.5 0 00.5.5h8a.5.5 0 00.5-.5V7.5M7.5 1.5H10.5V4.5M10.5 1.5L5.5 6.5"
strokeLinecap="round"
strokeLinejoin="round"
/>
</svg>
</a>
</div>
) : j.minutes != null ? (
<div>
<div className="flex items-center gap-1.5 py-0.5">
<WalkingIcon className="w-3.5 h-3.5 text-warm-500 dark:text-warm-400 shrink-0" />
<span className="text-xs text-warm-600 dark:text-warm-300">
Walk · {j.minutes} min
</span>
</div>
<a
href={googleMapsUrl(postcode, j.label || j.slug)}
target="_blank"
rel="noopener noreferrer"
className="mt-2 flex items-center justify-center gap-1.5 w-full text-[11px] font-medium text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 bg-white dark:bg-warm-900 border border-warm-200 dark:border-warm-700 rounded-md py-1.5 transition-colors"
>
View on Google Maps
<svg
className="w-3 h-3"
viewBox="0 0 12 12"
fill="none"
stroke="currentColor"
strokeWidth="1.5"
>
<path
d="M4.5 1.5H2a.5.5 0 00-.5.5v8a.5.5 0 00.5.5h8a.5.5 0 00.5-.5V7.5M7.5 1.5H10.5V4.5M10.5 1.5L5.5 6.5"
strokeLinecap="round"
strokeLinejoin="round"
/>
</svg>
</a>
</div>
) : (
<span className="text-xs text-warm-500 dark:text-warm-400">

View file

@ -25,6 +25,7 @@ import LocationSearch, { type SearchedLocation } from './LocationSearch';
import MapLegend from './MapLegend';
import HoverCard from './HoverCard';
import { LogoIcon } from '../ui/icons/LogoIcon';
import { CloseIcon } from '../ui/icons/CloseIcon';
import type { FeatureFilters } from '../../types';
import { useDeckLayers } from '../../hooks/useDeckLayers';
import { MODE_LABELS, type TravelTimeEntry } from '../../hooks/useTravelTime';
@ -167,6 +168,7 @@ export default memo(function Map({
const {
layers,
popupInfo,
clearPopupInfo,
hoverPosition,
countRange,
postcodeCountRange,
@ -309,7 +311,7 @@ export default memo(function Map({
))}
{popupInfo && (
<div
className="absolute bg-white dark:bg-warm-800 rounded-lg shadow-lg text-sm dark:text-white pointer-events-none"
className="absolute bg-white dark:bg-warm-800 rounded-lg shadow-lg text-sm dark:text-white"
style={{
left: popupInfo.x,
top: popupInfo.y - 50,
@ -317,6 +319,12 @@ export default memo(function Map({
zIndex: 9999,
}}
>
<button
className="absolute -top-2 -right-2 w-5 h-5 flex items-center justify-center rounded-full bg-warm-200 dark:bg-warm-700 text-warm-500 dark:text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 shadow-sm"
onClick={clearPopupInfo}
>
<CloseIcon className="w-3 h-3" />
</button>
{popupInfo.isCluster ? (
<div className="px-3 py-2 text-center">
<div className="text-lg font-bold text-teal-600 dark:text-teal-400">

View file

@ -72,6 +72,7 @@ interface MapPageProps {
onUnsaveProperty?: (id: string) => void;
isPropertySaved?: (address?: string, postcode?: string) => boolean;
getSavedPropertyId?: (address?: string, postcode?: string) => string | undefined;
deferTutorial?: boolean;
}
export default function MapPage({
@ -99,6 +100,7 @@ export default function MapPage({
onUnsaveProperty,
isPropertySaved,
getSavedPropertyId,
deferTutorial = false,
}: MapPageProps) {
const [selectedPOICategories, setSelectedPOICategories] =
useState<Set<string>>(initialPOICategories);
@ -153,6 +155,14 @@ export default function MapPage({
const handleAiFilterSubmit = useCallback(
async (query: string) => {
// Derive current listing type from Listing status filter
const listingVal = filters['Listing status'] as string[] | undefined;
const listingType = listingVal?.includes('For sale')
? 'buy'
: listingVal?.includes('For rent')
? 'rent'
: 'historical';
// Build context from current filters for conversational refinement
const context = {
filters,
@ -165,7 +175,11 @@ export default function MapPage({
};
const hasContext = Object.keys(context.filters).length > 0 || context.travelTime.length > 0;
const result = await aiFilters.fetchAiFilters(query, hasContext ? context : undefined);
const result = await aiFilters.fetchAiFilters(
query,
hasContext ? context : undefined,
listingType
);
if (!result) return;
handleSetFilters(result.filters);
// Always sync travel time entries — clear stale ones when AI returns none
@ -354,7 +368,7 @@ export default function MapPage({
selection.areaStats?.central_postcode,
]);
const tutorial = useTutorial(initialLoading, isMobile);
const tutorial = useTutorial(initialLoading, isMobile, deferTutorial);
const [exporting, setExporting] = useState(false);
const handleExport = useCallback(() => {
@ -418,7 +432,14 @@ export default function MapPage({
? mapData.postcodeData.length > 0
: mapData.data.length > 0;
if (hasData) {
// Wait for deck.gl to actually paint: in interleaved MapboxOverlay mode,
// hexagons render during MapLibre's rAF cycle. Double-rAF ensures at
// least one full paint has completed before we signal readiness.
requestAnimationFrame(() => {
requestAnimationFrame(() => {
window.__screenshot_ready = true;
});
});
}
}
}, [

View file

@ -84,7 +84,7 @@ export default function POIPane({
const selectedCount = selectedCategories.size;
return (
<div className="flex flex-col h-full bg-white dark:bg-navy-950 shadow-lg overflow-hidden">
<div className="flex flex-col h-full bg-white dark:bg-warm-900 shadow-lg overflow-hidden">
<div className="flex-shrink-0 px-3 pt-3 pb-2">
<div className="flex items-center gap-2">
<span className="text-xs font-semibold text-warm-500 dark:text-warm-400 uppercase tracking-wide">

View file

@ -52,7 +52,7 @@ export function PropertiesPane({
<EmptyState
icon={<InfoIcon className="w-8 h-8 text-warm-300 dark:text-warm-600" />}
title="No area selected"
description="Click a hexagon or postcode to view area statistics"
description="Click any coloured area on the map to see crime, schools, prices, and more"
centered
/>
);
@ -77,10 +77,9 @@ export function PropertiesPane({
}
>
<p className="text-sm text-warm-700 dark:text-warm-300 mb-4 leading-relaxed">
Property data combines Energy Performance Certificates (EPC) with HM Land Registry Price
Paid records, fuzzy-matched by address within each postcode. Includes floor area, energy
ratings, construction year, and tenure from EPC surveys, plus the most recent sale price
from the Land Registry.
Prices come from HM Land Registry (what buyers actually paid). Floor area, energy
ratings, construction year, and tenure come from official EPC surveys. Both sources are
matched by address within each postcode.
</p>
</InfoPopup>
)}

View file

@ -4,24 +4,13 @@ import { IconButton } from '../ui/IconButton';
import { PillToggle } from '../ui/PillToggle';
import { DestinationDropdown } from '../ui/DestinationDropdown';
import InfoPopup from '../ui/InfoPopup';
import { TravelTimeInfoPopup } from '../ui/TravelTimeInfoPopup';
import { CloseIcon } from '../ui/icons/CloseIcon';
import { EyeIcon } from '../ui/icons/EyeIcon';
import { InfoIcon } from '../ui/icons/InfoIcon';
import { CarIcon } from '../ui/icons/CarIcon';
import { BicycleIcon } from '../ui/icons/BicycleIcon';
import { WalkingIcon } from '../ui/icons/WalkingIcon';
import { TransitIcon } from '../ui/icons/TransitIcon';
import { formatFilterValue } from '../../lib/format';
import { useTravelDestinations } from '../../hooks/useTravelDestinations';
import { MODE_LABELS, type TransportMode } from '../../hooks/useTravelTime';
import type { ComponentType } from 'react';
const MODE_ICONS: Record<TransportMode, ComponentType<{ className?: string }>> = {
car: CarIcon,
bicycle: BicycleIcon,
walking: WalkingIcon,
transit: TransitIcon,
};
import { MODE_LABELS, MODE_ICONS, type TransportMode } from '../../hooks/useTravelTime';
interface TravelTimeCardProps {
mode: TransportMode;
@ -78,15 +67,11 @@ export function TravelTimeCard({
<span className="text-sm font-medium text-navy-950 dark:text-warm-100">
Travel Time ({MODE_LABELS[mode]})
</span>
<button
onClick={() => setShowInfo(true)}
className="p-1 -m-0.5 rounded text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 hover:bg-warm-100 dark:hover:bg-warm-700 shrink-0"
title="Feature info"
>
<InfoIcon className="w-3.5 h-3.5" />
</button>
</div>
<div className="flex items-center gap-0.5">
<IconButton onClick={() => setShowInfo(true)} title="Feature info">
<InfoIcon className="w-3.5 h-3.5" />
</IconButton>
{slug && (
<IconButton
onClick={onTogglePin}
@ -122,21 +107,7 @@ export function TravelTimeCard({
</div>
)}
{showInfo && (
<InfoPopup title={`Travel Time (${MODE_LABELS[mode]})`} onClose={() => setShowInfo(false)}>
<p className="text-sm text-warm-700 dark:text-warm-300 leading-relaxed">
Shows how long it takes to reach the selected destination from each area
{mode === 'transit'
? ' by public transport (bus, rail, tube). Times are computed across a typical weekday morning window.'
: mode === 'car'
? ' by car, based on typical road speeds and the road network.'
: mode === 'bicycle'
? ' by bicycle, using cycle-friendly routes.'
: ' on foot, using pedestrian paths and pavements.'}{' '}
Use the slider to filter areas within your preferred commute time.
</p>
</InfoPopup>
)}
{showInfo && <TravelTimeInfoPopup mode={mode} onClose={() => setShowInfo(false)} />}
{showBestInfo && (
<InfoPopup title="Best case travel time" onClose={() => setShowBestInfo(false)}>

View file

@ -87,7 +87,7 @@ export default function AuthModal({
if (e.target === e.currentTarget) onClose();
}}
>
<div className="absolute inset-0 bg-black/50 dark:bg-black/70" onMouseDown={onClose} />
<div className="absolute inset-0 bg-black/50 dark:bg-black/70" />
<div className="relative w-full max-w-sm mx-4 bg-white dark:bg-warm-900 rounded-lg shadow-xl border border-warm-200 dark:border-warm-700">
{/* Header */}
<div className="flex items-center justify-between px-5 pt-5 pb-3">
@ -127,6 +127,13 @@ export default function AuthModal({
)}
<div className="p-5 space-y-4">
{/* Value prop */}
{view !== 'forgot' && (
<p className="text-xs text-warm-500 dark:text-warm-400 text-center">
Save searches, bookmark properties, and pick up where you left off.
</p>
)}
{/* OAuth buttons (hidden in forgot view) */}
{view !== 'forgot' && (
<>

View file

@ -22,8 +22,8 @@ export function FeatureActions({
return (
<div className="flex items-center gap-0.5 shrink-0">
{feature.detail && onShowInfo && (
<IconButton onClick={() => onShowInfo(feature)} title="Feature info">
<InfoIcon />
<IconButton onClick={() => onShowInfo(feature)} title="Feature info" size="md">
<InfoIcon className="w-7 h-7 md:w-3.5 md:h-3.5" />
</IconButton>
)}
<IconButton

View file

@ -38,7 +38,7 @@ export function FeatureLabel({
{featureIcon}
{GroupIcon && <GroupIcon className={iconClass} />}
<span
className={`${textClass} text-warm-700 dark:text-warm-300 ${size === 'xs' ? 'truncate' : ''}`}
className={`${textClass} ${size === 'sm' ? 'font-medium text-navy-950 dark:text-warm-100' : 'text-warm-700 dark:text-warm-300 truncate'}`}
>
{feature.name}
</span>

View file

@ -1,6 +1,6 @@
import { useState, useCallback, useEffect } from 'react';
import type { AuthUser } from '../../hooks/useAuth';
import { shortenUrl } from '../../lib/api';
import { shortenUrl, prewarmScreenshot } from '../../lib/api';
import { copyToClipboard } from '../../lib/clipboard';
import { DownloadIcon } from './icons/DownloadIcon';
import { BookmarkIcon } from './icons/BookmarkIcon';
@ -96,6 +96,7 @@ export default function Header({
doCopy(window.location.href);
return;
}
prewarmScreenshot(params);
setSharing(true);
try {
const shortUrl = await shortenUrl(params);
@ -144,22 +145,13 @@ export default function Header({
Dashboard
</a>
{user && (
<>
<a
href={PAGE_PATHS.saved}
className={tabClass('saved')}
onClick={(e) => navLink('saved', e)}
>
Saved
</a>
<a
href={PAGE_PATHS.invites}
className={tabClass('invites')}
onClick={(e) => navLink('invites', e)}
>
Invite
Invite Friends
</a>
</>
)}
<a
href={PAGE_PATHS.learn}
@ -177,6 +169,15 @@ export default function Header({
Pricing
</a>
)}
{user && (
<a
href={PAGE_PATHS.saved}
className={tabClass('saved')}
onClick={(e) => navLink('saved', e)}
>
Saved
</a>
)}
</nav>
)}
</div>
@ -238,7 +239,13 @@ export default function Header({
{!isMobile && (
<>
{user ? (
<UserMenu user={user} theme={theme} onToggleTheme={onToggleTheme} onLogout={onLogout} />
<UserMenu
user={user}
theme={theme}
onToggleTheme={onToggleTheme}
onLogout={onLogout}
onNavigate={onPageChange}
/>
) : (
<>
<button

View file

@ -23,7 +23,7 @@ export default function InfoPopup({ title, children, onClose, sourceLink }: Info
<div className="fixed inset-0 z-50 flex items-center justify-center bg-black/30 p-4">
<div
ref={popupRef}
className="bg-white dark:bg-navy-800 border border-warm-200 dark:border-navy-700 rounded-lg shadow-xl max-w-md w-full max-h-full overflow-y-auto p-5"
className="bg-white dark:bg-warm-800 border border-warm-200 dark:border-warm-700 rounded-lg shadow-xl max-w-md w-full max-h-full overflow-y-auto p-5"
>
<div className="flex items-start justify-between mb-3">
<h3 className="text-sm font-semibold text-warm-900 dark:text-warm-100 pr-4">{title}</h3>

View file

@ -88,9 +88,9 @@ export default function MobileMenu({
{user?.subscription !== 'licensed' &&
!user?.isAdmin &&
mobileNavItem('pricing', 'Pricing')}
{user && mobileNavItem('saved', 'Saved')}
{user && mobileNavItem('invites', 'Invite')}
{user && mobileNavItem('invites', 'Invite Friends')}
{user && mobileNavItem('account', 'Account')}
{user && mobileNavItem('saved', 'Saved')}
{/* Dashboard actions */}
{activePage === 'dashboard' && (

View file

@ -0,0 +1,27 @@
import InfoPopup from './InfoPopup';
import { MODE_LABELS, type TransportMode } from '../../hooks/useTravelTime';
/**
 * Mode-specific sentence fragment for the travel-time info text.
 * Each entry begins with a space and ends with a full stop, so it can be
 * appended directly after a sentence opening such as "…from each area".
 */
const MODE_INFO: Record<TransportMode, string> = {
  car: ' by car, based on typical road speeds and the road network.',
  bicycle: ' by bicycle, using cycle-friendly routes.',
  walking: ' on foot, using pedestrian paths and pavements.',
  transit:
    ' by public transport (bus, rail, tube). Times are computed across a typical weekday morning window.',
};
/**
 * Info dialog describing what the travel-time filter shows for one transport mode.
 *
 * @param mode    Transport mode; selects both the title label and the explanatory fragment.
 * @param onClose Forwarded unchanged to the underlying `InfoPopup`.
 */
export function TravelTimeInfoPopup({
  mode,
  onClose,
}: {
  mode: TransportMode;
  onClose: () => void;
}) {
  const title = `Travel Time (${MODE_LABELS[mode]})`;
  // Fragment already carries its own leading space and trailing full stop.
  const modeDetail = MODE_INFO[mode];

  return (
    <InfoPopup title={title} onClose={onClose}>
      <p className="text-sm text-warm-700 dark:text-warm-300 leading-relaxed">
        Shows how long it takes to reach the selected destination from each area
        {modeDetail} Use the slider to filter areas within your preferred commute time.
      </p>
    </InfoPopup>
  );
}

View file

@ -60,9 +60,10 @@ export default function UpgradeModal({
{/* Header */}
<div className="bg-gradient-to-br from-navy-950 to-teal-900 px-6 py-8 text-center">
<h2 className="text-2xl font-bold text-white mb-2">Unlock the full map</h2>
<h2 className="text-2xl font-bold text-white mb-2">See all of England</h2>
<p className="text-warm-300 text-sm">
Free users can explore inner London. Upgrade for lifetime access to all of England.
You&apos;re currently exploring inner London. Get lifetime access to every postcode,
every filter, every neighbourhood. One payment, forever.
</p>
</div>
@ -118,7 +119,7 @@ export default function UpgradeModal({
onClick={onZoomToFreeZone}
className="w-full mt-4 text-center text-sm text-warm-400 dark:text-warm-500 hover:text-warm-600 dark:hover:text-warm-400"
>
Or zoom back to demo area
Or continue exploring inner London
</button>
</div>
</div>

View file

@ -1,5 +1,7 @@
import { useState, useRef, useEffect } from 'react';
import type { AuthUser } from '../../hooks/useAuth';
import type { Page } from './Header';
import { PAGE_PATHS } from './Header';
import { SunIcon } from './icons/SunIcon';
import { MoonIcon } from './icons/MoonIcon';
@ -8,11 +10,13 @@ export default function UserMenu({
theme,
onToggleTheme,
onLogout,
onNavigate,
}: {
user: AuthUser;
theme: 'light' | 'dark';
onToggleTheme: () => void;
onLogout: () => void;
onNavigate: (page: Page) => void;
}) {
const [open, setOpen] = useState(false);
const menuRef = useRef<HTMLDivElement>(null);
@ -55,7 +59,7 @@ export default function UserMenu({
: 'bg-warm-100 text-warm-500 dark:bg-warm-700 dark:text-warm-400'
}`}
>
{user.subscription === 'licensed' || user.isAdmin ? 'Pro' : 'Free'}
{user.subscription === 'licensed' || user.isAdmin ? 'Full Access' : 'Inner London'}
</span>
</div>
</div>
@ -72,8 +76,13 @@ export default function UserMenu({
Theme: {theme === 'light' ? 'Light' : 'Dark'}
</button>
<a
href="/account"
onClick={() => setOpen(false)}
href={PAGE_PATHS.account}
onClick={(e) => {
if (e.metaKey || e.ctrlKey || e.shiftKey || e.button !== 0) return;
e.preventDefault();
setOpen(false);
onNavigate('account');
}}
className="block w-full text-left px-3 py-2 text-sm text-warm-700 dark:text-warm-300 hover:bg-warm-50 dark:hover:bg-warm-700 rounded"
>
Account

View file

@ -1,53 +0,0 @@
import { useState, useCallback } from 'react';
import { SpinnerIcon } from './icons/SpinnerIcon';
/**
 * Banner asking the user to verify their email address.
 *
 * Offers a "Resend" action that calls `onRequestVerification(email)` and a
 * dismiss button that calls `onDismiss`. While a resend is in flight a spinner
 * is shown; after a successful resend the button reads "Sent!" and stays
 * disabled for three seconds.
 *
 * @param email                 Address passed to `onRequestVerification`.
 * @param onRequestVerification Async callback that triggers the verification email.
 * @param onDismiss             Called when the dismiss (×) button is clicked.
 */
export default function VerificationBanner({
  email,
  onRequestVerification,
  onDismiss,
}: {
  email: string;
  onRequestVerification: (email: string) => Promise<void>;
  onDismiss: () => void;
}) {
  const [isSending, setIsSending] = useState(false);
  const [wasSent, setWasSent] = useState(false);

  const resend = useCallback(async () => {
    setIsSending(true);
    try {
      await onRequestVerification(email);
      setWasSent(true);
      // Revert the "Sent!" state after 3 seconds so the user can resend again.
      setTimeout(() => setWasSent(false), 3000);
    } catch {
      // Error handled by hook
    } finally {
      setIsSending(false);
    }
  }, [email, onRequestVerification]);

  // Block repeat clicks both while the request is pending and while "Sent!" is showing.
  const resendDisabled = isSending || wasSent;
  const resendLabel = wasSent ? 'Sent!' : 'Resend';

  return (
    <div className="bg-amber-50 dark:bg-amber-900/20 border-b border-amber-200 dark:border-amber-800 px-4 py-2.5 flex items-center justify-between gap-3">
      <p className="text-sm text-amber-800 dark:text-amber-200">
        Please verify your email address. Check your inbox.
      </p>
      <div className="flex items-center gap-2 shrink-0">
        <button
          onClick={resend}
          disabled={resendDisabled}
          className="text-sm font-medium text-amber-700 dark:text-amber-300 hover:text-amber-900 dark:hover:text-amber-100 disabled:opacity-50 flex items-center gap-1"
        >
          {isSending && <SpinnerIcon className="w-3.5 h-3.5 animate-spin" />}
          {resendLabel}
        </button>
        <button
          onClick={onDismiss}
          className="text-amber-400 dark:text-amber-600 hover:text-amber-600 dark:hover:text-amber-400 text-lg leading-none"
          aria-label="Dismiss"
        >
          &times;
        </button>
      </div>
    </div>
  );
}

View file

@ -17,9 +17,11 @@ export interface AiFiltersResult {
notes: string;
/** Human-readable summary of what was set */
summary: string;
/** The listing mode used (historical/buy/rent) */
listingType: string;
}
export type AiFilterErrorType = 'auth' | 'verification' | 'limit' | 'error';
export type AiFilterErrorType = 'auth' | 'limit' | 'error';
/** Context of currently active filters, sent for conversational refinement. */
export interface AiFiltersContext {
@ -28,7 +30,11 @@ export interface AiFiltersContext {
}
interface UseAiFiltersResult {
fetchAiFilters: (query: string, context?: AiFiltersContext) => Promise<AiFiltersResult | null>;
fetchAiFilters: (
query: string,
context?: AiFiltersContext,
listingType?: string
) => Promise<AiFiltersResult | null>;
loading: boolean;
error: string | null;
errorType: AiFilterErrorType | null;
@ -41,6 +47,8 @@ function buildSummary(filters: FeatureFilters, travelTimeFilters: AiTravelTimeFi
const parts: string[] = [];
for (const [name, value] of Object.entries(filters)) {
// Skip Listing status — shown via the mode selector UI
if (name === 'Listing status') continue;
if (Array.isArray(value) && value.length === 2 && typeof value[0] === 'number') {
parts.push(name);
} else if (Array.isArray(value)) {
@ -67,7 +75,11 @@ export function useAiFilters(): UseAiFiltersResult {
const abortRef = useRef<AbortController | null>(null);
const fetchAiFilters = useCallback(
async (query: string, context?: AiFiltersContext): Promise<AiFiltersResult | null> => {
async (
query: string,
context?: AiFiltersContext,
listingType?: string
): Promise<AiFiltersResult | null> => {
abortRef.current?.abort();
const controller = new AbortController();
abortRef.current = controller;
@ -81,6 +93,7 @@ export function useAiFilters(): UseAiFiltersResult {
try {
const url = apiUrl('ai-filters');
const bodyObj: Record<string, unknown> = { query };
if (listingType) bodyObj.listing_type = listingType;
if (context) {
bodyObj.context = {
filters: context.filters,
@ -102,9 +115,6 @@ export function useAiFilters(): UseAiFiltersResult {
if (response.status === 401) {
setErrorType('auth');
setError(text || 'Login required');
} else if (response.status === 403) {
setErrorType('verification');
setError(text || 'Email verification required');
} else if (response.status === 429) {
setErrorType('limit');
setError(text || 'Weekly usage limit reached');
@ -133,6 +143,7 @@ export function useAiFilters(): UseAiFiltersResult {
travelTimeFilters,
notes: json.notes || '',
summary: summaryText,
listingType: json.listing_type || 'historical',
};
setNotes(result.notes || null);
setSummary(summaryText);

View file

@ -5,7 +5,6 @@ import { trackEvent } from '../lib/analytics';
export interface AuthUser {
id: string;
email: string;
verified: boolean;
isAdmin: boolean;
subscription: string;
newsletter: boolean;
@ -18,7 +17,6 @@ function recordToUser(record: { id: string; [key: string]: unknown }): AuthUser
return {
id: record.id,
email: record.email,
verified: typeof record.verified === 'boolean' ? record.verified : false,
isAdmin: typeof record.is_admin === 'boolean' ? record.is_admin : false,
subscription: typeof record.subscription === 'string' ? record.subscription : 'free',
newsletter: typeof record.newsletter === 'boolean' ? record.newsletter : false,
@ -136,20 +134,6 @@ export function useAuth() {
}
}, []);
const requestVerification = useCallback(async (email: string) => {
setLoading(true);
setError(null);
try {
await pb.collection('users').requestVerification(email);
} catch (err) {
const msg = err instanceof Error ? err.message : 'Verification request failed';
setError(msg);
throw err;
} finally {
setLoading(false);
}
}, []);
const clearError = useCallback(() => {
setError(null);
}, []);
@ -163,7 +147,6 @@ export function useAuth() {
loginWithOAuth,
logout,
requestPasswordReset,
requestVerification,
refreshAuth,
clearError,
};

View file

@ -95,7 +95,7 @@ export function useDeckLayers({
useEffect(() => {
if (!hasSelection) return;
setMarchTime(0);
const id = setInterval(() => setMarchTime((t) => t + 0.3), 50);
const id = setInterval(() => setMarchTime((t) => (t + 0.3) % 10000), 50);
return () => clearInterval(id);
}, [hasSelection]);
@ -771,9 +771,12 @@ export function useDeckLayers({
onHexagonHoverRef.current(null);
}, []);
const clearPopupInfo = useCallback(() => setPopupInfo(null), []);
return {
layers,
popupInfo,
clearPopupInfo,
hoverPosition,
countRange,
postcodeCountRange,

View file

@ -1,4 +1,4 @@
import { useState, useCallback, useMemo, useRef } from 'react';
import { useState, useCallback, useMemo, useRef, useEffect } from 'react';
import type { FeatureMeta, FeatureFilters } from '../types';
import { trackEvent } from '../lib/analytics';
@ -15,6 +15,7 @@ export function useFilters({ initialFilters, features }: UseFiltersOptions) {
const pendingDragRef = useRef<string | null>(null);
const dragActiveRef = useRef<string | null>(null);
const dragValueRef = useRef<[number, number] | null>(null);
const undoStackRef = useRef<FeatureFilters[]>([]);
const enabledFeatures = useMemo(() => new Set(Object.keys(filters)), [filters]);
@ -34,17 +35,41 @@ export function useFilters({ initialFilters, features }: UseFiltersOptions) {
const meta = features.find((f) => f.name === name);
if (!meta) return;
trackEvent('Filter Add', { feature: name });
setFilters((prev) => {
undoStackRef.current.push(prev);
if (undoStackRef.current.length > 50) undoStackRef.current.shift();
if (meta.type === 'enum' && meta.values) {
setFilters((prev) => ({ ...prev, [name]: [...meta.values!] }));
return { ...prev, [name]: [...meta.values!] };
} else if (meta.type === 'numeric' && meta.histogram) {
setFilters((prev) => ({ ...prev, [name]: [meta.histogram!.min, meta.histogram!.max] }));
return { ...prev, [name]: [meta.histogram!.min, meta.histogram!.max] };
} else if (meta.min != null && meta.max != null) {
setFilters((prev) => ({ ...prev, [name]: [meta.min!, meta.max!] }));
return { ...prev, [name]: [meta.min!, meta.max!] };
}
return prev;
});
},
[features]
);
const handleUndo = useCallback(() => {
const prev = undoStackRef.current.pop();
if (prev) setFilters(prev);
}, []);
useEffect(() => {
const handler = (e: KeyboardEvent) => {
if ((e.metaKey || e.ctrlKey) && e.key === 'z' && !e.shiftKey) {
const target = e.target as HTMLElement;
if (target.tagName === 'INPUT' || target.tagName === 'TEXTAREA' || target.isContentEditable)
return;
e.preventDefault();
handleUndo();
}
};
window.addEventListener('keydown', handler);
return () => window.removeEventListener('keydown', handler);
}, [handleUndo]);
const handleFilterChange = useCallback((name: string, value: [number, number] | string[]) => {
setFilters((prev) => ({ ...prev, [name]: value }));
}, []);

View file

@ -1,4 +1,4 @@
import { useState, useCallback } from 'react';
import { useState, useCallback, useRef, useEffect } from 'react';
import pb from '../lib/pocketbase';
import { apiUrl, authHeaders } from '../lib/api';
import { trackEvent } from '../lib/analytics';
@ -12,23 +12,37 @@ export interface SavedSearch {
created: string;
}
const POLL_INTERVAL_MS = 2000;
const MAX_POLL_ATTEMPTS = 15;
export function useSavedSearches(userId: string | null) {
const [searches, setSearches] = useState<SavedSearch[]>([]);
const [loading, setLoading] = useState(false);
const [saving, setSaving] = useState(false);
const [error, setError] = useState<string | null>(null);
const fetchSearches = useCallback(async () => {
if (!userId) return;
setLoading(true);
setError(null);
try {
const pollTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
const pollAttemptsRef = useRef(0);
const userIdRef = useRef(userId);
userIdRef.current = userId;
const stopPolling = useCallback(() => {
if (pollTimerRef.current) {
clearInterval(pollTimerRef.current);
pollTimerRef.current = null;
}
pollAttemptsRef.current = 0;
}, []);
// Clean up polling on unmount or userId change
useEffect(() => stopPolling, [userId, stopPolling]);
const fetchRecords = useCallback(async (uid: string): Promise<SavedSearch[]> => {
const records = await pb.collection('saved_searches').getFullList({
sort: '-created',
filter: `user = "${userId}"`,
filter: `user = "${uid}"`,
});
setSearches(
records.map((r) => ({
return records.map((r) => ({
id: r.id,
name: (r as Record<string, unknown>).name as string,
params: (r as Record<string, unknown>).params as string,
@ -37,14 +51,55 @@ export function useSavedSearches(userId: string | null) {
: '',
notes: ((r as Record<string, unknown>).notes as string) || '',
created: r.created,
}))
);
}));
}, []);
const startPolling = useCallback(() => {
if (pollTimerRef.current) return;
pollAttemptsRef.current = 0;
pollTimerRef.current = setInterval(async () => {
const uid = userIdRef.current;
if (!uid) {
stopPolling();
return;
}
pollAttemptsRef.current++;
if (pollAttemptsRef.current >= MAX_POLL_ATTEMPTS) {
stopPolling();
return;
}
try {
const mapped = await fetchRecords(uid);
setSearches(mapped);
if (!mapped.some((s) => !s.screenshotUrl)) {
stopPolling();
}
} catch {
// Silent — background poll errors don't surface to UI
}
}, POLL_INTERVAL_MS);
}, [stopPolling, fetchRecords]);
const fetchSearches = useCallback(async () => {
if (!userId) return;
setLoading(true);
setError(null);
try {
const mapped = await fetchRecords(userId);
setSearches(mapped);
// Poll for missing screenshots so they appear without a page refresh
if (mapped.some((s) => !s.screenshotUrl)) {
startPolling();
} else {
stopPolling();
}
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to load searches');
} finally {
setLoading(false);
}
}, [userId]);
}, [userId, fetchRecords, startPolling, stopPolling]);
const saveSearch = useCallback(
async (name: string) => {
@ -112,6 +167,15 @@ export function useSavedSearches(userId: string | null) {
}
}, []);
const updateSearchName = useCallback(async (id: string, name: string) => {
try {
await pb.collection('saved_searches').update(id, { name });
setSearches((prev) => prev.map((s) => (s.id === id ? { ...s, name } : s)));
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to update name');
}
}, []);
return {
searches,
loading,
@ -121,5 +185,6 @@ export function useSavedSearches(userId: string | null) {
saveSearch,
deleteSearch,
updateSearchNotes,
updateSearchName,
};
}

View file

@ -0,0 +1,58 @@
import { useEffect, useRef } from 'react';
import { apiUrl } from '../lib/api';
/**
 * Periodic session telemetry.
 *
 * Every 30 seconds a beacon is posted to the `telemetry` API endpoint carrying
 * the elapsed session time in seconds and the number of active filters (the
 * `;;`-separated entries of the URL's `f` query parameter). The first beacon
 * additionally carries the entry path and referrer domain that were captured
 * when the hook first ran.
 */
export function useTelemetry() {
  const sessionStart = useRef(Date.now());
  const landingPath = useRef(window.location.pathname);
  const referrerDomain = useRef(extractReferrerDomain());
  const entryReported = useRef(false);

  useEffect(() => {
    /** Assemble the JSON body for the next beacon. */
    const buildPayload = (): Record<string, unknown> => {
      const elapsedMs = Date.now() - sessionStart.current;

      // Filters are encoded in the URL as `f=name:min:max;;name:val`
      const encodedFilters = new URLSearchParams(window.location.search).get('f') || '';

      const body: Record<string, unknown> = {
        session_seconds: Math.round(elapsedMs / 1000),
        filter_count: encodedFilters ? encodedFilters.split(';;').length : 0,
      };

      // Entry-point details accompany the first beacon only
      if (!entryReported.current) {
        body.entry_path = landingPath.current;
        body.referrer = referrerDomain.current;
        entryReported.current = true;
      }
      return body;
    };

    const timer = setInterval(() => {
      const body = buildPayload();
      navigator.sendBeacon(
        apiUrl('telemetry'),
        new Blob([JSON.stringify(body)], { type: 'application/json' })
      );
    }, 30_000);

    return () => clearInterval(timer);
  }, []);
}
/**
 * Return the hostname of `document.referrer`.
 *
 * Falls back to "direct" when there is no referrer, when the referrer shares
 * this page's origin (internal navigation is not an external referrer), or
 * when the referrer string cannot be parsed as a URL.
 */
function extractReferrerDomain(): string {
  const rawReferrer = document.referrer;
  if (!rawReferrer) return 'direct';

  try {
    const { origin, hostname } = new URL(rawReferrer);
    return origin === window.location.origin ? 'direct' : hostname;
  } catch {
    return 'direct';
  }
}

View file

@ -30,8 +30,12 @@ export function useTravelDestinations(mode: TransportMode) {
return res.json();
})
.then((data: { destinations: Destination[] }) => {
cacheRef.current[mode] = data.destinations;
setDestinations(data.destinations);
const normalized = data.destinations.map((d) => ({
...d,
city: d.city === 'City of London' ? 'London' : d.city,
}));
cacheRef.current[mode] = normalized;
setDestinations(normalized);
})
.catch((err) => logNonAbortError('travel destinations', err))
.finally(() => setLoading(false));

View file

@ -1,4 +1,6 @@
import { useState, useCallback, useMemo } from 'react';
import type { ComponentType } from 'react';
import { CarIcon, BicycleIcon, WalkingIcon, TransitIcon } from '../components/ui/icons';
export type TransportMode = 'car' | 'bicycle' | 'walking' | 'transit';
@ -18,6 +20,13 @@ export const MODE_DESCRIPTIONS: Record<TransportMode, string> = {
transit: 'Journey time by train, tube, and bus',
};
export const MODE_ICONS: Record<TransportMode, ComponentType<{ className?: string }>> = {
car: CarIcon,
bicycle: BicycleIcon,
walking: WalkingIcon,
transit: TransitIcon,
};
export interface TravelTimeEntry {
mode: TransportMode;
slug: string;

View file

@ -7,48 +7,48 @@ const STORAGE_KEY = 'tutorial_completed';
const STEPS: Step[] = [
{
target: '[data-tutorial="filters"]',
title: 'Filter Properties',
title: 'Tell the map what matters',
content:
'Use filters to narrow down to areas which contain matching properties. Filter by crime rate, number of schools around, or filter to an area with detached houses. Pin a filter with the eye icon to colour the map by that feature.',
'Set your budget, commute limit, school quality, crime threshold \u2014 whatever matters to you. Only areas that qualify stay lit. Use the eye icon to colour by any feature.',
placement: 'right',
disableBeacon: true,
},
{
target: '[data-tutorial="ai-filters"]',
title: 'AI-Powered Filters',
title: 'Or just describe it',
content:
'Describe your ideal area in plain English — like "quiet neighbourhood with good schools" — and AI will set up the right filters for you automatically.',
'Type what you want in plain English \u2014 like "quiet area near good schools under \u00A3400k" \u2014 and we\u2019ll set up the filters for you.',
placement: 'right',
disableBeacon: true,
},
{
target: '[data-tutorial="map"]',
title: 'Explore the Map',
title: 'Explore what\u2019s out there',
content:
'Pan and zoom to explore property data across England. Click any area (hexagon or postcode boundary) to see detailed stats of historical or currently sold properties matching your filters.',
'Pan and zoom across England. Click any coloured area to see crime, schools, prices, broadband, noise \u2014 everything about that neighbourhood.',
placement: 'bottom',
disableBeacon: true,
},
{
target: '[data-tutorial="search"]',
title: 'Search Locations',
content: 'Search for a place name or postcode to jump directly to that area on the map.',
title: 'Jump to a location',
content: 'Search for any place or postcode to fly straight there.',
placement: 'bottom',
disableBeacon: true,
},
{
target: '[data-tutorial="right-pane"]',
title: 'Area Stats & Properties',
title: 'Dig into the details',
content:
'After clicking a hexagon, view aggregated area statistics or browse individual properties in this pane.',
'See area statistics, histograms, and individual property records \u2014 prices, floor area, energy ratings, and more.',
placement: 'left',
disableBeacon: true,
},
{
target: '[data-tutorial="poi-button"]',
title: 'Points of Interest',
title: 'What\u2019s nearby?',
content:
'Toggle points of interest like schools, shops, and transport stops to see what amenities are nearby.',
'Toggle schools, shops, stations, parks, and restaurants on the map to see what\u2019s within reach.',
placement: 'left',
disableBeacon: true,
styles: {
@ -59,13 +59,13 @@ const STEPS: Step[] = [
},
];
export function useTutorial(initialLoading: boolean, isMobile: boolean) {
export function useTutorial(initialLoading: boolean, isMobile: boolean, blocked = false) {
const [run, setRun] = useState(() => {
if (isMobile) return false;
return !localStorage.getItem(STORAGE_KEY);
});
const shouldRun = run && !initialLoading && !isMobile;
const shouldRun = run && !initialLoading && !isMobile && !blocked;
const handleCallback = useCallback((data: CallBackProps) => {
const { status, action, type } = data;

View file

@ -59,6 +59,11 @@ export async function fetchWithRetry<T>(
}
}
/**
 * Kick off a screenshot request with `og=1` prepended to the given query
 * params so the OG-image screenshot cache is warm before it is needed.
 * Fire-and-forget: the response is never read and failures are ignored.
 */
export function prewarmScreenshot(params: string): void {
  const query = new URLSearchParams(`og=1&${params}`);
  const pending = fetch(apiUrl('screenshot', query), authHeaders());
  // best-effort, don't care if it fails
  pending.catch(() => {});
}
export async function shortenUrl(params: string): Promise<string> {
const res = await fetch(apiUrl('shorten'), {
method: 'POST',

View file

@ -1,7 +1,21 @@
/** Copy text to clipboard with execCommand fallback for older browsers. */
export function copyToClipboard(text: string, onSuccess: () => void): void {
if (navigator.clipboard?.writeText) {
navigator.clipboard.writeText(text).then(onSuccess);
navigator.clipboard
.writeText(text)
.then(onSuccess)
.catch(() => {
// Fallback if clipboard permission denied
const ta = document.createElement('textarea');
ta.value = text;
ta.style.position = 'fixed';
ta.style.opacity = '0';
document.body.appendChild(ta);
ta.select();
document.execCommand('copy');
document.body.removeChild(ta);
onSuccess();
});
} else {
const ta = document.createElement('textarea');
ta.value = text;

View file

@ -35,7 +35,7 @@ export const ZOOM_TO_RESOLUTION_THRESHOLDS = [
{ maxZoom: 13, resolution: 9 },
] as const;
export const POSTCODE_ZOOM_THRESHOLD = 16;
export const POSTCODE_ZOOM_THRESHOLD = 15;
export const FEATURE_GRADIENT: { t: number; color: [number, number, number] }[] = [
{ t: 0, color: [46, 204, 113] },
@ -183,8 +183,8 @@ export const STACKED_ENUM_GROUPS: Record<
},
{
label: 'Leasehold/Freehold',
feature: 'Leashold/Freehold',
components: ['Leashold/Freehold'],
feature: 'Leasehold/Freehold',
components: ['Leasehold/Freehold'],
valueOrder: ['Freehold', 'Leasehold'],
valueColors: ['#3b82f6', '#f59e0b'],
},

View file

@ -49,24 +49,56 @@ const RIGHTMOVE_PRICES = [
3000000, 4000000, 5000000, 7500000, 10000000, 15000000, 20000000,
];
function nearestRadius(target: number, allowed: number[]): number {
return allowed.reduce((best, r) => (Math.abs(r - target) < Math.abs(best - target) ? r : best));
}
// Rightmove allowed monthly rent values (pcm)
const RIGHTMOVE_RENTS = [
250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000, 1250, 1500, 1750, 2000, 2500, 3000, 3500,
4000, 5000, 7500, 10000, 15000, 25000,
];
/** Snap minPrice down and maxPrice up so Rightmove doesn't ignore them */
function snapRightmovePrice(value: number, direction: 'floor' | 'ceil'): number {
// OnTheMarket allowed buy prices
const OTM_PRICES = [
50000, 60000, 70000, 80000, 90000, 100000, 110000, 120000, 125000, 130000, 140000, 150000, 160000,
170000, 175000, 180000, 190000, 200000, 210000, 220000, 230000, 240000, 250000, 275000, 300000,
325000, 350000, 375000, 400000, 425000, 450000, 475000, 500000, 550000, 600000, 650000, 700000,
750000, 800000, 900000, 1000000, 1250000, 1500000, 2000000, 2500000, 3000000, 5000000, 7500000,
10000000, 15000000,
];
// OnTheMarket allowed monthly rent values (pcm)
const OTM_RENTS = [
100, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 1100,
1200, 1250, 1300, 1400, 1500, 1750, 2000, 2500, 3000, 3500, 4000, 5000, 7500, 10000, 25000,
];
// Zoopla allowed buy prices
const ZOOPLA_PRICES = [
10000, 25000, 50000, 75000, 100000, 125000, 150000, 175000, 200000, 225000, 250000, 275000,
300000, 325000, 350000, 375000, 400000, 425000, 450000, 475000, 500000, 550000, 600000, 650000,
700000, 800000, 900000, 1000000, 1250000, 1500000, 1750000, 2000000, 2500000, 3000000, 4000000,
5000000, 7500000, 10000000, 15000000,
];
// Zoopla allowed monthly rent values (pcm)
const ZOOPLA_RENTS = [
100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1250, 1500, 1750, 2000, 2500, 3000, 3500, 4000,
5000, 7500, 10000, 25000,
];
function snapToAllowed(value: number, allowed: number[], direction: 'floor' | 'ceil'): number {
if (direction === 'floor') {
// Largest supported value <= target
for (let i = RIGHTMOVE_PRICES.length - 1; i >= 0; i--) {
if (RIGHTMOVE_PRICES[i] <= value) return RIGHTMOVE_PRICES[i];
for (let i = allowed.length - 1; i >= 0; i--) {
if (allowed[i] <= value) return allowed[i];
}
return RIGHTMOVE_PRICES[0];
return allowed[0];
}
// Smallest supported value >= target
for (const p of RIGHTMOVE_PRICES) {
for (const p of allowed) {
if (p >= value) return p;
}
return RIGHTMOVE_PRICES[RIGHTMOVE_PRICES.length - 1];
return allowed[allowed.length - 1];
}
/**
 * Pick the entry of `allowed` closest to `target`.
 * On a tie the earlier entry wins; `allowed` must be non-empty.
 */
function nearestRadius(target: number, allowed: number[]): number {
  const distance = (radius: number): number => Math.abs(radius - target);
  return allowed.reduce((closest, candidate) =>
    distance(candidate) < distance(closest) ? candidate : closest
  );
}
interface SearchUrlOptions {
@ -90,7 +122,19 @@ export function buildPropertySearchUrls({
const radiusMiles = isPostcode ? 0.25 : (H3_RADIUS_MILES[resolution] ?? 1);
const priceFilter = filters['Last known price'];
const listingStatus = filters['Listing status'];
const isRent =
Array.isArray(listingStatus) &&
typeof listingStatus[0] === 'string' &&
(listingStatus as string[]).includes('For rent');
// Check price filters in priority order: asking price (current listings) > estimated > last known
// For rent mode, check asking rent first
const priceFilter = isRent
? filters['Asking rent (monthly)']
: (filters['Asking price'] ??
filters['Estimated current price'] ??
filters['Last known price']);
const minPrice =
Array.isArray(priceFilter) && typeof priceFilter[0] === 'number' ? priceFilter[0] : undefined;
const maxPrice =
@ -131,15 +175,16 @@ export function buildPropertySearchUrls({
// Rightmove — requires locationIdentifier from typeahead API
let rightmove: string | null = null;
if (rightmoveLocationId) {
const rmPrices = isRent ? RIGHTMOVE_RENTS : RIGHTMOVE_PRICES;
const rmParams = new URLSearchParams();
rmParams.set('searchLocation', postcode);
rmParams.set('useLocationIdentifier', 'true');
rmParams.set('locationIdentifier', rightmoveLocationId);
rmParams.set('radius', String(nearestRadius(radiusMiles, RIGHTMOVE_RADII)));
if (minPrice !== undefined)
rmParams.set('minPrice', String(snapRightmovePrice(minPrice, 'floor')));
rmParams.set('minPrice', String(snapToAllowed(minPrice, rmPrices, 'floor')));
if (maxPrice !== undefined)
rmParams.set('maxPrice', String(snapRightmovePrice(maxPrice, 'ceil')));
rmParams.set('maxPrice', String(snapToAllowed(maxPrice, rmPrices, 'ceil')));
if (minBedrooms !== undefined) rmParams.set('minBedrooms', String(Math.floor(minBedrooms)));
if (maxBedrooms !== undefined) rmParams.set('maxBedrooms', String(Math.ceil(maxBedrooms)));
if (minBathrooms !== undefined) rmParams.set('minBathrooms', String(Math.floor(minBathrooms)));
@ -155,20 +200,24 @@ export function buildPropertySearchUrls({
];
if (rmTypes.length > 0) rmParams.set('propertyTypes', rmTypes.join(','));
}
if (selectedTenures.length > 0) {
if (!isRent && selectedTenures.length > 0) {
const rmTenures = selectedTenures.map((t) => (t === 'Freehold' ? 'FREEHOLD' : 'LEASEHOLD'));
rmParams.set('tenureTypes', rmTenures.join(','));
}
rmParams.set('_includeSSTC', 'on');
rightmove = `https://www.rightmove.co.uk/property-for-sale/find.html?${rmParams.toString()}`;
if (!isRent) rmParams.set('_includeSSTC', 'on');
const rmPath = isRent ? 'property-to-rent' : 'property-for-sale';
rightmove = `https://www.rightmove.co.uk/${rmPath}/find.html?${rmParams.toString()}`;
}
// OnTheMarket — postcode slug in URL path (e.g. "SW1A 1AA" → "sw1a-1aa")
const otmSlug = postcode.toLowerCase().replace(/\s+/g, '-');
const otmPrices = isRent ? OTM_RENTS : OTM_PRICES;
const otmParams = new URLSearchParams();
otmParams.set('radius', String(nearestRadius(radiusMiles, OTM_RADII)));
if (minPrice !== undefined) otmParams.set('min-price', String(Math.round(minPrice)));
if (maxPrice !== undefined) otmParams.set('max-price', String(Math.round(maxPrice)));
if (minPrice !== undefined)
otmParams.set('min-price', String(snapToAllowed(minPrice, otmPrices, 'floor')));
if (maxPrice !== undefined)
otmParams.set('max-price', String(snapToAllowed(maxPrice, otmPrices, 'ceil')));
if (selectedTypes.length > 0) {
const otmTypes = [
...new Set(selectedTypes.map((t) => PROPERTY_TYPE_MAP[t]?.onthemarket).filter(Boolean)),
@ -178,15 +227,20 @@ export function buildPropertySearchUrls({
}
}
otmParams.set('view', 'map-list');
const onthemarket = `https://www.onthemarket.com/for-sale/property/${otmSlug}/?${otmParams.toString()}`;
const otmPath = isRent ? 'to-rent' : 'for-sale';
const onthemarket = `https://www.onthemarket.com/${otmPath}/property/${otmSlug}/?${otmParams.toString()}`;
// Zoopla
const zPrices = isRent ? ZOOPLA_RENTS : ZOOPLA_PRICES;
const zParams = new URLSearchParams();
zParams.set('q', postcode);
zParams.set('search_source', 'for-sale');
const zSearchSource = isRent ? 'to-rent' : 'for-sale';
zParams.set('search_source', zSearchSource);
zParams.set('radius', String(nearestRadius(radiusMiles, ZOOPLA_RADII)));
if (minPrice !== undefined) zParams.set('price_min', String(Math.round(minPrice)));
if (maxPrice !== undefined) zParams.set('price_max', String(Math.round(maxPrice)));
if (minPrice !== undefined)
zParams.set('price_min', String(snapToAllowed(minPrice, zPrices, 'floor')));
if (maxPrice !== undefined)
zParams.set('price_max', String(snapToAllowed(maxPrice, zPrices, 'ceil')));
if (selectedTypes.length > 0) {
const zTypes = [
...new Set(selectedTypes.map((t) => PROPERTY_TYPE_MAP[t]?.zoopla).filter(Boolean)),
@ -195,18 +249,17 @@ export function buildPropertySearchUrls({
zParams.append('property_sub_type', zt!);
}
}
const zoopla = `https://www.zoopla.co.uk/for-sale/property/?${zParams.toString()}`;
const zoopla = `https://www.zoopla.co.uk/${zSearchSource}/property/?${zParams.toString()}`;
// OpenRent — rent mode only
const listingStatus = filters['Listing status'];
const isRent =
Array.isArray(listingStatus) &&
typeof listingStatus[0] === 'string' &&
(listingStatus as string[]).includes('For rent');
let openrent: string | null = null;
if (isRent) {
const postcodeNoSpaces = postcode.replace(/\s+/g, '');
const orSlug = postcodeNoSpaces.toLowerCase();
const orParams = new URLSearchParams();
orParams.set('term', postcode);
orParams.set('term', postcodeNoSpaces.toUpperCase());
const radiusKm = Math.round((isPostcode ? 0.25 : radiusMiles) * 1.609);
orParams.set('area', String(Math.max(1, radiusKm)));
const rentFilter = filters['Asking rent (monthly)'];
const minRent =
Array.isArray(rentFilter) && typeof rentFilter[0] === 'number' ? rentFilter[0] : undefined;
@ -216,7 +269,7 @@ export function buildPropertySearchUrls({
if (maxRent !== undefined) orParams.set('prices_max', String(Math.round(maxRent)));
if (minBedrooms !== undefined) orParams.set('bedrooms_min', String(Math.floor(minBedrooms)));
if (maxBedrooms !== undefined) orParams.set('bedrooms_max', String(Math.ceil(maxBedrooms)));
openrent = `https://www.openrent.com/properties-to-rent?${orParams.toString()}`;
openrent = `https://www.openrent.co.uk/properties-to-rent/${orSlug}?${orParams.toString()}`;
}
return { rightmove, onthemarket, zoopla, openrent };

View file

@ -23,6 +23,26 @@ export function formatFilterValue(value: number, raw?: boolean): string {
return value.toFixed(2);
}
/**
 * Parse a user-typed value like "250k", "1.2M", "£300000", "50 sqm" back to a number.
 *
 * @param text Raw input text; surrounding whitespace and thousands commas are ignored.
 * @param opts Optional formatting hints:
 *   - `prefix`: literal text stripped from the start if present (e.g. "£").
 *   - `suffix`: literal text stripped from the end if present; it is trimmed
 *     first, so " sqm" matches both "50 sqm" and "50sqm".
 *   - `step`: when truthy, the result is rounded to the nearest multiple of it.
 * @returns The parsed number, or `null` when the text is not a plain decimal
 *   optionally followed by a `k` (thousand) or `m` (million) multiplier.
 */
export function parseInputValue(
  text: string,
  opts?: { prefix?: string; suffix?: string; step?: number }
): number | null {
  let s = text.trim();

  // Strip prefix/suffix as literal text. Interpolating them into a RegExp
  // breaks on regex metacharacters ("+" throws, "." matches anything) and on
  // prefixes that are longer than one character or start with a letter.
  const prefix = opts?.prefix;
  if (prefix && s.startsWith(prefix)) s = s.slice(prefix.length);
  const suffix = opts?.suffix?.trim();
  if (suffix && s.endsWith(suffix)) s = s.slice(0, -suffix.length);

  s = s.trim().replace(/,/g, '');
  const m = s.match(/^(-?\d+\.?\d*)\s*([kKmM]?)$/);
  if (!m) return null;

  let val = parseFloat(m[1]);
  if (isNaN(val)) return null;

  const unit = m[2].toLowerCase();
  if (unit === 'k') val *= 1_000;
  else if (unit === 'm') val *= 1_000_000;

  if (opts?.step) val = Math.round(val / opts.step) * opts.step;
  return val;
}
export function formatDuration(d: string): string {
if (d === 'F') return 'Freehold';
if (d === 'L') return 'Leasehold';

View file

@ -40,7 +40,9 @@ def download_and_convert(output_path: Path) -> None:
df = pl.concat(frames)
print(f"Total rows: {df.height}")
result = df.rename({"GEOGRAPHY_CODE": "lsoa21", "OBS_VALUE": "population"}).with_columns(
result = df.rename(
{"GEOGRAPHY_CODE": "lsoa21", "OBS_VALUE": "population"}
).with_columns(
pl.col("population").cast(pl.UInt32),
)
@ -48,7 +50,9 @@ def download_and_convert(output_path: Path) -> None:
result = result.filter(pl.col("lsoa21").str.starts_with("E"))
print(f"England LSOAs: {result.height}")
print(f"Population range: {result['population'].min()} - {result['population'].max()}")
print(
f"Population range: {result['population'].min()} - {result['population'].max()}"
)
print(f"Mean population: {result['population'].mean():.0f}")
output_path.parent.mkdir(parents=True, exist_ok=True)

View file

@ -119,7 +119,11 @@ class PlaceHandler(osmium.SimpleHandler):
station_tag = tags.get("station", "")
network = tags.get("network", "").lower()
# Skip tram stops
if station_tag == "light_rail" or "tramlink" in network or "tram" in network:
if (
station_tag == "light_rail"
or "tramlink" in network
or "tram" in network
):
return
display_name = _station_display_name(name, tags)
self._add(display_name, "station", lat, lon, population)
@ -131,9 +135,7 @@ def main() -> None:
parser.add_argument(
"--output", type=Path, required=True, help="Output parquet file path"
)
parser.add_argument(
"--pbf", type=Path, required=True, help="Path to OSM PBF file"
)
parser.add_argument("--pbf", type=Path, required=True, help="Path to OSM PBF file")
parser.add_argument(
"--boundary",
type=Path,

View file

@ -111,9 +111,7 @@ def main() -> None:
parser.add_argument(
"--output", type=Path, required=True, help="Output parquet file path"
)
parser.add_argument(
"--pbf", type=Path, required=True, help="Path to OSM PBF file"
)
parser.add_argument("--pbf", type=Path, required=True, help="Path to OSM PBF file")
parser.add_argument(
"--boundary",
type=Path,

View file

@ -99,11 +99,15 @@ def convert_to_parquet(xls_path: Path, parquet_path: Path) -> None:
combined = pl.concat(frames)
# Remap old LA codes to new unitary authority codes and average medians
combined = combined.with_columns(
combined = (
combined.with_columns(
pl.col("area_code").replace(LA_CONSOLIDATION),
).group_by("area_code", "bedrooms").agg(
)
.group_by("area_code", "bedrooms")
.agg(
pl.col("median_monthly_rent").mean(),
)
)
print(f"Combined: {combined.shape}")
print(f"Non-null medians: {combined['median_monthly_rent'].drop_nulls().len()}")

View file

@ -13,9 +13,7 @@ TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None:
df = pl.read_parquet(postcodes_path, columns=["Postcode"])
outcodes = sorted(
set(df["Postcode"].str.split(" ").list.first().to_list()) - {""}
)
outcodes = sorted(set(df["Postcode"].str.split(" ").list.first().to_list()) - {""})
print(f"Querying Rightmove typeahead for {len(outcodes)} outcodes...")
mapping: dict[str, str] = {}
@ -28,11 +26,9 @@ def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None:
data = resp.json()
found = False
for m in data.get("matches", []):
if (
m["type"] == "OUTCODE"
and m["displayName"].upper().replace(" ", "")
== oc.upper().replace(" ", "")
):
if m["type"] == "OUTCODE" and m["displayName"].upper().replace(
" ", ""
) == oc.upper().replace(" ", ""):
mapping[oc] = str(m["id"])
found = True
break
@ -57,9 +53,7 @@ def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None:
def main() -> None:
parser = argparse.ArgumentParser(
description="Fetch Rightmove outcode ID mapping"
)
parser = argparse.ArgumentParser(description="Fetch Rightmove outcode ID mapping")
parser.add_argument(
"--postcodes", type=Path, required=True, help="postcode.parquet path"
)

View file

@ -64,7 +64,9 @@ def ensure_pmtiles_cli(bin_path: Path, version: str) -> None:
def main():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--output", type=Path, required=True, help="Output .pmtiles path")
parser.add_argument(
"--output", type=Path, required=True, help="Output .pmtiles path"
)
parser.add_argument(
"--pmtiles-version", default="1.22.3", help="go-pmtiles release version"
)

View file

@ -56,7 +56,9 @@ NR_TIMETABLE_URL = "https://opendata.nationalrail.co.uk/api/staticfeeds/3.0/time
USER_AGENT = "property-map-pipeline/1.0 (https://github.com)"
def _download_http(url: str, dest: Path, *, desc: str, headers: dict | None = None) -> None:
def _download_http(
url: str, dest: Path, *, desc: str, headers: dict | None = None
) -> None:
"""Stream-download a URL to a file with progress bar."""
dest.parent.mkdir(parents=True, exist_ok=True)
tmp = dest.with_suffix(dest.suffix + ".tmp")
@ -117,9 +119,10 @@ def clean_gtfs(src: Path, dst: Path) -> None:
return
print("Cleaning GTFS for R5 compatibility...")
with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile(
dst, "w", zipfile.ZIP_DEFLATED
) as zout:
with (
zipfile.ZipFile(src, "r") as zin,
zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout,
):
for info in zin.infolist():
if info.filename == "stop_times.txt":
dropped = 0
@ -127,7 +130,9 @@ def clean_gtfs(src: Path, dst: Path) -> None:
header = f.readline()
header_str = header.decode("utf-8").strip()
cols = header_str.split(",")
arr_idx = cols.index("arrival_time") if "arrival_time" in cols else -1
arr_idx = (
cols.index("arrival_time") if "arrival_time" in cols else -1
)
dep_idx = (
cols.index("departure_time") if "departure_time" in cols else -1
)
@ -179,7 +184,9 @@ def clean_gtfs(src: Path, dst: Path) -> None:
year = int(date_val[:4])
if year > 2100:
parts[i] = "20991231"
print(f" feed_info: capped end_date {date_val} → 20991231")
print(
f" feed_info: capped end_date {date_val} → 20991231"
)
fixed_lines.append(",".join(parts))
zout.writestr("feed_info.txt", "\n".join(fixed_lines) + "\n")
else:
@ -334,7 +341,9 @@ def convert_high_freq_to_frequency_based(
end_secs = trips[-1][1] + int(median_hw)
headway_rounded = max(60, round(median_hw / 60) * 60)
frequency_entries.append((template_trip_id, start_secs, end_secs, headway_rounded))
frequency_entries.append(
(template_trip_id, start_secs, end_secs, headway_rounded)
)
for trip_id, _ in trips[1:]:
trips_to_remove.add(trip_id)
groups_converted += 1
@ -344,9 +353,10 @@ def convert_high_freq_to_frequency_based(
print(f" Created {len(frequency_entries)} frequency entries")
# Step 5: Write modified GTFS
with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile(
dst, "w", zipfile.ZIP_DEFLATED
) as zout:
with (
zipfile.ZipFile(src, "r") as zin,
zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout,
):
for info in zin.infolist():
if info.filename == "trips.txt":
with zin.open(info) as f:
@ -466,15 +476,22 @@ def download_national_rail_cif(raw_dir: Path) -> Path | None:
email = os.environ.get("NATIONAL_RAIL_EMAIL")
password = os.environ.get("NATIONAL_RAIL_PASSWORD")
if not email or not password:
print("Warning: NATIONAL_RAIL_EMAIL/NATIONAL_RAIL_PASSWORD not set, skipping national rail")
print(
"Warning: NATIONAL_RAIL_EMAIL/NATIONAL_RAIL_PASSWORD not set, skipping national rail"
)
return None
print("Authenticating with National Rail Open Data...")
auth_data = urllib.parse.urlencode({"username": email, "password": password}).encode()
auth_data = urllib.parse.urlencode(
{"username": email, "password": password}
).encode()
auth_req = urllib.request.Request(
NR_AUTH_URL,
data=auth_data,
headers={"User-Agent": USER_AGENT, "Content-Type": "application/x-www-form-urlencoded"},
headers={
"User-Agent": USER_AGENT,
"Content-Type": "application/x-www-form-urlencoded",
},
)
with urllib.request.urlopen(auth_req) as resp:
token_data = json.loads(resp.read())
@ -565,9 +582,10 @@ def clean_national_rail_gtfs(src: Path, dst: Path) -> None:
coords_fixed = 0
route_types_fixed = 0
with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile(
dst, "w", zipfile.ZIP_DEFLATED
) as zout:
with (
zipfile.ZipFile(src, "r") as zin,
zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout,
):
for info in zin.infolist():
# Skip non-standard links.txt
if info.filename == "links.txt":
@ -581,8 +599,12 @@ def clean_national_rail_gtfs(src: Path, dst: Path) -> None:
trip_id_idx = cols.index("trip_id")
stop_id_idx = cols.index("stop_id")
seq_idx = cols.index("stop_sequence")
pickup_idx = cols.index("pickup_type") if "pickup_type" in cols else -1
dropoff_idx = cols.index("drop_off_type") if "drop_off_type" in cols else -1
pickup_idx = (
cols.index("pickup_type") if "pickup_type" in cols else -1
)
dropoff_idx = (
cols.index("drop_off_type") if "drop_off_type" in cols else -1
)
tmp = tempfile.NamedTemporaryFile(
mode="wb", delete=False, suffix=".txt"
@ -769,16 +791,27 @@ def _docker_run_dtd2mysql(
) -> None:
"""Run dtd2mysql in a Node.js container on the same Docker network as MariaDB."""
cmd = [
"docker", "run", "--rm", "--network", network,
"-e", f"DATABASE_HOSTNAME={db_container}",
"-e", "DATABASE_USERNAME=root",
"-e", "DATABASE_PASSWORD=root",
"-e", "DATABASE_NAME=dtd",
"docker",
"run",
"--rm",
"--network",
network,
"-e",
f"DATABASE_HOSTNAME={db_container}",
"-e",
"DATABASE_USERNAME=root",
"-e",
"DATABASE_PASSWORD=root",
"-e",
"DATABASE_NAME=dtd",
]
for v in volumes:
cmd.extend(["-v", v])
# Install zip (needed for --gtfs-zip) then run dtd2mysql
inner = "apt-get update -qq && apt-get install -y -qq zip > /dev/null 2>&1 && npx --yes dtd2mysql " + " ".join(args)
inner = (
"apt-get update -qq && apt-get install -y -qq zip > /dev/null 2>&1 && npx --yes dtd2mysql "
+ " ".join(args)
)
cmd.extend(["node:20", "bash", "-c", inner])
subprocess.run(cmd, check=True)
@ -805,11 +838,17 @@ def convert_national_rail_to_gtfs(raw_dir: Path, output_dir: Path) -> Path:
subprocess.run(["docker", "network", "create", network], capture_output=True)
subprocess.run(
[
"docker", "run", "-d",
"--name", db_container,
"--network", network,
"-e", "MARIADB_ROOT_PASSWORD=root",
"-e", "MARIADB_DATABASE=dtd",
"docker",
"run",
"-d",
"--name",
db_container,
"--network",
network,
"-e",
"MARIADB_ROOT_PASSWORD=root",
"-e",
"MARIADB_DATABASE=dtd",
"mariadb:latest",
],
check=True,
@ -820,7 +859,16 @@ def convert_national_rail_to_gtfs(raw_dir: Path, output_dir: Path) -> Path:
print(" Waiting for MariaDB to be ready...")
for attempt in range(30):
result = subprocess.run(
["docker", "exec", db_container, "mariadb", "-uroot", "-proot", "-e", "SELECT 1"],
[
"docker",
"exec",
db_container,
"mariadb",
"-uroot",
"-proot",
"-e",
"SELECT 1",
],
capture_output=True,
)
if result.returncode == 0:
@ -833,14 +881,16 @@ def convert_national_rail_to_gtfs(raw_dir: Path, output_dir: Path) -> Path:
print("Importing CIF timetable into MariaDB...")
_docker_run_dtd2mysql(
network, db_container,
network,
db_container,
volumes=[f"{raw_abs}:/data:ro"],
args=["--timetable", "/data/national_rail_cif.zip"],
)
print("Exporting GTFS from MariaDB...")
_docker_run_dtd2mysql(
network, db_container,
network,
db_container,
volumes=[f"{raw_abs}:/output"],
args=["--gtfs-zip", "/output/national_rail_gtfs_raw.zip"],
)

View file

@ -94,11 +94,18 @@ def _build(
# Remap terminated postcodes to nearest active successor
postcode_mapping = build_postcode_mapping(arcgis_path)
wide = wide.join(
postcode_mapping.lazy(), left_on="postcode", right_on="old_postcode", how="left"
).with_columns(
wide = (
wide.join(
postcode_mapping.lazy(),
left_on="postcode",
right_on="old_postcode",
how="left",
)
.with_columns(
pl.coalesce("new_postcode", "postcode").alias("postcode"),
).drop("new_postcode")
)
.drop("new_postcode")
)
arcgis = (
pl.scan_parquet(arcgis_path)
@ -179,11 +186,11 @@ def _build(
lsoa_pop = pl.scan_parquet(lsoa_population_path)
wide = wide.join(lsoa_pop, on="lsoa21", how="left")
wide = wide.with_columns(
(pl.col("serious_crime_avg_yr") / pl.col("population") * 1000)
.round(1)
pl.when(pl.col("population") > 0)
.then((pl.col("serious_crime_avg_yr") / pl.col("population") * 1000).round(1))
.alias("serious_crime_per_1k"),
(pl.col("minor_crime_avg_yr") / pl.col("population") * 1000)
.round(1)
pl.when(pl.col("population") > 0)
.then((pl.col("minor_crime_avg_yr") / pl.col("population") * 1000).round(1))
.alias("minor_crime_per_1k"),
).drop("population")
@ -252,7 +259,8 @@ def _build(
.otherwise(pl.col("pp_property_type"))
# Unify EPC's "Flat"/"Maisonette" with price-paid's "Flats/Maisonettes",
# collapse terrace sub-types, and fold rare types into "Other"
.replace({
.replace(
{
"Flat": "Flats/Maisonettes",
"Maisonette": "Flats/Maisonettes",
"End-Terrace": "Terraced",
@ -261,7 +269,8 @@ def _build(
"Enclosed Mid-Terrace": "Terraced",
"Bungalow": "Other",
"Park home": "Other",
})
}
)
.alias("property_type")
)
@ -426,10 +435,16 @@ def main():
help="Census 2021 population by LSOA parquet file",
)
parser.add_argument(
"--output-postcodes", type=Path, required=True, help="Output postcode parquet file path"
"--output-postcodes",
type=Path,
required=True,
help="Output postcode parquet file path",
)
parser.add_argument(
"--output-properties", type=Path, required=True, help="Output properties parquet file path"
"--output-properties",
type=Path,
required=True,
help="Output properties parquet file path",
)
args = parser.parse_args()

View file

@ -454,9 +454,7 @@ class TestFillHoles:
hole1 = [(10, 10), (20, 10), (20, 20), (10, 20), (10, 10)]
outer2 = [(60, 60), (110, 60), (110, 110), (60, 110), (60, 60)]
hole2 = [(70, 70), (80, 70), (80, 80), (70, 80), (70, 70)]
mp = MultiPolygon(
[Polygon(outer1, [hole1]), Polygon(outer2, [hole2])]
)
mp = MultiPolygon([Polygon(outer1, [hole1]), Polygon(outer2, [hole2])])
result = _fill_holes(mp)
assert result.geom_type == "MultiPolygon"
for p in result.geoms:

View file

@ -112,7 +112,9 @@ def predict(test: pl.DataFrame, index: pl.DataFrame) -> pl.DataFrame:
def compute_metrics(actual: np.ndarray, predicted: np.ndarray) -> dict:
valid = np.isfinite(predicted) & np.isfinite(actual) & (actual > 0) & (predicted > 0)
valid = (
np.isfinite(predicted) & np.isfinite(actual) & (actual > 0) & (predicted > 0)
)
actual = actual[valid]
predicted = predicted[valid]
@ -176,7 +178,10 @@ def main():
"--input", type=Path, required=True, help="Path to properties.parquet"
)
parser.add_argument(
"--postcodes", type=Path, required=True, help="Path to postcode.parquet (for lat/lon)"
"--postcodes",
type=Path,
required=True,
help="Path to postcode.parquet (for lat/lon)",
)
parser.add_argument(
"--output", type=Path, required=True, help="Output backtest_results.parquet"
@ -185,7 +190,9 @@ def main():
# Build index from pre-test data only (temporal holdout)
print(f"Building price index (pairs with year2 < {TEST_YEAR_MIN})...")
index = build_index(args.input, max_pair_year=TEST_YEAR_MIN, postcodes_path=args.postcodes)
index = build_index(
args.input, max_pair_year=TEST_YEAR_MIN, postcodes_path=args.postcodes
)
print(
f"\nHoldout index: {len(index):,} rows, {index['sector'].n_unique():,} sectors, "
f"{index['type_group'].n_unique()} type groups"
@ -233,7 +240,9 @@ def main():
knn_est = knn_psm * fa * temporal_adj
n_knn = int((np.isfinite(knn_est) & (knn_est > 0)).sum())
print(f" kNN estimates: {n_knn:,} of {len(test):,} ({n_knn / len(test) * 100:.1f}%)")
print(
f" kNN estimates: {n_knn:,} of {len(test):,} ({n_knn / len(test) * 100:.1f}%)"
)
# Blend: (1-w)*index + w*kNN where both available
index_est = test["predicted"].to_numpy().astype(np.float64)

View file

@ -107,9 +107,7 @@ def main():
pl.when(has_price)
.then(
pl.col("Last known price").cast(pl.Float64)
* (
pl.col("_log_index_current_interp") - pl.col("_log_index_sale_interp")
)
* (pl.col("_log_index_current_interp") - pl.col("_log_index_sale_interp"))
.clip(-MAX_LOG_ADJUSTMENT, MAX_LOG_ADJUSTMENT)
.exp()
)

View file

@ -105,9 +105,7 @@ def extract_pairs(input_path: Path, max_year2: int | None = None) -> pl.DataFram
.alias("log_ratio"),
(
1.0
/ (pl.col("frac_year2") - pl.col("frac_year1"))
.cast(pl.Float64)
.sqrt()
/ (pl.col("frac_year2") - pl.col("frac_year1")).cast(pl.Float64).sqrt()
).alias("weight"),
)
.filter(pl.col("log_ratio").abs() <= OUTLIER_THRESHOLD)
@ -453,8 +451,12 @@ def main():
description="Build improved repeat-sales price index"
)
parser.add_argument("--input", type=Path, required=True)
parser.add_argument("--postcodes", type=Path, required=True,
help="Path to postcode.parquet (for lat/lon centroids)")
parser.add_argument(
"--postcodes",
type=Path,
required=True,
help="Path to postcode.parquet (for lat/lon centroids)",
)
parser.add_argument("--output", type=Path, required=True)
args = parser.parse_args()

View file

@ -43,9 +43,7 @@ def build_knn_pool(
"""
print("Building kNN pool...")
lf = pl.scan_parquet(source) if isinstance(source, Path) else source
query = (
lf
.select(
query = lf.select(
"Postcode",
"Property type",
"lat",
@ -53,8 +51,7 @@ def build_knn_pool(
"Total floor area (sqm)",
"Last known price",
"Date of last transaction",
)
.filter(
).filter(
pl.col("lat").is_not_null(),
pl.col("lon").is_not_null(),
pl.col("Total floor area (sqm)").is_not_null(),
@ -64,27 +61,21 @@ def build_knn_pool(
pl.col("Postcode").is_not_null(),
pl.col("Date of last transaction").is_not_null(),
)
)
if max_sale_year is not None:
query = query.filter(
pl.col("Date of last transaction").dt.year() < max_sale_year
)
pool = (
query.with_columns(
pool = query.with_columns(
sector_expr(),
type_group_expr(),
(
pl.col("Date of last transaction").dt.year().cast(pl.Float64)
+ (
pl.col("Date of last transaction").dt.month().cast(pl.Float64)
- 1.0
)
+ (pl.col("Date of last transaction").dt.month().cast(pl.Float64) - 1.0)
/ 12.0
).alias("_sale_fy"),
pl.lit(ref_frac_year).alias("_ref_fy"),
).collect()
)
pool = pool.filter(pl.col("type_group").is_not_null())
print(f" {len(pool):,} pool properties with lat/lon, floor area, price")

View file

@ -1085,7 +1085,9 @@ def transform(
if cat not in all_set:
mapped_but_absent.append(cat)
if mapped_but_absent:
print(f"CATEGORY_MAP categories not in data (skipped): {sorted(mapped_but_absent)}")
print(
f"CATEGORY_MAP categories not in data (skipped): {sorted(mapped_but_absent)}"
)
# Drop unwanted categories
lf = lf.filter(~pl.col("category").is_in(list(DROP_CATEGORIES)))

View file

@ -37,4 +37,4 @@ def extract_zip(zip_path: Path, extract_dir: Path) -> None:
"""Extract a ZIP archive into the given directory."""
extract_dir.mkdir(parents=True, exist_ok=True)
with zipfile.ZipFile(zip_path, "r") as zf:
zf.extractall(extract_dir)
zf.extractall(extract_dir, filter="data")

View file

@ -27,7 +27,9 @@ def load_england_polygon(geojson_path: Path) -> PreparedGeometry:
return prep(geometry)
def in_england_mask(geojson_path: Path, lats: np.ndarray, lngs: np.ndarray) -> np.ndarray:
def in_england_mask(
geojson_path: Path, lats: np.ndarray, lngs: np.ndarray
) -> np.ndarray:
"""Vectorized check: which (lat, lng) points are within England.
Returns a boolean numpy array.

View file

@ -84,7 +84,7 @@ def fuzzy_join_on_postcode(
right_match["_right_postcode"],
right_match["_right_address"],
):
if postcode is not None:
if address is not None and postcode is not None:
right_by_postcode.setdefault(postcode, []).append((idx, address))
# Group left side by postcode

View file

@ -106,7 +106,9 @@ def count_pois_per_postcode(
if nearby is None:
continue
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i])
distances = haversine_km(
poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i]
)
within_mask = distances <= radius_km
within_indices = nearby[within_mask]
@ -179,7 +181,9 @@ def min_distance_per_postcode(
if nearby is None:
continue
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i])
distances = haversine_km(
poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i]
)
for group, cat_mask in category_masks.items():
group_mask = cat_mask[nearby]

View file

@ -15,26 +15,49 @@ def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
"""
arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry") == "E92000001")
active = arcgis.filter(pl.col("doterm").is_null()).select("pcds", "oseast1m", "osnrth1m").collect()
terminated = arcgis.filter(pl.col("doterm").is_not_null()).select("pcds", "oseast1m", "osnrth1m").collect()
active = (
arcgis.filter(pl.col("doterm").is_null())
.select("pcds", "oseast1m", "osnrth1m")
.collect()
)
terminated = (
arcgis.filter(pl.col("doterm").is_not_null())
.select("pcds", "oseast1m", "osnrth1m")
.collect()
)
print(f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}")
print(
f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}"
)
if terminated.height == 0:
return pl.DataFrame({"old_postcode": pl.Series([], dtype=pl.Utf8), "new_postcode": pl.Series([], dtype=pl.Utf8)})
return pl.DataFrame(
{
"old_postcode": pl.Series([], dtype=pl.Utf8),
"new_postcode": pl.Series([], dtype=pl.Utf8),
}
)
active_coords = np.column_stack([active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()])
terminated_coords = np.column_stack([terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()])
active_coords = np.column_stack(
[active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()]
)
terminated_coords = np.column_stack(
[terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()]
)
tree = cKDTree(active_coords)
distances, indices = tree.query(terminated_coords)
active_postcodes = active["pcds"]
mapping = pl.DataFrame({
mapping = pl.DataFrame(
{
"old_postcode": terminated["pcds"],
"new_postcode": active_postcodes.gather(indices),
})
}
)
print(f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m")
print(
f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m"
)
return mapping

View file

@ -72,7 +72,9 @@ def test_no_pois_returns_zeros(postcodes):
"category": pl.Series([], dtype=pl.String),
}
)
result = count_pois_per_postcode(postcodes, empty_pois, groups=POI_GROUPS, radius_km=2.0)
result = count_pois_per_postcode(
postcodes, empty_pois, groups=POI_GROUPS, radius_km=2.0
)
for group in POI_GROUPS:
col = f"{group}_2km"
@ -125,7 +127,9 @@ def test_min_distance_no_pois_returns_nan(postcodes):
"category": pl.Series([], dtype=pl.String),
}
)
result = min_distance_per_postcode(postcodes, empty_pois, groups={"train_tube": ["Rail station"]})
result = min_distance_per_postcode(
postcodes, empty_pois, groups={"train_tube": ["Rail station"]}
)
assert "train_tube_nearest_km" in result.columns
assert all(np.isnan(v) for v in result["train_tube_nearest_km"].to_list())

View file

@ -28,8 +28,8 @@
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"pd.set_option('display.max_columns', None)\n",
"pd.set_option('display.max_colwidth', 60)"
"pd.set_option(\"display.max_columns\", None)\n",
"pd.set_option(\"display.max_colwidth\", 60)"
]
},
{
@ -47,7 +47,7 @@
"metadata": {},
"outputs": [],
"source": [
"param_import_path = '/bulk/wide-2.parquet'\n",
"param_import_path = \"/bulk/wide-2.parquet\"\n",
"\n",
"param_lookback = 3"
]
@ -128,7 +128,7 @@
],
"source": [
"data = pl.scan_parquet(param_import_path).unique(subset=[\"Postcode\", \"Address per EPC\"])\n",
"data = data.filter(pl.col('Total floor area (sqm)') > 10)\n",
"data = data.filter(pl.col(\"Total floor area (sqm)\") > 10)\n",
"\n",
"# print(data.collect_schema()) # column names and types\n",
"print(data.select(pl.len()).collect()) # row count\n",
@ -146,21 +146,19 @@
"source": [
"columns_required = [\n",
" # absolute necessity\n",
" 'Postcode',\n",
" 'Address per EPC',\n",
" 'historical_prices',\n",
" 'Price per sqm',\n",
"\n",
" \"Postcode\",\n",
" \"Address per EPC\",\n",
" \"historical_prices\",\n",
" \"Price per sqm\",\n",
" # fairly fixed attributes\n",
" 'Property type', # or 'epc_property_type' or 'built_form'\n",
" 'Leashold/Freehold',\n",
" 'Total floor area (sqm)',\n",
" 'Rooms (including bedrooms & bathrooms)',\n",
" 'Approximate construction age',\n",
"\n",
" \"Property type\", # or 'epc_property_type' or 'built_form'\n",
" \"Leashold/Freehold\",\n",
" \"Total floor area (sqm)\",\n",
" \"Rooms (including bedrooms & bathrooms)\",\n",
" \"Approximate construction age\",\n",
" # latest\n",
" # 'date_of_transfer'\n",
" 'Last known price'\n",
" \"Last known price\",\n",
"]"
]
},
@ -440,8 +438,13 @@
],
"source": [
"# temp_Postcodes = [\"LE5 4ED\", \"E14 9GU\", \"YO8 9PW\", \"SW1P 3AN\", \"BH3 7DX\", \"E14 2DG\"]\n",
"temp_Postcodes = data.select('Postcode').collect().sample(10000)['Postcode'].to_list()\n",
"data_small = data.filter(pl.col(\"Postcode\").is_in(temp_Postcodes)).select(columns_required).collect().to_pandas()\n",
"temp_Postcodes = data.select(\"Postcode\").collect().sample(10000)[\"Postcode\"].to_list()\n",
"data_small = (\n",
" data.filter(pl.col(\"Postcode\").is_in(temp_Postcodes))\n",
" .select(columns_required)\n",
" .collect()\n",
" .to_pandas()\n",
")\n",
"data_small = data_small.explode(\"historical_prices\")\n",
"data_small[\"year\"] = data_small[\"historical_prices\"].apply(lambda x: x[\"year\"])\n",
"data_small[\"price\"] = data_small[\"historical_prices\"].apply(lambda x: x[\"price\"])\n",
@ -908,35 +911,45 @@
"from typing import Any\n",
"from pandas.core.frame import DataFrame\n",
"\n",
"print(f'rolling periods (relative): {[i for i in range(-param_lookback, 1)]}')\n",
"print(f\"rolling periods (relative): {[i for i in range(-param_lookback, 1)]}\")\n",
"\n",
"# Rolling average over a (param_lookback + 1)-year window, weighted by number of sales per year\n",
"pc_avg_raw = data_small.groupby(['Postcode', 'year']).agg(\n",
" ppsqm_sum=('Price per sqm', 'sum'),\n",
" ppsqm_count=('Price per sqm', 'count')\n",
").reset_index().sort_values(by=['Postcode', 'year'], ascending=False)\n",
"pc_avg_raw = (\n",
" data_small.groupby([\"Postcode\", \"year\"])\n",
" .agg(ppsqm_sum=(\"Price per sqm\", \"sum\"), ppsqm_count=(\"Price per sqm\", \"count\"))\n",
" .reset_index()\n",
" .sort_values(by=[\"Postcode\", \"year\"], ascending=False)\n",
")\n",
"\n",
"display(pc_avg_raw)\n",
"\n",
"# Each year's totals contribute to that year and the param_lookback years before it\n",
"pc_avg_expanded = pd.concat([\n",
" pc_avg_raw.assign(year=pc_avg_raw['year'] + offset) for offset in range(-param_lookback, 1) # \n",
"])\n",
"pc_avg_expanded = pd.concat(\n",
" [\n",
" pc_avg_raw.assign(year=pc_avg_raw[\"year\"] + offset)\n",
" for offset in range(-param_lookback, 1) #\n",
" ]\n",
")\n",
"\n",
"display(pc_avg_expanded)\n",
"\n",
"# Sum counts and sums, then divide to get weighted mean\n",
"pc_avg_complex = pc_avg_expanded.groupby(['Postcode', 'year']).agg(\n",
" ppsqm_sum=('ppsqm_sum', 'sum'),\n",
" ppsqm_count=('ppsqm_count', 'sum')\n",
").reset_index()\n",
"pc_avg_complex['Price per sqm PC AVG'] = pc_avg_complex['ppsqm_sum'] / pc_avg_complex['ppsqm_count']\n",
"pc_avg_complex: Any | DataFrame = pc_avg_complex[['Postcode', 'year', 'Price per sqm PC AVG']].sort_values(by=['Postcode', 'year'], ascending=False)\n",
"pc_avg_complex = (\n",
" pc_avg_expanded.groupby([\"Postcode\", \"year\"])\n",
" .agg(ppsqm_sum=(\"ppsqm_sum\", \"sum\"), ppsqm_count=(\"ppsqm_count\", \"sum\"))\n",
" .reset_index()\n",
")\n",
"pc_avg_complex[\"Price per sqm PC AVG\"] = (\n",
" pc_avg_complex[\"ppsqm_sum\"] / pc_avg_complex[\"ppsqm_count\"]\n",
")\n",
"pc_avg_complex: Any | DataFrame = pc_avg_complex[\n",
" [\"Postcode\", \"year\", \"Price per sqm PC AVG\"]\n",
"].sort_values(by=[\"Postcode\", \"year\"], ascending=False)\n",
"display(pc_avg_complex)\n",
"\n",
"temp_df = pc_avg_complex[pc_avg_complex['Postcode'] == data_small['Postcode'].iloc[0]]\n",
"print(data_small['Postcode'].iloc[0])\n",
"temp_df.plot.line(x='year', y='Price per sqm PC AVG')"
"temp_df = pc_avg_complex[pc_avg_complex[\"Postcode\"] == data_small[\"Postcode\"].iloc[0]]\n",
"print(data_small[\"Postcode\"].iloc[0])\n",
"temp_df.plot.line(x=\"year\", y=\"Price per sqm PC AVG\")"
]
},
{
@ -1111,9 +1124,13 @@
}
],
"source": [
"data_small = data_small.merge(pc_avg_complex, on=['Postcode', 'year'], suffixes=('', ' pc_avg_complex'))\n",
"data_small['c'] = data_small['Price per sqm'] / data_small['Price per sqm PC AVG']\n",
"data_small[['Postcode', 'Address per EPC', 'Price per sqm', 'Price per sqm PC AVG', 'c']]"
"data_small = data_small.merge(\n",
" pc_avg_complex, on=[\"Postcode\", \"year\"], suffixes=(\"\", \" pc_avg_complex\")\n",
")\n",
"data_small[\"c\"] = data_small[\"Price per sqm\"] / data_small[\"Price per sqm PC AVG\"]\n",
"data_small[\n",
" [\"Postcode\", \"Address per EPC\", \"Price per sqm\", \"Price per sqm PC AVG\", \"c\"]\n",
"]"
]
},
{
@ -1445,17 +1462,21 @@
],
"source": [
"# 1. Coefficient of Variation (std/mean) per property (the 3+ sales filter below is commented out)\n",
"c_stats = data_small.groupby(['Postcode', 'Address per EPC']).agg(\n",
" n_sales=('c', 'count'),\n",
" year_min=('year', 'min'),\n",
" year_max=('year', 'max'),\n",
" c_mean=('c', 'mean'),\n",
" c_std=('c', 'std'),\n",
").dropna()\n",
"c_stats['c_cv'] = c_stats['c_std'] / c_stats['c_mean']\n",
"c_stats = (\n",
" data_small.groupby([\"Postcode\", \"Address per EPC\"])\n",
" .agg(\n",
" n_sales=(\"c\", \"count\"),\n",
" year_min=(\"year\", \"min\"),\n",
" year_max=(\"year\", \"max\"),\n",
" c_mean=(\"c\", \"mean\"),\n",
" c_std=(\"c\", \"std\"),\n",
" )\n",
" .dropna()\n",
")\n",
"c_stats[\"c_cv\"] = c_stats[\"c_std\"] / c_stats[\"c_mean\"]\n",
"# c_stats_3plus = c_stats[c_stats['n_sales'] >= 3]\n",
"# print(f\"Properties with 3+ sales: {len(c_stats_3plus)} / {len(c_stats)}\")\n",
"c_stats.sort_values('c_cv', ascending=False).head(20)"
"c_stats.sort_values(\"c_cv\", ascending=False).head(20)"
]
},
{
@ -2265,42 +2286,44 @@
"display(random_c)\n",
"\n",
"# pc avg trend\n",
"temp_pc_avg = pc_avg_complex[pc_avg_complex['Postcode'] == random_c.index[0][0]].sort_values(by='year')\n",
"temp_pc_avg = pc_avg_complex[\n",
" pc_avg_complex[\"Postcode\"] == random_c.index[0][0]\n",
"].sort_values(by=\"year\")\n",
"display(temp_pc_avg)\n",
"\n",
"# c for specific postcode\n",
"temp_postcode = data_small[\n",
" (data_small['Postcode'] == random_c.index[0][0]) \n",
" (data_small[\"Postcode\"] == random_c.index[0][0])\n",
" # & (data_small['Address per EPC'] == random_c.index[0][1])\n",
"].sort_values(by='year')\n",
"].sort_values(by=\"year\")\n",
"display(temp_postcode)\n",
"\n",
"temp_address = data_small[\n",
" (data_small['Postcode'] == random_c.index[0][0]) \n",
" & (data_small['Address per EPC'] == random_c.index[0][1]) \n",
"].sort_values(by='year')\n",
" (data_small[\"Postcode\"] == random_c.index[0][0])\n",
" & (data_small[\"Address per EPC\"] == random_c.index[0][1])\n",
"].sort_values(by=\"year\")\n",
"display(temp_address)\n",
"\n",
"# plot\n",
"\n",
"fig, ax1 = plt.subplots()\n",
"\n",
"temp_pc_avg.plot.line(x='year', y='Price per sqm PC AVG', ax=ax1, color='black')\n",
"temp_address.plot.line(x='year', y='Price per sqm', ax=ax1, color='green') \n",
"temp_pc_avg.plot.line(x=\"year\", y=\"Price per sqm PC AVG\", ax=ax1, color=\"black\")\n",
"temp_address.plot.line(x=\"year\", y=\"Price per sqm\", ax=ax1, color=\"green\")\n",
"\n",
"ax2 = ax1.twinx()\n",
"ax2.set_ylim(0, 3)\n",
"\n",
"for property in temp_postcode['Address per EPC'].unique():\n",
" property_data = temp_postcode[temp_postcode['Address per EPC'] == property]\n",
" property_data.plot.line(x='year', y='c', ax=ax2, color='orange', style=':')\n",
"for property in temp_postcode[\"Address per EPC\"].unique():\n",
" property_data = temp_postcode[temp_postcode[\"Address per EPC\"] == property]\n",
" property_data.plot.line(x=\"year\", y=\"c\", ax=ax2, color=\"orange\", style=\":\")\n",
"\n",
"temp_address.plot.line(x='year', y='c', ax=ax2, color='red', style=':')\n",
"temp_address.plot.line(x=\"year\", y=\"c\", ax=ax2, color=\"red\", style=\":\")\n",
"\n",
"ax1.set_ylabel('Price per sqm')\n",
"ax2.set_ylabel('c')\n",
"ax1.set_ylabel(\"Price per sqm\")\n",
"ax2.set_ylabel(\"c\")\n",
"\n",
"plt.show()\n"
"plt.show()"
]
},
{
@ -2640,17 +2663,21 @@
],
"source": [
"# 1. Coefficient of Variation (std/mean) per property (the 3+ sales filter below is commented out)\n",
"c_stats = data_small.groupby(['Postcode', 'Address per EPC']).agg(\n",
" n_sales=('c', 'count'),\n",
" year_min=('year', 'min'),\n",
" year_max=('year', 'max'),\n",
" c_mean=('c', 'mean'),\n",
" c_std=('c', 'std'),\n",
").dropna()\n",
"c_stats['c_cv'] = c_stats['c_std'] / c_stats['c_mean']\n",
"c_stats = (\n",
" data_small.groupby([\"Postcode\", \"Address per EPC\"])\n",
" .agg(\n",
" n_sales=(\"c\", \"count\"),\n",
" year_min=(\"year\", \"min\"),\n",
" year_max=(\"year\", \"max\"),\n",
" c_mean=(\"c\", \"mean\"),\n",
" c_std=(\"c\", \"std\"),\n",
" )\n",
" .dropna()\n",
")\n",
"c_stats[\"c_cv\"] = c_stats[\"c_std\"] / c_stats[\"c_mean\"]\n",
"# c_stats_3plus = c_stats[c_stats['n_sales'] >= 3]\n",
"# print(f\"Properties with 3+ sales: {len(c_stats_3plus)} / {len(c_stats)}\")\n",
"c_stats.sort_values('c_cv', ascending=False).head(20)"
"c_stats.sort_values(\"c_cv\", ascending=False).head(20)"
]
},
{
@ -2685,31 +2712,41 @@
"\n",
"fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n",
"\n",
"axes[0].hist(c_stats['c_std'], bins=100, edgecolor='black')\n",
"axes[0].set_xlabel('Std of c')\n",
"axes[0].set_ylabel('Number of properties')\n",
"axes[0].set_title('Distribution of c stability (std)')\n",
"axes[0].axvline(x=c_stats['c_std'].median(), color='red', linestyle='--', label=f'Median ({c_stats['c_std'].median()}) threshold')\n",
"axes[0].hist(c_stats[\"c_std\"], bins=100, edgecolor=\"black\")\n",
"axes[0].set_xlabel(\"Std of c\")\n",
"axes[0].set_ylabel(\"Number of properties\")\n",
"axes[0].set_title(\"Distribution of c stability (std)\")\n",
"axes[0].axvline(\n",
" x=c_stats[\"c_std\"].median(),\n",
" color=\"red\",\n",
" linestyle=\"--\",\n",
" label=f\"Median ({c_stats['c_std'].median()}) threshold\",\n",
")\n",
"axes[0].legend()\n",
"\n",
"axes[1].hist(c_stats['c_cv'], bins=100, edgecolor='black')\n",
"axes[1].set_xlabel('CV of c (std/mean)')\n",
"axes[1].set_ylabel('Number of properties')\n",
"axes[1].set_title('Distribution of c stability (CV)')\n",
"axes[1].axvline(x=c_stats['c_cv'].median(), color='red', linestyle='--', label=f'Median ({c_stats['c_cv'].median()}) threshold')\n",
"axes[1].hist(c_stats[\"c_cv\"], bins=100, edgecolor=\"black\")\n",
"axes[1].set_xlabel(\"CV of c (std/mean)\")\n",
"axes[1].set_ylabel(\"Number of properties\")\n",
"axes[1].set_title(\"Distribution of c stability (CV)\")\n",
"axes[1].axvline(\n",
" x=c_stats[\"c_cv\"].median(),\n",
" color=\"red\",\n",
" linestyle=\"--\",\n",
" label=f\"Median ({c_stats['c_cv'].median()}) threshold\",\n",
")\n",
"axes[1].legend()\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"# output text\n",
"pct_stable = (c_stats['c_cv'] < 0.1).mean() * 100\n",
"pct_stable = (c_stats[\"c_cv\"] < 0.1).mean() * 100\n",
"print(f\"{pct_stable:.1f}% of properties have CV < 0.1\")\n",
"\n",
"pct_stable = (c_stats['c_cv'] < 0.2).mean() * 100\n",
"pct_stable = (c_stats[\"c_cv\"] < 0.2).mean() * 100\n",
"print(f\"{pct_stable:.1f}% of properties have CV < 0.2\")\n",
"\n",
"pct_stable = (c_stats['c_cv'] < 0.3).mean() * 100\n",
"pct_stable = (c_stats[\"c_cv\"] < 0.3).mean() * 100\n",
"print(f\"{pct_stable:.1f}% of properties have CV < 0.3\")"
]
},
@ -3299,7 +3336,7 @@
}
],
"source": [
"unstable_c = c_stats.sort_values('c_cv', ascending=False)['c_cv'][:20]\n",
"unstable_c = c_stats.sort_values(\"c_cv\", ascending=False)[\"c_cv\"][:20]\n",
"display(unstable_c)\n",
"\n",
"unstable_c_specific = random.randrange(len(unstable_c))\n",
@ -3308,41 +3345,43 @@
"print(unstable_c.index[unstable_c_specific][1])\n",
"\n",
"# pc avg trend\n",
"temp_pc_avg = pc_avg_complex[pc_avg_complex['Postcode'] == unstable_c.index[unstable_c_specific][0]].sort_values(by='year')\n",
"temp_pc_avg = pc_avg_complex[\n",
" pc_avg_complex[\"Postcode\"] == unstable_c.index[unstable_c_specific][0]\n",
"].sort_values(by=\"year\")\n",
"display(temp_pc_avg)\n",
"\n",
"# c for specific postcode\n",
"temp_postcode = data_small[\n",
" (data_small['Postcode'] == unstable_c.index[unstable_c_specific][0]) \n",
" (data_small[\"Postcode\"] == unstable_c.index[unstable_c_specific][0])\n",
" # & (data_small['Address per EPC'] == unstable_c.index[unstable_c_specific][1])\n",
"].sort_values(by='year')\n",
"].sort_values(by=\"year\")\n",
"display(temp_postcode)\n",
"\n",
"# c for specific address\n",
"temp_address = data_small[\n",
" (data_small['Postcode'] == unstable_c.index[unstable_c_specific][0]) \n",
" & (data_small['Address per EPC'] == unstable_c.index[unstable_c_specific][1]) \n",
"].sort_values(by='year')\n",
" (data_small[\"Postcode\"] == unstable_c.index[unstable_c_specific][0])\n",
" & (data_small[\"Address per EPC\"] == unstable_c.index[unstable_c_specific][1])\n",
"].sort_values(by=\"year\")\n",
"display(temp_address)\n",
"\n",
"# plot\n",
"\n",
"fig, ax1 = plt.subplots()\n",
"\n",
"temp_pc_avg.plot.line(x='year', y='Price per sqm PC AVG', ax=ax1, color='black')\n",
"temp_address.plot.line(x='year', y='Price per sqm', ax=ax1, color='green') \n",
"temp_pc_avg.plot.line(x=\"year\", y=\"Price per sqm PC AVG\", ax=ax1, color=\"black\")\n",
"temp_address.plot.line(x=\"year\", y=\"Price per sqm\", ax=ax1, color=\"green\")\n",
"\n",
"ax2 = ax1.twinx()\n",
"\n",
"for property in temp_postcode['Address per EPC'].unique():\n",
" property_data = temp_postcode[temp_postcode['Address per EPC'] == property]\n",
" property_data.plot.line(x='year', y='c', ax=ax2, color='orange', style=':')\n",
"temp_address.plot.line(x='year', y='c', ax=ax2, color='red', style=':')\n",
"for property in temp_postcode[\"Address per EPC\"].unique():\n",
" property_data = temp_postcode[temp_postcode[\"Address per EPC\"] == property]\n",
" property_data.plot.line(x=\"year\", y=\"c\", ax=ax2, color=\"orange\", style=\":\")\n",
"temp_address.plot.line(x=\"year\", y=\"c\", ax=ax2, color=\"red\", style=\":\")\n",
"\n",
"ax1.set_ylabel('Price per sqm')\n",
"ax2.set_ylabel('c')\n",
"ax1.set_ylabel(\"Price per sqm\")\n",
"ax2.set_ylabel(\"c\")\n",
"\n",
"plt.show()\n"
"plt.show()"
]
},
{
@ -3370,11 +3409,11 @@
],
"source": [
"# select random address\n",
"one_property = data_small.sample(1)[['Postcode', 'Address per EPC']].iloc[0]\n",
"postcode = one_property['Postcode']\n",
"address = one_property['Address per EPC']\n",
"print(f'Postcode: {postcode}')\n",
"print(f'Address: {address}')"
"one_property = data_small.sample(1)[[\"Postcode\", \"Address per EPC\"]].iloc[0]\n",
"postcode = one_property[\"Postcode\"]\n",
"address = one_property[\"Address per EPC\"]\n",
"print(f\"Postcode: {postcode}\")\n",
"print(f\"Address: {address}\")"
]
},
{
@ -3481,22 +3520,21 @@
],
"source": [
"property_data = data_small[\n",
" (data_small['Postcode'] == postcode) \n",
" & (data_small['Address per EPC'] == address) \n",
" (data_small[\"Postcode\"] == postcode) & (data_small[\"Address per EPC\"] == address)\n",
"]\n",
"latest_year = property_data['year'].max()\n",
"print(f'Latest year of data: {latest_year}')\n",
"latest_year = property_data[\"year\"].max()\n",
"print(f\"Latest year of data: {latest_year}\")\n",
"\n",
"# Get only the latest year's data for this property (this is what we want to predict)\n",
"data_small_test = property_data[property_data['year'] == latest_year]\n",
"data_small_test = property_data[property_data[\"year\"] == latest_year]\n",
"\n",
"# Remove only the latest year's data from training (keep historical data for this property)\n",
"data_small_train = data_small.drop(data_small_test.index)\n",
"\n",
"print()\n",
"print(f'data_small.shape = {data_small.shape}')\n",
"print(f'data_small_train.shape = {data_small_train.shape}')\n",
"print(f'data_small_test.shape = {data_small_test.shape}')\n",
"print(f\"data_small.shape = {data_small.shape}\")\n",
"print(f\"data_small_train.shape = {data_small_train.shape}\")\n",
"print(f\"data_small_test.shape = {data_small_test.shape}\")\n",
"display(data_small_test)\n",
"data_small.shape[0] == data_small_test.shape[0] + data_small_train.shape[0]"
]
@ -3607,9 +3645,9 @@
"# get latest c in data_small_train\n",
"\n",
"latest_train_address = data_small_train[\n",
" (data_small_train['Postcode'] == postcode) \n",
" & (data_small_train['Address per EPC'] == address) \n",
"].sort_values(by='year')\n",
" (data_small_train[\"Postcode\"] == postcode)\n",
" & (data_small_train[\"Address per EPC\"] == address)\n",
"].sort_values(by=\"year\")\n",
"\n",
"latest_train_address"
]
@ -3630,10 +3668,10 @@
}
],
"source": [
"latest_train_c = latest_train_address['c'].iloc[-1]\n",
"latest_train_pc_avg = latest_train_address['Price per sqm PC AVG'].iloc[-1]\n",
"print(f'Latest c in training data: {latest_train_c:.3f}')\n",
"print(f'Latest price per sqm in training data: {latest_train_pc_avg:.2f}') "
"latest_train_c = latest_train_address[\"c\"].iloc[-1]\n",
"latest_train_pc_avg = latest_train_address[\"Price per sqm PC AVG\"].iloc[-1]\n",
"print(f\"Latest c in training data: {latest_train_c:.3f}\")\n",
"print(f\"Latest price per sqm in training data: {latest_train_pc_avg:.2f}\")"
]
},
{
@ -3654,7 +3692,7 @@
}
],
"source": [
"latest_train_c * latest_train_pc_avg * data_small_test['Total floor area (sqm)'].iloc[0]"
"latest_train_c * latest_train_pc_avg * data_small_test[\"Total floor area (sqm)\"].iloc[0]"
]
},
{

View file

@ -22,7 +22,7 @@ set -euo pipefail
# --demo only compute Bank + TCR, transit only (quick test)
# --- Defaults ---
THREADS=8
THREADS=16
HEAP=16g
NETWORK_DIR=property-data/r5-network
OUTPUT_BASE=property-data/travel-times

View file

@ -5,7 +5,7 @@ import { NetworkCache } from './network-cache.js';
const VIEWPORT = { width: 1200, height: 630 };
const NAVIGATION_TIMEOUT = 15_000;
const READY_TIMEOUT = 15_000;
const RENDER_BUFFER_MS = 200;
const RENDER_BUFFER_MS = 500;
const POOL_SIZE = 3;
let browser: Browser | null = null;
@ -226,11 +226,22 @@ export async function initialize(appUrl: string): Promise<void> {
await warmPool();
}
export async function takeScreenshot(url: string): Promise<Buffer> {
export async function takeScreenshot(url: string, authHeader?: string): Promise<Buffer> {
const page = await acquirePage();
const t0 = performance.now();
try {
// Inject Authorization header on API requests so the headless browser
// is authenticated (required for licensed users outside the free zone).
// Page-level routes take precedence over the context-level cache route,
// so only /api/ requests are affected — static assets still use the cache.
if (authHeader) {
await page.route('**/api/**', async (route) => {
const headers = { ...route.request().headers(), authorization: authHeader };
await route.continue({ headers });
});
}
const response = await page.goto(url, {
waitUntil: 'domcontentloaded',
timeout: NAVIGATION_TIMEOUT,
@ -251,9 +262,10 @@ export async function takeScreenshot(url: string): Promise<Buffer> {
const t2 = performance.now();
console.log(` Ready: ${(t2 - t1).toFixed(0)}ms`);
// Brief buffer for SwiftShader to finish rendering the WebGL frame.
// Reduced from 500ms → 200ms since tiles now load from the in-memory
// cache and don't need network round-trips.
// Buffer for SwiftShader to finish rendering the WebGL frame after
// __screenshot_ready fires. The frontend uses double-rAF before signaling,
// so one paint cycle has already completed — this is extra safety for
// compositor staging and any residual tile/layer rendering.
await page.waitForTimeout(RENDER_BUFFER_MS);
// JPEG at quality 85: ~3-5x faster encoding than PNG with negligible
@ -265,6 +277,11 @@ export async function takeScreenshot(url: string): Promise<Buffer> {
return Buffer.from(screenshot);
} finally {
// Remove page-level auth route before returning page to pool
// so the next screenshot doesn't inherit stale credentials
if (authHeader) {
await page.unrouteAll({ behavior: 'wait' }).catch(() => {});
}
await releasePage(page);
}
}

View file

@ -57,7 +57,12 @@ app.get('/screenshot', async (req, res) => {
const pagePath = typeof req.query.path === 'string' && req.query.path ? req.query.path : '/';
if (pagePath !== '/') qs.set('path', pagePath);
// Include auth status in cache key so authenticated screenshots
// (with hexagons outside free zone) are cached separately
const authHeader = req.headers.authorization;
if (authHeader) qs.set('_auth', '1');
const cacheKey = cache.buildKey(qs);
qs.delete('_auth');
qs.delete('path');
// Check cache first
@ -74,8 +79,8 @@ app.get('/screenshot', async (req, res) => {
qs.set('screenshot', '1');
const url = `${APP_URL}${pagePath}?${qs}`;
console.log(`Taking screenshot: ${url}`);
const jpeg = await takeScreenshot(url);
console.log(`Taking screenshot: ${url}${authHeader ? ' (authenticated)' : ''}`);
const jpeg = await takeScreenshot(url, authHeader);
// Cache it
cache.set(cacheKey, jpeg);

View file

@ -4,6 +4,7 @@ import sys
from collections import deque
from PIL import Image
def remove_white_bg(path: str, tolerance: int = 20, out: str | None = None):
img = Image.open(path).convert("RGBA")
pixels = img.load()
@ -43,6 +44,7 @@ def remove_white_bg(path: str, tolerance: int = 20, out: str | None = None):
img.save(dest)
print(f"Saved to {dest} ({img.size[0]}x{img.size[1]})")
if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: python remove_bg.py <image> [tolerance] [output]")

281
scripts/zoopla_experiment.py Executable file
View file

@ -0,0 +1,281 @@
#!/usr/bin/env -S uv run --project ../finder
"""Zoopla scraping experiment — working prototype using Camoufox.
Key findings:
- Zoopla uses Cloudflare Turnstile (managed interactive challenge)
- Playwright headless Chromium + stealth patches CANNOT beat it
- Camoufox (anti-fingerprinting Firefox fork) PASSES Cloudflare
- Zoopla uses Next.js App Router with React Server Components (RSC)
- Listing data is NOT in __NEXT_DATA__; it's server-rendered in the RSC stream
- URL-based location slugs (e.g. /properties/london/) return 0 results
- Must use the search autocomplete (GraphQL: getGeoSuggestion) to resolve
a location, then submit the form to get results
- GraphQL endpoint: api-graphql-lambda.prod.zoopla.co.uk/graphql
- Listings loaded via getTopLeadListingIds + getRareFindLeadListingIds ops
Usage:
uv run --project finder scripts/zoopla_experiment.py [LOCATION]
uv run --project finder scripts/zoopla_experiment.py "Tower Hamlets"
"""
import json
import logging
import re
import sys
import time
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)-8s %(message)s",
datefmt="%H:%M:%S",
)
log = logging.getLogger("zoopla-exp")
def scrape_zoopla(location: str = "London", channel: str = "BUY"):
    """Scrape the first page of Zoopla search results for ``location``.

    Drives a headless Camoufox browser through the homepage, the cookie
    banner, the search autocomplete and the search form, then extracts the
    listing cards from the rendered DOM.

    Args:
        location: Free-text location typed into the homepage search box.
        channel: ``"BUY"`` (default) searches the default tab; the exact
            value ``"RENT"`` clicks the Rent tab before searching.

    Returns:
        On success, a dict with the keys ``url``, ``title``,
        ``total_results``, ``listings``, ``cookies`` and ``user_agent``.
        On failure (Cloudflare challenge not cleared, or no search input
        found) an empty list, so callers should test the result for
        truthiness before indexing into it.
    """
    # Imported lazily so the module can be loaded without camoufox installed.
    from camoufox.sync_api import Camoufox

    tab_label = "Buy" if channel == "BUY" else "Rent"
    log.info("Scraping Zoopla: location=%s channel=%s", location, channel)

    with Camoufox(headless=True) as browser:
        page = browser.new_page()

        # Intercept GraphQL responses
        graphql_responses = []

        def on_resp(response):
            url = response.url
            ct = response.headers.get("content-type", "")
            if "json" in ct and "graphql" in url:
                try:
                    body = response.json()
                    req = response.request.post_data or ""
                    graphql_responses.append({"body": body, "req": req})
                except Exception:
                    # Best-effort capture: keep scraping, but leave a trace
                    # instead of discarding the failure silently.
                    log.debug("Could not capture GraphQL response from %s", url, exc_info=True)

        page.on("response", on_resp)

        # Step 1: Load homepage and pass Cloudflare
        log.info("Loading Zoopla homepage...")
        page.goto("https://www.zoopla.co.uk/", wait_until="domcontentloaded", timeout=60000)

        # Poll up to 20 x 3s = 60s for the Cloudflare interstitial to clear.
        for _ in range(20):
            if "Just a moment" not in page.title():
                break
            time.sleep(3)
        else:
            log.error("Cloudflare did not resolve after 60s")
            return []

        log.info("Homepage loaded: %s", page.title())
        time.sleep(3)

        # Step 2: Dismiss cookie consent (shadow DOM)
        page.evaluate("""() => {
            const aside = document.querySelector('#usercentrics-cmp-ui');
            if (aside && aside.shadowRoot) {
                const btns = aside.shadowRoot.querySelectorAll('button');
                for (const btn of btns) {
                    if (btn.innerText.includes('Accept')) { btn.click(); return; }
                }
            }
            aside?.remove();
        }""")
        time.sleep(2)

        # Step 3: Select Buy/Rent tab if needed
        if channel == "RENT":
            rent_tab = page.query_selector('button:has-text("Rent")') or page.query_selector(f'[role="tab"]:has-text("{tab_label}")')
            if rent_tab:
                rent_tab.click()
                time.sleep(1)

        # Step 4: Type location into search and select autocomplete suggestion
        log.info("Searching for '%s'...", location)
        search_input = (
            page.query_selector('input[name="autosuggest-input"]')
            or page.query_selector('input[type="text"]')
        )
        if not search_input:
            log.error("Could not find search input")
            return []

        search_input.click()
        time.sleep(0.5)
        search_input.fill("")  # Clear any existing text
        search_input.type(location, delay=80)
        time.sleep(3)

        # Select first autocomplete suggestion
        first_option = page.query_selector('[role="option"]')
        if first_option:
            suggestion_text = first_option.inner_text()
            log.info("Selecting suggestion: %s", suggestion_text)
            first_option.click()
            time.sleep(1)
        else:
            log.warning("No autocomplete suggestions appeared")

        # Step 5: Submit search
        search_btn = page.query_selector('button:has-text("Search")')
        if search_btn:
            search_btn.click()
        else:
            search_input.press("Enter")

        log.info("Waiting for results...")
        time.sleep(10)

        final_url = page.url
        final_title = page.title()
        log.info("URL: %s", final_url)
        log.info("Title: %s", final_title)

        # Step 6: Extract listings from rendered DOM
        listings = page.evaluate(r"""() => {
            const links = Array.from(document.querySelectorAll(
                'a[href*="/for-sale/details/"], a[href*="/new-homes/details/"], a[href*="/to-rent/details/"]'
            ));
            const seen = new Set();
            const results = [];
            for (const link of links) {
                const href = link.href;
                const match = href.match(/\/details\/(\d+)\//);
                if (!match) continue;
                const id = match[1];
                if (seen.has(id)) continue;
                seen.add(id);
                // Walk up to find the listing card container
                let card = link;
                for (let j = 0; j < 10; j++) {
                    card = card.parentElement;
                    if (!card) break;
                    const text = card.innerText || '';
                    // A listing card should have a price and at least beds or area
                    if (text.includes('£') && (text.includes('bed') || text.includes('sq ft'))) {
                        break;
                    }
                }
                if (!card) continue;
                const text = card.innerText || '';
                const lines = text.split('\n').map(l => l.trim()).filter(Boolean);
                const priceMatch = text.match(/£([\d,]+)/);
                const bedsMatch = text.match(/(\d+)\s*beds?/i);
                const bathsMatch = text.match(/(\d+)\s*baths?/i);
                const recMatch = text.match(/(\d+)\s*reception/i);
                const areaMatch = text.match(/([\d,]+)\s*sq\s*ft/i);
                // Try to find address usually a line with a postcode or comma-separated location
                let address = '';
                for (const line of lines) {
                    if (/[A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2}/i.test(line) ||
                        (line.includes(',') && !line.includes('£') && !line.match(/^\d+ beds?/i))) {
                        address = line;
                        break;
                    }
                }
                // Tenure
                let tenure = '';
                if (/freehold/i.test(text)) tenure = 'Freehold';
                else if (/leasehold/i.test(text)) tenure = 'Leasehold';
                results.push({
                    id: id,
                    url: href.replace(window.location.origin, ''),
                    price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null,
                    beds: bedsMatch ? parseInt(bedsMatch[1]) : null,
                    baths: bathsMatch ? parseInt(bathsMatch[1]) : null,
                    receptions: recMatch ? parseInt(recMatch[1]) : null,
                    floor_area_sqft: areaMatch ? parseInt(areaMatch[1].replace(/,/g, '')) : null,
                    address: address,
                    tenure: tenure,
                    text_preview: lines.slice(0, 10).join(' | '),
                });
            }
            return results;
        }""")
        log.info("Extracted %d unique listings from page 1", len(listings))

        # Step 7: Check for results count and pagination
        body_text = page.inner_text("body")
        # Require a leading digit so a stray ", results" cannot match and make
        # int("") raise ValueError after the commas are stripped.
        count_match = re.search(r"(\d[\d,]*)\s+results?", body_text)
        total_results = int(count_match.group(1).replace(",", "")) if count_match else len(listings)
        log.info("Total results: %d", total_results)

        # Step 8: Log GraphQL operations we saw
        log.info("GraphQL operations intercepted:")
        for gql in graphql_responses:
            try:
                req = json.loads(gql["req"])
                op = req.get("operationName", "?")
                log.info(" - %s", op)
            except Exception:
                # Empty or non-object request bodies are expected here; this
                # listing is informational only, so note it and move on.
                log.debug("Skipping GraphQL request with unparsable body", exc_info=True)

        # Step 9: Extract cookies for potential curl_cffi reuse
        cookies = page.context.cookies()
        session_cookies = {
            c["name"]: c["value"]
            for c in cookies
            if "zoopla" in c.get("domain", "") or "cf" in c.get("name", "").lower()
        }
        ua = page.evaluate("navigator.userAgent")

        return {
            "url": final_url,
            "title": final_title,
            "total_results": total_results,
            "listings": listings,
            "cookies": session_cookies,
            "user_agent": ua,
        }
def main():
    """CLI entry point: scrape one location and print listings plus summary stats.

    Reads an optional location from ``sys.argv[1]`` (default ``"London"``),
    runs ``scrape_zoopla`` on the BUY channel and prints every extracted
    listing followed by price/bedroom summaries and session details.
    Exits with status 1 when the scrape returns a falsy result.
    """
    # Local import keeps the new stdlib dependency scoped to this function.
    import statistics

    location = sys.argv[1] if len(sys.argv) > 1 else "London"
    result = scrape_zoopla(location, channel="BUY")
    if not result:
        log.error("Scraping failed")
        sys.exit(1)

    listings = result["listings"]
    print(f"\n{'='*60}")
    print(f" Zoopla: {result['title']}")
    print(f" URL: {result['url']}")
    print(f" Total: {result['total_results']} results, {len(listings)} extracted")
    print(f"{'='*60}\n")

    for i, listing in enumerate(listings):
        print(f"--- Listing {i+1}: {listing['url']} ---")
        # Hide only missing fields (None / empty string); a genuine 0 is data.
        display = {
            k: v
            for k, v in listing.items()
            if k != "text_preview" and v is not None and v != ""
        }
        print(json.dumps(display, indent=2, ensure_ascii=False))
        print()

    # Summary stats
    prices = [item["price"] for item in listings if item["price"] is not None]
    beds = [item["beds"] for item in listings if item["beds"] is not None]
    if prices:
        print(f"Price range: £{min(prices):,} - £{max(prices):,}")
        # statistics.median averages the two middle values for even-length
        # input; plain indexing at len // 2 reported the upper-middle price.
        print(f"Median: £{statistics.median(prices):,.0f}")
    if beds:
        print(f"Bedrooms: {min(beds)}-{max(beds)}")

    # Cookie info for reuse
    print(f"\nSession cookies ({len(result['cookies'])} cookies)")
    print(f"User-Agent: {result['user_agent']}")


if __name__ == "__main__":
    main()

View file

@ -9,7 +9,7 @@ clap = { version = "4", features = ["derive", "env"] }
axum = "0.8"
tower-http = { version = "0.6", features = ["cors", "fs", "compression-gzip", "compression-zstd", "trace"] }
tokio = { version = "1", features = ["full"] }
polars = { version = "0.46", features = ["parquet", "lazy", "dtype-struct", "dtype-u8", "dtype-u16", "dtype-i8", "dtype-i16"] }
polars = { version = "0.46", features = ["parquet", "lazy", "dtype-struct", "dtype-u8", "dtype-u16", "dtype-i8", "dtype-i16", "round_series"] }
h3o = "0.7"
serde = { version = "1", features = ["derive"] }
serde_json = "1"

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,335 @@
2026-03-17T07:30:51.418735Z INFO property_map_server: Prometheus metrics initialized
2026-03-17T07:30:51.418950Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-17T07:30:51.418957Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-17T07:30:51.591217Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-17T07:30:51.591228Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-17T07:31:03.482386Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-17T07:31:03.482398Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-17T07:31:06.206982Z INFO property_map_server::data::property: buy listings joined rows=457076
2026-03-17T07:31:06.207003Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-17T07:31:08.031097Z INFO property_map_server::data::property: rent listings joined rows=122594
2026-03-17T07:31:08.031106Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-17T07:32:00.170695Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=457076 rent_listings=122594 total=15783051
2026-03-17T07:32:00.170797Z INFO property_map_server::data::property: Feature columns from config numeric=55 enums=13 total=68
2026-03-17T07:32:01.527808Z INFO property_map_server::data::property: Combined data selected rows=15783051
2026-03-17T07:32:01.738022Z INFO property_map_server::data::property: Extracting numeric feature columns
2026-03-17T07:32:02.164093Z INFO property_map_server::data::property: Computing histograms for numeric features
2026-03-17T07:32:03.346133Z INFO property_map_server::data::property: Extracting string columns
2026-03-17T07:32:05.803712Z INFO property_map_server::data::property: Building enum features
2026-03-17T07:32:07.359340Z INFO property_map_server::data::property: Extracting renovation history
2026-03-17T07:32:09.567602Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
2026-03-17T07:32:09.567612Z INFO property_map_server::data::property: Extracting listing features
2026-03-17T07:32:10.194293Z INFO property_map_server::data::property: Listing features extracted properties_with_features=518063
2026-03-17T07:32:10.194304Z INFO property_map_server::data::property: Sorting rows by spatial locality
2026-03-17T07:32:11.130691Z INFO property_map_server::data::property: Building interned strings
2026-03-17T07:32:17.391642Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
2026-03-17T07:32:20.030170Z INFO property_map_server::data::property: Data loading complete
2026-03-17T07:32:21.686179Z INFO property_map_server: Property data loaded rows=15783051 features=68 enums=13
2026-03-17T07:32:21.686189Z INFO property_map_server: Building spatial grid index (0.01° cells)
2026-03-17T07:32:22.119885Z INFO property_map_server: Precomputing H3 cells at resolution 12
2026-03-17T07:32:22.119896Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
2026-03-17T07:32:22.577256Z INFO property_map_server::data::property: H3 precomputation complete (15783051 cells)
2026-03-17T07:32:22.577783Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
2026-03-17T07:32:22.577790Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
2026-03-17T07:32:22.606628Z INFO property_map_server::data::poi: Loaded 678242 POIs
2026-03-17T07:32:22.723396Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
2026-03-17T07:32:22.724011Z INFO property_map_server::data::poi: POI data loading complete.
2026-03-17T07:32:22.763121Z INFO property_map_server: POI data loaded pois=678242
2026-03-17T07:32:22.763130Z INFO property_map_server: Building POI spatial grid index
2026-03-17T07:32:22.768959Z INFO property_map_server: Loading place data from /app/data/places.parquet
2026-03-17T07:32:22.768968Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
2026-03-17T07:32:22.772858Z INFO property_map_server::data::places: Loaded 3474 places
2026-03-17T07:32:22.773855Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
2026-03-17T07:32:22.774015Z INFO property_map_server: Place data loaded places=3474
2026-03-17T07:32:22.774027Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
2026-03-17T07:32:22.774032Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
2026-03-17T07:32:22.787541Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
2026-03-17T07:32:31.937299Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
2026-03-17T07:32:32.173875Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
2026-03-17T07:32:32.174039Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
2026-03-17T07:32:32.271059Z INFO property_map_server: PMTiles loaded successfully
2026-03-17T07:32:32.315679Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
2026-03-17T07:32:32.394604Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
2026-03-17T07:32:32.394776Z INFO property_map_server: Precomputed features response groups=8
2026-03-17T07:32:32.394795Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
2026-03-17T07:32:32.593635Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
2026-03-17T07:32:32.598562Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
2026-03-17T07:32:32.602615Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
2026-03-17T07:32:32.700044Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
2026-03-17T07:32:32.703401Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
2026-03-17T07:32:32.703422Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
2026-03-17T07:32:32.703435Z INFO property_map_server: Loading travel time data from /app/data/travel-times
2026-03-17T07:32:33.124089Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
2026-03-17T07:32:33.129130Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
2026-03-17T07:32:33.136319Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
2026-03-17T07:32:33.199470Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1869
2026-03-17T07:32:33.199512Z INFO property_map_server: Travel time store loaded modes=4
2026-03-17T07:32:33.199568Z INFO property_map_server: Precomputed AI filters system prompt
2026-03-17T07:32:33.247029Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:32:41.343709Z INFO property_map_server: All memory pages locked (mlockall)
2026-03-17T07:32:41.343741Z INFO property_map_server: Server listening on 0.0.0.0:8001
2026-03-17T07:33:33.247983Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:34:33.248115Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:35:33.247077Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:36:33.246775Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:37:33.245462Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:38:33.245965Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:39:33.245978Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:40:33.246783Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:41:33.245498Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:42:33.245587Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:43:33.245907Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:44:33.246696Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:45:33.246006Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T07:46:30.259530Z INFO property_map_server: Prometheus metrics initialized
2026-03-17T07:46:30.259726Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-17T07:46:30.259735Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-17T07:46:30.325086Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-17T07:46:30.325097Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-17T07:46:32.757459Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-17T07:46:32.757469Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-17T07:46:33.043727Z INFO property_map_server::data::property: buy listings joined rows=457076
2026-03-17T07:46:33.043750Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-17T07:46:33.139537Z INFO property_map_server::data::property: rent listings joined rows=122594
2026-03-17T07:46:33.139545Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-17T08:31:50.056528Z INFO property_map_server: Prometheus metrics initialized
2026-03-17T08:31:50.056716Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-17T08:31:50.056723Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-17T08:31:50.259958Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-17T08:31:50.259971Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-17T08:32:02.569149Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-17T08:32:02.569201Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-17T08:32:03.699632Z INFO property_map_server::data::property: buy listings joined rows=457076
2026-03-17T08:32:03.699651Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-17T08:32:03.826074Z INFO property_map_server::data::property: rent listings joined rows=122594
2026-03-17T08:32:03.826084Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-17T08:32:43.785403Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=457076 rent_listings=122594 total=15783051
2026-03-17T08:32:43.785499Z INFO property_map_server::data::property: Feature columns from config numeric=55 enums=13 total=68
2026-03-17T08:32:45.220814Z INFO property_map_server::data::property: Combined data selected rows=15783051
2026-03-17T08:32:45.421342Z INFO property_map_server::data::property: Extracting numeric feature columns
2026-03-17T08:32:45.834125Z INFO property_map_server::data::property: Computing histograms for numeric features
2026-03-17T08:32:47.061266Z INFO property_map_server::data::property: Extracting string columns
2026-03-17T08:32:49.344991Z INFO property_map_server::data::property: Building enum features
2026-03-17T08:32:50.754854Z INFO property_map_server::data::property: Extracting renovation history
2026-03-17T08:32:52.906620Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
2026-03-17T08:32:52.906629Z INFO property_map_server::data::property: Extracting listing features
2026-03-17T08:32:53.563050Z INFO property_map_server::data::property: Listing features extracted properties_with_features=518063
2026-03-17T08:32:53.563059Z INFO property_map_server::data::property: Sorting rows by spatial locality
2026-03-17T08:32:54.502830Z INFO property_map_server::data::property: Building interned strings
2026-03-17T08:33:00.593312Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
2026-03-17T08:33:03.178312Z INFO property_map_server::data::property: Data loading complete
2026-03-17T08:33:04.964374Z INFO property_map_server: Property data loaded rows=15783051 features=68 enums=13
2026-03-17T08:33:04.964383Z INFO property_map_server: Building spatial grid index (0.01° cells)
2026-03-17T08:33:05.065094Z INFO property_map_server: Precomputing H3 cells at resolution 12
2026-03-17T08:33:05.065102Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
2026-03-17T08:33:05.486703Z INFO property_map_server::data::property: H3 precomputation complete (15783051 cells)
2026-03-17T08:33:05.486729Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
2026-03-17T08:33:05.486734Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
2026-03-17T08:33:05.529351Z INFO property_map_server::data::poi: Loaded 678242 POIs
2026-03-17T08:33:05.642021Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
2026-03-17T08:33:05.642611Z INFO property_map_server::data::poi: POI data loading complete.
2026-03-17T08:33:05.681563Z INFO property_map_server: POI data loaded pois=678242
2026-03-17T08:33:05.681574Z INFO property_map_server: Building POI spatial grid index
2026-03-17T08:33:05.687162Z INFO property_map_server: Loading place data from /app/data/places.parquet
2026-03-17T08:33:05.687169Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
2026-03-17T08:33:05.705798Z INFO property_map_server::data::places: Loaded 3474 places
2026-03-17T08:33:05.706609Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
2026-03-17T08:33:05.706675Z INFO property_map_server: Place data loaded places=3474
2026-03-17T08:33:05.706689Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
2026-03-17T08:33:05.706695Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
2026-03-17T08:33:05.780250Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
2026-03-17T08:33:14.655514Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
2026-03-17T08:33:14.888462Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
2026-03-17T08:33:14.888478Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
2026-03-17T08:33:15.021983Z INFO property_map_server: PMTiles loaded successfully
2026-03-17T08:33:15.065572Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
2026-03-17T08:33:15.140720Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
2026-03-17T08:33:15.141331Z INFO property_map_server: Precomputed features response groups=8
2026-03-17T08:33:15.141349Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
2026-03-17T08:33:15.246791Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
2026-03-17T08:33:15.254863Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
2026-03-17T08:33:15.258892Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
2026-03-17T08:33:15.329192Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
2026-03-17T08:33:15.333036Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
2026-03-17T08:33:15.333055Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
2026-03-17T08:33:15.333066Z INFO property_map_server: Loading travel time data from /app/data/travel-times
2026-03-17T08:33:15.398969Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
2026-03-17T08:33:15.403743Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
2026-03-17T08:33:15.404640Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
2026-03-17T08:33:15.414586Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1869
2026-03-17T08:33:15.414612Z INFO property_map_server: Travel time store loaded modes=4
2026-03-17T08:33:15.414666Z INFO property_map_server: Precomputed AI filters system prompt
2026-03-17T08:33:16.003045Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:33:19.581012Z INFO property_map_server: All memory pages locked (mlockall)
2026-03-17T08:33:19.581049Z INFO property_map_server: Server listening on 0.0.0.0:8001
2026-03-17T08:33:22.213990Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-17T08:33:22.216578Z INFO property_map_server::routes::features: GET /api/features
2026-03-17T08:33:22.227193Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-17T08:33:22.232847Z INFO property_map_server::routes::features: GET /api/features
2026-03-17T08:33:22.409378Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=145554 parallel=true cells_before_filter=455 cells_after_filter=297 truncated=false bounds=51.4896,-0.1648,51.5404,-0.0952 filters=1 filters_raw="Listing status:Historical sale" fields=0 travel_entries=0 grid_ms=0.4 agg_ms=7.5 json_ms=0.8 total_ms=8.7
2026-03-17T08:33:22.446379Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=145554 parallel=true cells_before_filter=455 cells_after_filter=297 truncated=false bounds=51.4896,-0.1648,51.5404,-0.0952 filters=1 filters_raw="Listing status:Historical sale" fields=0 travel_entries=0 grid_ms=0.1 agg_ms=4.1 json_ms=0.5 total_ms=4.7
2026-03-17T08:34:15.461433Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:34:29.710796Z INFO property_map_server::routes::features: GET /api/features
2026-03-17T08:34:29.713513Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-17T08:34:30.274542Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=145554 parallel=true cells_before_filter=455 cells_after_filter=297 truncated=false bounds=51.4896,-0.1648,51.5404,-0.0952 filters=1 filters_raw="Listing status:Historical sale" fields=0 travel_entries=0 grid_ms=0.1 agg_ms=2.5 json_ms=0.6 total_ms=3.2
2026-03-17T08:34:31.462250Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89195da4987ffff resolution=9 total_count=243 filters=1 filters_raw="Listing status:Historical sale" ms=0.2
2026-03-17T08:34:31.674788Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=82474 parallel=true cells_before_filter=296 cells_after_filter=201 truncated=false bounds=51.4896,-0.1524,51.5404,-0.1076 filters=1 filters_raw="Listing status:Historical sale" fields=0 travel_entries=0 grid_ms=0.1 agg_ms=0.9 json_ms=0.5 total_ms=1.5
2026-03-17T08:34:32.542179Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89195da4d33ffff resolution=9 total_count=746 filters=1 filters_raw="Listing status:Historical sale" ms=0.5
2026-03-17T08:34:34.469487Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89195da4d33ffff resolution=9 total_count=6 filters=1 filters_raw="Listing status:For rent" ms=0.1
2026-03-17T08:34:34.620706Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=82474 parallel=true cells_before_filter=274 cells_after_filter=196 truncated=false bounds=51.4896,-0.1524,51.5404,-0.1076 filters=1 filters_raw="Listing status:For rent" fields=0 travel_entries=0 grid_ms=0.1 agg_ms=0.7 json_ms=0.4 total_ms=1.1
2026-03-17T08:35:15.464691Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:36:15.461317Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:37:15.462465Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:38:15.461428Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:39:15.463264Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:40:15.466916Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:41:15.463402Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:42:15.462539Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:43:15.461880Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:44:15.462263Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:45:15.461882Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:46:15.462228Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:47:15.462476Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:47:28.935265Z INFO property_map_server: Prometheus metrics initialized
2026-03-17T08:47:28.935449Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-17T08:47:28.935457Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-17T08:47:29.007775Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-17T08:47:29.007785Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-17T08:47:31.674791Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-17T08:47:31.674802Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-17T08:47:31.972527Z INFO property_map_server::data::property: buy listings joined rows=457076
2026-03-17T08:47:31.972545Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-17T08:47:32.082470Z INFO property_map_server::data::property: rent listings joined rows=122594
2026-03-17T08:47:32.082480Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-17T08:47:43.806418Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=457076 rent_listings=122594 total=15783051
2026-03-17T08:47:43.806509Z INFO property_map_server::data::property: Feature columns from config numeric=55 enums=13 total=68
2026-03-17T08:47:45.135285Z INFO property_map_server::data::property: Combined data selected rows=15783051
2026-03-17T08:47:45.326377Z INFO property_map_server::data::property: Extracting numeric feature columns
2026-03-17T08:47:45.712528Z INFO property_map_server::data::property: Computing histograms for numeric features
2026-03-17T08:47:46.876195Z INFO property_map_server::data::property: Extracting string columns
2026-03-17T08:47:49.145516Z INFO property_map_server::data::property: Building enum features
2026-03-17T08:47:50.661409Z INFO property_map_server::data::property: Extracting renovation history
2026-03-17T08:47:52.947453Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
2026-03-17T08:47:52.947462Z INFO property_map_server::data::property: Extracting listing features
2026-03-17T08:47:53.599162Z INFO property_map_server::data::property: Listing features extracted properties_with_features=518063
2026-03-17T08:47:53.599171Z INFO property_map_server::data::property: Sorting rows by spatial locality
2026-03-17T08:47:54.619942Z INFO property_map_server::data::property: Building interned strings
2026-03-17T08:48:00.802774Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
2026-03-17T08:48:03.547995Z INFO property_map_server::data::property: Data loading complete
2026-03-17T08:48:05.049275Z INFO property_map_server: Property data loaded rows=15783051 features=68 enums=13
2026-03-17T08:48:05.049293Z INFO property_map_server: Building spatial grid index (0.01° cells)
2026-03-17T08:48:05.459943Z INFO property_map_server: Precomputing H3 cells at resolution 12
2026-03-17T08:48:05.459953Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
2026-03-17T08:48:05.865563Z INFO property_map_server::data::property: H3 precomputation complete (15783051 cells)
2026-03-17T08:48:05.865637Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
2026-03-17T08:48:05.865651Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
2026-03-17T08:48:05.886166Z INFO property_map_server::data::poi: Loaded 678242 POIs
2026-03-17T08:48:06.006159Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
2026-03-17T08:48:06.006744Z INFO property_map_server::data::poi: POI data loading complete.
2026-03-17T08:48:06.043360Z INFO property_map_server: POI data loaded pois=678242
2026-03-17T08:48:06.043368Z INFO property_map_server: Building POI spatial grid index
2026-03-17T08:48:06.048757Z INFO property_map_server: Loading place data from /app/data/places.parquet
2026-03-17T08:48:06.048766Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
2026-03-17T08:48:06.049291Z INFO property_map_server::data::places: Loaded 3474 places
2026-03-17T08:48:06.050002Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
2026-03-17T08:48:06.050053Z INFO property_map_server: Place data loaded places=3474
2026-03-17T08:48:06.050061Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
2026-03-17T08:48:06.050064Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
2026-03-17T08:48:06.062151Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
2026-03-17T08:48:15.297171Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
2026-03-17T08:48:15.545357Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
2026-03-17T08:48:15.545379Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
2026-03-17T08:48:15.640450Z INFO property_map_server: PMTiles loaded successfully
2026-03-17T08:48:15.684715Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
2026-03-17T08:48:15.789766Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
2026-03-17T08:48:15.790261Z INFO property_map_server: Precomputed features response groups=8
2026-03-17T08:48:15.790275Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
2026-03-17T08:48:15.852396Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
2026-03-17T08:48:15.854872Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
2026-03-17T08:48:15.858800Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
2026-03-17T08:48:15.911308Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
2026-03-17T08:48:15.915275Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
2026-03-17T08:48:15.915303Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
2026-03-17T08:48:15.915316Z INFO property_map_server: Loading travel time data from /app/data/travel-times
2026-03-17T08:48:16.153964Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
2026-03-17T08:48:16.155556Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
2026-03-17T08:48:16.156564Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
2026-03-17T08:48:16.168132Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1869
2026-03-17T08:48:16.168166Z INFO property_map_server: Travel time store loaded modes=4
2026-03-17T08:48:16.168228Z INFO property_map_server: Precomputed AI filters system prompt
2026-03-17T08:48:16.774064Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:48:18.295547Z INFO property_map_server: All memory pages locked (mlockall)
2026-03-17T08:48:18.295586Z INFO property_map_server: Server listening on 0.0.0.0:8001
2026-03-17T08:49:16.216499Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:50:16.215664Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:51:16.214094Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:52:16.215038Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-17T08:53:00.492875Z INFO property_map_server: Prometheus metrics initialized
2026-03-17T08:53:00.493149Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-17T08:53:00.493156Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-17T08:53:00.728565Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-17T08:53:00.728575Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-17T08:53:03.595748Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-17T08:53:03.595759Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-17T08:53:03.975669Z INFO property_map_server::data::property: buy listings joined rows=457076
2026-03-17T08:53:03.975687Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-17T08:53:04.083853Z INFO property_map_server::data::property: rent listings joined rows=122594
2026-03-17T08:53:04.083863Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-17T08:53:19.531799Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=457076 rent_listings=122594 total=15783051
2026-03-17T08:53:19.531893Z INFO property_map_server::data::property: Feature columns from config numeric=55 enums=13 total=68
2026-03-17T08:53:20.977401Z INFO property_map_server::data::property: Combined data selected rows=15783051
2026-03-17T08:53:21.166389Z INFO property_map_server::data::property: Extracting numeric feature columns
2026-03-17T08:53:21.555895Z INFO property_map_server::data::property: Computing histograms for numeric features
2026-03-17T08:53:22.777545Z INFO property_map_server::data::property: Extracting string columns
2026-03-17T08:53:25.067611Z INFO property_map_server::data::property: Building enum features
2026-03-17T08:53:26.433346Z INFO property_map_server::data::property: Extracting renovation history
2026-03-17T08:53:28.667594Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
2026-03-17T08:53:28.667602Z INFO property_map_server::data::property: Extracting listing features
2026-03-17T08:53:29.309247Z INFO property_map_server::data::property: Listing features extracted properties_with_features=518063
2026-03-17T08:53:29.309255Z INFO property_map_server::data::property: Sorting rows by spatial locality
2026-03-17T08:53:30.205482Z INFO property_map_server::data::property: Building interned strings
2026-03-17T08:53:36.247881Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
2026-03-17T08:53:38.758705Z INFO property_map_server::data::property: Data loading complete
2026-03-17T08:53:40.180446Z INFO property_map_server: Property data loaded rows=15783051 features=68 enums=13
2026-03-17T08:53:40.180455Z INFO property_map_server: Building spatial grid index (0.01° cells)
2026-03-17T08:53:40.577820Z INFO property_map_server: Precomputing H3 cells at resolution 12
2026-03-17T08:53:40.577828Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
2026-03-17T08:53:40.972135Z INFO property_map_server::data::property: H3 precomputation complete (15783051 cells)
2026-03-17T08:53:40.972155Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
2026-03-17T08:53:40.972161Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
2026-03-17T08:53:41.018292Z INFO property_map_server::data::poi: Loaded 678242 POIs
2026-03-17T08:53:41.129204Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
2026-03-17T08:53:41.129769Z INFO property_map_server::data::poi: POI data loading complete.
2026-03-17T08:53:41.168005Z INFO property_map_server: POI data loaded pois=678242
2026-03-17T08:53:41.168011Z INFO property_map_server: Building POI spatial grid index
2026-03-17T08:53:41.173291Z INFO property_map_server: Loading place data from /app/data/places.parquet
2026-03-17T08:53:41.173297Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
2026-03-17T08:53:41.175229Z INFO property_map_server::data::places: Loaded 3474 places
2026-03-17T08:53:41.176075Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
2026-03-17T08:53:41.176126Z INFO property_map_server: Place data loaded places=3474
2026-03-17T08:53:41.176134Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
2026-03-17T08:53:41.176137Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
2026-03-17T08:53:41.178186Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
2026-03-17T08:53:51.542107Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
2026-03-17T08:53:51.769077Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
2026-03-17T08:53:51.769098Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
2026-03-17T08:53:51.769313Z INFO property_map_server: PMTiles loaded successfully
2026-03-17T08:53:51.811454Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
2026-03-17T08:53:51.881249Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
2026-03-17T08:53:51.881405Z INFO property_map_server: Precomputed features response groups=8
2026-03-17T08:53:51.881422Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
2026-03-17T08:53:51.933372Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
2026-03-17T08:53:51.935544Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
2026-03-17T08:53:51.938605Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
2026-03-17T08:53:51.988188Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
2026-03-17T08:53:51.992737Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
2026-03-17T08:53:51.992761Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
2026-03-17T08:53:51.992778Z INFO property_map_server: Loading travel time data from /app/data/travel-times
2026-03-17T08:53:52.012596Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
2026-03-17T08:53:52.012912Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
2026-03-17T08:53:52.013296Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
2026-03-17T08:53:52.015215Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1869
2026-03-17T08:53:52.015233Z INFO property_map_server: Travel time store loaded modes=4
2026-03-17T08:53:52.015276Z INFO property_map_server: Precomputed AI filters system prompt
2026-03-17T08:53:54.777281Z INFO property_map_server: All memory pages locked (mlockall)
2026-03-17T08:53:54.777322Z INFO property_map_server: Server listening on 0.0.0.0:8001

View file

@ -18,8 +18,6 @@ pub struct PocketBaseUser {
pub id: String,
pub email: String,
#[serde(default)]
pub verified: bool,
#[serde(default)]
pub is_admin: bool,
#[serde(default)]
pub subscription: String,
@ -57,9 +55,13 @@ impl TokenCache {
// Evict expired entries first
let now = Instant::now();
map.retain(|_, (_, created)| now.duration_since(*created).as_secs() < TOKEN_TTL_SECS);
// If still too many, clear all
// If still too many, evict oldest half instead of clearing all
// (avoids thundering herd where every request re-validates at once)
if map.len() >= MAX_CACHE_ENTRIES {
map.clear();
let mut ages: Vec<Instant> = map.values().map(|(_, created)| *created).collect();
ages.sort();
let median = ages[ages.len() / 2];
map.retain(|_, (_, created)| *created >= median);
}
}
map.insert(token, (user, Instant::now()));

View file

@ -28,8 +28,3 @@ pub const SERVICE_CALL_TIMEOUT: u64 = 120;
/// Inner London free zone bounds (south, west, north, east) — roughly zone 1.
/// Users without a license can only query data within these bounds.
pub const FREE_ZONE_BOUNDS: (f64, f64, f64, f64) = (51.44, -0.31, 51.59, 0.05);
/// Exact demo bounds (south, west, north, east) sent by the homepage ScrollStory.
/// Requests matching these exact values bypass the license check so the
/// animation works for anonymous visitors. Only this specific viewport is allowed.
pub const DEMO_BOUNDS: (f64, f64, f64, f64) = (46.0, -12.0, 56.5, 12.0);

View file

@ -23,7 +23,7 @@ pub struct POIData {
/// Byte offset into `id_buffer` where each row's ID starts.
id_offsets: Vec<u32>,
/// Length in bytes of each row's ID.
id_lengths: Vec<u8>,
id_lengths: Vec<u16>,
pub group: InternedColumn,
pub category: InternedColumn,
pub name: Vec<String>,
@ -101,7 +101,7 @@ impl POIData {
let mut id_lengths = Vec::with_capacity(row_count);
for s in &id_raw {
let offset = id_buffer.len() as u32;
let length = s.len().min(u8::MAX as usize) as u8;
let length = s.len().min(u16::MAX as usize) as u16;
id_offsets.push(offset);
id_lengths.push(length);
id_buffer.push_str(&s[..length as usize]);

View file

@ -128,6 +128,7 @@ impl PostcodeData {
// Compute centroid across all vertices from all rings
let total_vertices: usize = rings.iter().map(|ring| ring.len()).sum();
let centroid = if total_vertices == 0 {
tracing::warn!(postcode = %postcode, "Postcode polygon has zero vertices, defaulting centroid to (0,0)");
(0.0, 0.0)
} else {
let mut sum_lat: f32 = 0.0;
@ -168,7 +169,12 @@ impl PostcodeData {
local_aabbs.push((aabb_south, aabb_west, aabb_north, aabb_east));
}
Ok::<_, anyhow::Error>((local_postcodes, local_polygons, local_centroids, local_aabbs))
Ok::<_, anyhow::Error>((
local_postcodes,
local_polygons,
local_centroids,
local_aabbs,
))
})
.collect::<Result<Vec<_>, _>>()?;

View file

@ -538,6 +538,20 @@ impl PropertyData {
Ok(joined)
};
let listings_buy = load_listings(listings_buy_path, "buy")?;
// Derive "Asking price per sqm" if not already present
let listings_buy = if listings_buy.schema().get("Asking price per sqm").is_none() {
listings_buy
.lazy()
.with_column(
(col("Asking price").cast(DataType::Float64) / col("Total floor area (sqm)"))
.round(0)
.alias("Asking price per sqm"),
)
.collect()
.context("Failed to derive Asking price per sqm")?
} else {
listings_buy
};
let listings_rent = load_listings(listings_rent_path, "rent")?;
// Concatenate all rows into a single DataFrame

View file

@ -236,19 +236,24 @@ impl TravelTimeStore {
}
}
/// Slugify a place name to match travel time file naming convention.
/// "Abbey Hey" → "abbey-hey", "A'Bhuaile Ghlas" → "a-bhuaile-ghlas"
/// Slugify a place name to match Java `originFilename()` convention.
/// Strips non-alphanumeric chars (except spaces/hyphens) first, then collapses
/// whitespace to hyphens. This matches Java's `replaceAll("[^a-z0-9 -]", "")`
/// followed by `replaceAll("\\s+", "-")`.
/// "King's Cross" → "kings-cross", "Abbey Hey" → "abbey-hey"
pub fn slugify(name: &str) -> String {
let mut result = String::with_capacity(name.len());
let mut last_was_hyphen = true; // Start true to skip leading hyphens
for ch in name.chars() {
let lower = ch.to_ascii_lowercase();
if ch.is_ascii_alphanumeric() {
result.push(ch.to_ascii_lowercase());
result.push(lower);
last_was_hyphen = false;
} else if !last_was_hyphen {
} else if (ch == ' ' || ch == '-') && !last_was_hyphen {
result.push('-');
last_was_hyphen = true;
}
// Other non-alphanumeric chars (apostrophes, ampersands, etc.) are stripped
}
if result.ends_with('-') {
result.pop();
@ -266,6 +271,32 @@ mod tests {
assert_eq!(slugify("London"), "london");
}
#[test]
fn slugify_apostrophes_stripped() {
    // Station/place names containing apostrophes (and the full stop in "St.")
    // paired with the slug each one is expected to produce: the punctuation
    // disappears entirely and only the spaces turn into hyphens.
    let cases = [
        ("King's Cross", "kings-cross"),
        ("Earl's Court tube station", "earls-court-tube-station"),
        ("St. Paul's tube station", "st-pauls-tube-station"),
        ("Regent's Park tube station", "regents-park-tube-station"),
    ];

    for &(input, expected) in &cases {
        assert_eq!(slugify(input), expected);
    }
}
#[test]
fn slugify_special_chars_stripped() {
    // Names containing ampersands, parentheses and apostrophes, paired with
    // the slug each one is expected to produce: those characters are removed
    // without leaving doubled or stray hyphens behind.
    let cases = [
        (
            "Cobham & Stoke d'Abernon railway station",
            "cobham-stoke-dabernon-railway-station",
        ),
        (
            "Ravenglass (R&ER) railway station",
            "ravenglass-rer-railway-station",
        ),
    ];

    for &(input, expected) in &cases {
        assert_eq!(slugify(input), expected);
    }
}
#[test]
fn strip_numeric_prefix_basic() {
assert_eq!(

View file

@ -68,9 +68,9 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
features: &[
FeatureConfig {
name: "Last known price",
bounds: Bounds::Fixed {
min: 0.0,
max: 2_000_000.0,
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 10000.0,
description: "Most recent sale price from the Land Registry",
@ -79,15 +79,15 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
prefix: "£",
suffix: "",
raw: false,
absolute: true,
absolute: false,
modes: &["historical"],
linked: "",
},
FeatureConfig {
name: "Estimated current price",
bounds: Bounds::Fixed {
min: 0.0,
max: 2_000_000.0,
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 10000.0,
description: "Inflation-adjusted estimate of the current property value",
@ -96,7 +96,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
prefix: "£",
suffix: "",
raw: false,
absolute: true,
absolute: false,
modes: &["historical"],
linked: "Asking price",
},
@ -252,9 +252,9 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
},
FeatureConfig {
name: "Asking price",
bounds: Bounds::Fixed {
min: 0.0,
max: 2_000_000.0,
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 10000.0,
description: "Listed asking price for properties currently for sale",
@ -263,15 +263,15 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
prefix: "£",
suffix: "",
raw: false,
absolute: true,
absolute: false,
modes: &["buy"],
linked: "Estimated current price",
},
FeatureConfig {
name: "Asking rent (monthly)",
bounds: Bounds::Fixed {
min: 0.0,
max: 10_000.0,
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 50.0,
description: "Listed monthly rent for properties currently for rent",
@ -280,7 +280,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
prefix: "£",
suffix: "/mo",
raw: false,
absolute: true,
absolute: false,
modes: &["rent"],
linked: "Estimated monthly rent",
},
@ -870,7 +870,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
min: 0.0,
max: 100.0,
},
step: 1.0,
step: 0.1,
description: "Percentage of population identifying as South Asian",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Indian, Pakistani, Bangladeshi, or any other Asian background.",
source: "ethnicity",
@ -887,7 +887,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
min: 0.0,
max: 100.0,
},
step: 1.0,
step: 0.1,
description: "Percentage of population identifying as East Asian",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Chinese.",
source: "ethnicity",
@ -904,7 +904,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
min: 0.0,
max: 100.0,
},
step: 1.0,
step: 0.1,
description: "Percentage of population identifying as Black",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Black, Black British, Caribbean, or African.",
source: "ethnicity",
@ -921,7 +921,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
min: 0.0,
max: 100.0,
},
step: 1.0,
step: 0.1,
description: "Percentage of population identifying as Mixed or Multiple ethnic groups",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Mixed or Multiple ethnic groups (White and Black Caribbean, White and Black African, White and Asian, or any other Mixed or Multiple background).",
source: "ethnicity",
@ -938,7 +938,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
min: 0.0,
max: 100.0,
},
step: 1.0,
step: 0.1,
description: "Percentage of population identifying as Other ethnic group",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Other ethnic group (Arab or any other ethnic group not covered by the main categories).",
source: "ethnicity",

Some files were not shown because too many files have changed in this diff Show more