This commit is contained in:
Andras Schmelczer 2026-03-15 21:22:28 +00:00
parent 479ef92236
commit c38d654ac7
44 changed files with 2526 additions and 701 deletions

View file

@ -813,8 +813,14 @@
], ],
"source": [ "source": [
"# Build area lookup from both sets\n", "# Build area lookup from both sets\n",
"areas_before = {f[\"properties\"][\"postcode\"]: f[\"properties\"][\"area_sqm\"] for f in no_green[\"features\"]}\n", "areas_before = {\n",
"areas_after = {f[\"properties\"][\"postcode\"]: f[\"properties\"][\"area_sqm\"] for f in with_green[\"features\"]}\n", " f[\"properties\"][\"postcode\"]: f[\"properties\"][\"area_sqm\"]\n",
" for f in no_green[\"features\"]\n",
"}\n",
"areas_after = {\n",
" f[\"properties\"][\"postcode\"]: f[\"properties\"][\"area_sqm\"]\n",
" for f in with_green[\"features\"]\n",
"}\n",
"\n", "\n",
"# Compute percentage removed\n", "# Compute percentage removed\n",
"diffs = []\n", "diffs = []\n",
@ -1161,16 +1167,23 @@
"\n", "\n",
"colormap = cm.LinearColormap(\n", "colormap = cm.LinearColormap(\n",
" colors=[\"#ffffcc\", \"#fd8d3c\", \"#e31a1c\", \"#800026\"],\n", " colors=[\"#ffffcc\", \"#fd8d3c\", \"#e31a1c\", \"#800026\"],\n",
" vmin=0, vmax=min(max_pct, 90),\n", " vmin=0,\n",
" vmax=min(max_pct, 90),\n",
" caption=\"% area removed by greenspace\",\n", " caption=\"% area removed by greenspace\",\n",
")\n", ")\n",
"\n", "\n",
"\n",
"# Show original boundaries, colored by how much was removed\n", "# Show original boundaries, colored by how much was removed\n",
"def style_by_removal(feature):\n", "def style_by_removal(feature):\n",
" pc = feature[\"properties\"][\"postcode\"]\n", " pc = feature[\"properties\"][\"postcode\"]\n",
" pct = diff_lookup.get(pc, 0)\n", " pct = diff_lookup.get(pc, 0)\n",
" if pct <= 1:\n", " if pct <= 1:\n",
" return {\"fillColor\": \"#cccccc\", \"color\": \"#999\", \"weight\": 0.5, \"fillOpacity\": 0.15}\n", " return {\n",
" \"fillColor\": \"#cccccc\",\n",
" \"color\": \"#999\",\n",
" \"weight\": 0.5,\n",
" \"fillOpacity\": 0.15,\n",
" }\n",
" return {\n", " return {\n",
" \"fillColor\": colormap(min(pct, 90)),\n", " \"fillColor\": colormap(min(pct, 90)),\n",
" \"color\": \"white\",\n", " \"color\": \"white\",\n",
@ -1178,6 +1191,7 @@
" \"fillOpacity\": 0.6,\n", " \"fillOpacity\": 0.6,\n",
" }\n", " }\n",
"\n", "\n",
"\n",
"folium.GeoJson(\n", "folium.GeoJson(\n",
" no_green,\n", " no_green,\n",
" name=\"Greenspace removal %\",\n", " name=\"Greenspace removal %\",\n",

View file

@ -54,25 +54,32 @@
" ape = np.abs(p - a) / a\n", " ape = np.abs(p - a) / a\n",
" err = p - a\n", " err = p - a\n",
" return {\n", " return {\n",
" \"MdAPE (%)\": f\"{np.median(ape)*100:.1f}\",\n", " \"MdAPE (%)\": f\"{np.median(ape) * 100:.1f}\",\n",
" \"% within 10%\": f\"{np.mean(ape <= 0.10)*100:.1f}\",\n", " \"% within 10%\": f\"{np.mean(ape <= 0.10) * 100:.1f}\",\n",
" \"% within 20%\": f\"{np.mean(ape <= 0.20)*100:.1f}\",\n", " \"% within 20%\": f\"{np.mean(ape <= 0.20) * 100:.1f}\",\n",
" \"% within 30%\": f\"{np.mean(ape <= 0.30)*100:.1f}\",\n", " \"% within 30%\": f\"{np.mean(ape <= 0.30) * 100:.1f}\",\n",
" \"MAE (\\u00a3)\": f\"{np.mean(np.abs(err)):,.0f}\",\n", " \"MAE (\\u00a3)\": f\"{np.mean(np.abs(err)):,.0f}\",\n",
" \"Mean signed error (\\u00a3)\": f\"{np.mean(err):+,.0f}\",\n", " \"Mean signed error (\\u00a3)\": f\"{np.mean(err):+,.0f}\",\n",
" \"n\": f\"{len(a):,}\",\n", " \"n\": f\"{len(a):,}\",\n",
" }\n", " }\n",
"\n", "\n",
"\n",
"actual = backtest_df[\"actual_price\"].to_numpy().astype(np.float64)\n", "actual = backtest_df[\"actual_price\"].to_numpy().astype(np.float64)\n",
"metrics = {\n", "metrics = {\n",
" \"Naive\": compute_metrics(actual, backtest_df[\"input_price\"].to_numpy().astype(np.float64)),\n", " \"Naive\": compute_metrics(\n",
" \"Index\": compute_metrics(actual, backtest_df[\"predicted\"].to_numpy().astype(np.float64)),\n", " actual, backtest_df[\"input_price\"].to_numpy().astype(np.float64)\n",
" ),\n",
" \"Index\": compute_metrics(\n",
" actual, backtest_df[\"predicted\"].to_numpy().astype(np.float64)\n",
" ),\n",
"}\n", "}\n",
"\n", "\n",
"metrics_table = pl.DataFrame([\n", "metrics_table = pl.DataFrame(\n",
" {\"Metric\": k, **{stage: v[k] for stage, v in metrics.items()}}\n", " [\n",
" for k in list(metrics[\"Naive\"].keys())\n", " {\"Metric\": k, **{stage: v[k] for stage, v in metrics.items()}}\n",
"])\n", " for k in list(metrics[\"Naive\"].keys())\n",
" ]\n",
")\n",
"metrics_table" "metrics_table"
] ]
}, },
@ -91,8 +98,7 @@
"source": [ "source": [
"# National index (average across all sectors weighted by n_pairs)\n", "# National index (average across all sectors weighted by n_pairs)\n",
"national = (\n", "national = (\n",
" index_df\n", " index_df.group_by(\"year\")\n",
" .group_by(\"year\")\n",
" .agg(\n", " .agg(\n",
" (pl.col(\"log_index\") * pl.col(\"n_pairs\")).sum() / pl.col(\"n_pairs\").sum(),\n", " (pl.col(\"log_index\") * pl.col(\"n_pairs\")).sum() / pl.col(\"n_pairs\").sum(),\n",
" )\n", " )\n",
@ -107,14 +113,23 @@
"\n", "\n",
"# If not enough, pick some with high/low n_pairs\n", "# If not enough, pick some with high/low n_pairs\n",
"if len(sample_sectors) < 3:\n", "if len(sample_sectors) < 3:\n",
" sector_counts = index_df.group_by(\"sector\").agg(pl.col(\"n_pairs\").first()).sort(\"n_pairs\", descending=True)\n", " sector_counts = (\n",
" index_df.group_by(\"sector\")\n",
" .agg(pl.col(\"n_pairs\").first())\n",
" .sort(\"n_pairs\", descending=True)\n",
" )\n",
" top = sector_counts.head(2)[\"sector\"].to_list()\n", " top = sector_counts.head(2)[\"sector\"].to_list()\n",
" bottom = sector_counts.filter(pl.col(\"n_pairs\") > 0).tail(2)[\"sector\"].to_list()\n", " bottom = sector_counts.filter(pl.col(\"n_pairs\") > 0).tail(2)[\"sector\"].to_list()\n",
" sample_sectors = list(set(sample_sectors + top + bottom))[:5]\n", " sample_sectors = list(set(sample_sectors + top + bottom))[:5]\n",
"\n", "\n",
"samples = index_df.filter(pl.col(\"sector\").is_in(sample_sectors))\n", "samples = index_df.filter(pl.col(\"sector\").is_in(sample_sectors))\n",
"\n", "\n",
"combined = pl.concat([national.select(\"sector\", \"year\", \"log_index\"), samples.select(\"sector\", \"year\", \"log_index\")])\n", "combined = pl.concat(\n",
" [\n",
" national.select(\"sector\", \"year\", \"log_index\"),\n",
" samples.select(\"sector\", \"year\", \"log_index\"),\n",
" ]\n",
")\n",
"\n", "\n",
"# Normalize: index = 100 at base year (earliest available)\n", "# Normalize: index = 100 at base year (earliest available)\n",
"combined = combined.with_columns(\n", "combined = combined.with_columns(\n",
@ -122,7 +137,10 @@
")\n", ")\n",
"\n", "\n",
"fig = px.line(\n", "fig = px.line(\n",
" combined.to_pandas(), x=\"year\", y=\"index_100\", color=\"sector\",\n", " combined.to_pandas(),\n",
" x=\"year\",\n",
" y=\"index_100\",\n",
" color=\"sector\",\n",
" title=\"Repeat-Sales Price Index (base year = 100)\",\n", " title=\"Repeat-Sales Price Index (base year = 100)\",\n",
" labels={\"index_100\": \"Index (base=100)\", \"year\": \"Year\"},\n", " labels={\"index_100\": \"Index (base=100)\", \"year\": \"Year\"},\n",
")\n", ")\n",
@ -155,8 +173,10 @@
"\n", "\n",
"fig.update_layout(\n", "fig.update_layout(\n",
" title=\"Absolute Percentage Error Distribution\",\n", " title=\"Absolute Percentage Error Distribution\",\n",
" xaxis_title=\"APE (%)\", yaxis_title=\"Count\",\n", " xaxis_title=\"APE (%)\",\n",
" barmode=\"overlay\", height=500,\n", " yaxis_title=\"Count\",\n",
" barmode=\"overlay\",\n",
" height=500,\n",
")\n", ")\n",
"fig.show()" "fig.show()"
] ]
@ -183,17 +203,27 @@
"pred = sample[\"predicted\"].to_numpy().astype(np.float64)\n", "pred = sample[\"predicted\"].to_numpy().astype(np.float64)\n",
"\n", "\n",
"fig = go.Figure()\n", "fig = go.Figure()\n",
"fig.add_trace(go.Scattergl(\n", "fig.add_trace(\n",
" x=actual_sample, y=pred, mode=\"markers\",\n", " go.Scattergl(\n",
" marker=dict(size=2, opacity=0.3), name=\"Index\",\n", " x=actual_sample,\n",
"))\n", " y=pred,\n",
" mode=\"markers\",\n",
" marker=dict(size=2, opacity=0.3),\n",
" name=\"Index\",\n",
" )\n",
")\n",
"# 45-degree reference line\n", "# 45-degree reference line\n",
"min_val = max(10_000, min(actual_sample.min(), np.nanmin(pred)))\n", "min_val = max(10_000, min(actual_sample.min(), np.nanmin(pred)))\n",
"max_val = min(5_000_000, max(actual_sample.max(), np.nanmax(pred)))\n", "max_val = min(5_000_000, max(actual_sample.max(), np.nanmax(pred)))\n",
"fig.add_trace(go.Scatter(\n", "fig.add_trace(\n",
" x=[min_val, max_val], y=[min_val, max_val],\n", " go.Scatter(\n",
" mode=\"lines\", line=dict(color=\"red\", dash=\"dash\"), showlegend=False,\n", " x=[min_val, max_val],\n",
"))\n", " y=[min_val, max_val],\n",
" mode=\"lines\",\n",
" line=dict(color=\"red\", dash=\"dash\"),\n",
" showlegend=False,\n",
" )\n",
")\n",
"fig.update_xaxes(type=\"log\", title_text=\"Actual (\\u00a3)\")\n", "fig.update_xaxes(type=\"log\", title_text=\"Actual (\\u00a3)\")\n",
"fig.update_yaxes(type=\"log\", title_text=\"Predicted (\\u00a3)\")\n", "fig.update_yaxes(type=\"log\", title_text=\"Predicted (\\u00a3)\")\n",
"fig.update_layout(title=\"Predicted vs Actual Price (log scale, 10K sample)\", height=500)\n", "fig.update_layout(title=\"Predicted vs Actual Price (log scale, 10K sample)\", height=500)\n",
@ -234,12 +264,22 @@
" for name, arr in [(\"Naive\", naive), (\"Index\", pred)]:\n", " for name, arr in [(\"Naive\", naive), (\"Index\", pred)]:\n",
" ape = np.abs(arr[mask] - actual[mask]) / actual[mask]\n", " ape = np.abs(arr[mask] - actual[mask]) / actual[mask]\n",
" valid = np.isfinite(ape)\n", " valid = np.isfinite(ape)\n",
" rows.append({\"Price Band\": label, \"Method\": name, \"MdAPE (%)\": float(np.median(ape[valid]) * 100)})\n", " rows.append(\n",
" {\n",
" \"Price Band\": label,\n",
" \"Method\": name,\n",
" \"MdAPE (%)\": float(np.median(ape[valid]) * 100),\n",
" }\n",
" )\n",
"\n", "\n",
"band_df = pl.DataFrame(rows)\n", "band_df = pl.DataFrame(rows)\n",
"fig = px.bar(\n", "fig = px.bar(\n",
" band_df.to_pandas(), x=\"Price Band\", y=\"MdAPE (%)\", color=\"Method\",\n", " band_df.to_pandas(),\n",
" barmode=\"group\", title=\"MdAPE by Price Band\",\n", " x=\"Price Band\",\n",
" y=\"MdAPE (%)\",\n",
" color=\"Method\",\n",
" barmode=\"group\",\n",
" title=\"MdAPE by Price Band\",\n",
" category_orders={\"Price Band\": [b[2] for b in bands]},\n", " category_orders={\"Price Band\": [b[2] for b in bands]},\n",
")\n", ")\n",
"fig.update_layout(height=450)\n", "fig.update_layout(height=450)\n",
@ -264,7 +304,9 @@
")\n", ")\n",
"\n", "\n",
"# Top 20 areas by volume\n", "# Top 20 areas by volume\n",
"top_areas = bt.group_by(\"area\").len().sort(\"len\", descending=True).head(20)[\"area\"].to_list()\n", "top_areas = (\n",
" bt.group_by(\"area\").len().sort(\"len\", descending=True).head(20)[\"area\"].to_list()\n",
")\n",
"\n", "\n",
"actual_np = bt[\"actual_price\"].to_numpy().astype(np.float64)\n", "actual_np = bt[\"actual_price\"].to_numpy().astype(np.float64)\n",
"pred_np = bt[\"predicted\"].to_numpy().astype(np.float64)\n", "pred_np = bt[\"predicted\"].to_numpy().astype(np.float64)\n",
@ -279,12 +321,18 @@
" p = arr[mask]\n", " p = arr[mask]\n",
" valid = np.isfinite(p) & (a > 0)\n", " valid = np.isfinite(p) & (a > 0)\n",
" ape = np.abs(p[valid] - a[valid]) / a[valid]\n", " ape = np.abs(p[valid] - a[valid]) / a[valid]\n",
" rows.append({\"Area\": area, \"Method\": name, \"MdAPE (%)\": float(np.median(ape) * 100)})\n", " rows.append(\n",
" {\"Area\": area, \"Method\": name, \"MdAPE (%)\": float(np.median(ape) * 100)}\n",
" )\n",
"\n", "\n",
"area_df = pl.DataFrame(rows)\n", "area_df = pl.DataFrame(rows)\n",
"fig = px.bar(\n", "fig = px.bar(\n",
" area_df.to_pandas(), x=\"Area\", y=\"MdAPE (%)\", color=\"Method\",\n", " area_df.to_pandas(),\n",
" barmode=\"group\", title=\"MdAPE by Postcode Area (Top 20 by Volume)\",\n", " x=\"Area\",\n",
" y=\"MdAPE (%)\",\n",
" color=\"Method\",\n",
" barmode=\"group\",\n",
" title=\"MdAPE by Postcode Area (Top 20 by Volume)\",\n",
" category_orders={\"Area\": top_areas},\n", " category_orders={\"Area\": top_areas},\n",
")\n", ")\n",
"fig.update_layout(height=500)\n", "fig.update_layout(height=500)\n",
@ -324,11 +372,20 @@
" p = arr[mask]\n", " p = arr[mask]\n",
" valid = np.isfinite(p) & (a > 0)\n", " valid = np.isfinite(p) & (a > 0)\n",
" ape = np.abs(p[valid] - a[valid]) / a[valid]\n", " ape = np.abs(p[valid] - a[valid]) / a[valid]\n",
" rows.append({\"Gap (years)\": gap, \"Method\": name, \"MdAPE (%)\": float(np.median(ape) * 100)})\n", " rows.append(\n",
" {\n",
" \"Gap (years)\": gap,\n",
" \"Method\": name,\n",
" \"MdAPE (%)\": float(np.median(ape) * 100),\n",
" }\n",
" )\n",
"\n", "\n",
"gap_df = pl.DataFrame(rows)\n", "gap_df = pl.DataFrame(rows)\n",
"fig = px.line(\n", "fig = px.line(\n",
" gap_df.to_pandas(), x=\"Gap (years)\", y=\"MdAPE (%)\", color=\"Method\",\n", " gap_df.to_pandas(),\n",
" x=\"Gap (years)\",\n",
" y=\"MdAPE (%)\",\n",
" color=\"Method\",\n",
" title=\"MdAPE by Holding Period (years between input and actual sale)\",\n", " title=\"MdAPE by Holding Period (years between input and actual sale)\",\n",
" markers=True,\n", " markers=True,\n",
")\n", ")\n",

View file

@ -52,7 +52,9 @@
"pl.Config.set_tbl_rows(20)\n", "pl.Config.set_tbl_rows(20)\n",
"pl.Config.set_fmt_str_lengths(80)\n", "pl.Config.set_fmt_str_lengths(80)\n",
"\n", "\n",
"df = pl.read_parquet(\"/volumes/syncthing/Projects/property-map/property-data/rightmove_buy.parquet\")\n", "df = pl.read_parquet(\n",
" \"/volumes/syncthing/Projects/property-map/property-data/rightmove_buy.parquet\"\n",
")\n",
"schema = df.schema\n", "schema = df.schema\n",
"print(f\"Total rows: {len(df):,}\")\n", "print(f\"Total rows: {len(df):,}\")\n",
"print(f\"Columns ({len(schema)}):\")\n", "print(f\"Columns ({len(schema)}):\")\n",
@ -150,11 +152,13 @@
], ],
"source": [ "source": [
"# Null counts\n", "# Null counts\n",
"null_df = pl.DataFrame({\n", "null_df = pl.DataFrame(\n",
" \"column\": df.columns,\n", " {\n",
" \"nulls\": [df[c].null_count() for c in df.columns],\n", " \"column\": df.columns,\n",
" \"pct\": [f\"{df[c].null_count()/len(df)*100:.1f}%\" for c in df.columns],\n", " \"nulls\": [df[c].null_count() for c in df.columns],\n",
"})\n", " \"pct\": [f\"{df[c].null_count() / len(df) * 100:.1f}%\" for c in df.columns],\n",
" }\n",
")\n",
"null_df.filter(pl.col(\"nulls\") > 0)" "null_df.filter(pl.col(\"nulls\") > 0)"
] ]
}, },
@ -197,13 +201,17 @@
" \"price = 0\": len(df.filter(pl.col(\"price\") == 0)),\n", " \"price = 0\": len(df.filter(pl.col(\"price\") == 0)),\n",
" \"price > 50M\": len(df.filter(pl.col(\"price\") > 50_000_000)),\n", " \"price > 50M\": len(df.filter(pl.col(\"price\") > 50_000_000)),\n",
" \"floorspace > 10,000 sqm\": len(df.filter(pl.col(\"floorspace_sqm\") > 10_000)),\n", " \"floorspace > 10,000 sqm\": len(df.filter(pl.col(\"floorspace_sqm\") > 10_000)),\n",
" \"latitude outside UK (< 49 or > 61)\": len(df.filter((pl.col(\"latitude\") < 49) | (pl.col(\"latitude\") > 61))),\n", " \"latitude outside UK (< 49 or > 61)\": len(\n",
" \"longitude outside UK (< -8 or > 2)\": len(df.filter((pl.col(\"longitude\") < -8) | (pl.col(\"longitude\") > 2))),\n", " df.filter((pl.col(\"latitude\") < 49) | (pl.col(\"latitude\") > 61))\n",
" ),\n",
" \"longitude outside UK (< -8 or > 2)\": len(\n",
" df.filter((pl.col(\"longitude\") < -8) | (pl.col(\"longitude\") > 2))\n",
" ),\n",
" \"house_share = true\": len(df.filter(pl.col(\"house_share\"))),\n", " \"house_share = true\": len(df.filter(pl.col(\"house_share\"))),\n",
"}\n", "}\n",
"print(\"Data quality issues:\")\n", "print(\"Data quality issues:\")\n",
"for desc, count in issues.items():\n", "for desc, count in issues.items():\n",
" print(f\" {desc}: {count:,} ({count/len(df)*100:.2f}%)\")" " print(f\" {desc}: {count:,} ({count / len(df) * 100:.2f}%)\")"
] ]
}, },
{ {
@ -230,7 +238,7 @@
" & (pl.col(\"longitude\") >= -8)\n", " & (pl.col(\"longitude\") >= -8)\n",
" & (pl.col(\"longitude\") <= 2)\n", " & (pl.col(\"longitude\") <= 2)\n",
")\n", ")\n",
"print(f\"Clean rows: {len(clean):,} ({len(clean)/len(df)*100:.1f}% of original)\")" "print(f\"Clean rows: {len(clean):,} ({len(clean) / len(df) * 100:.1f}% of original)\")"
] ]
}, },
{ {
@ -1126,8 +1134,12 @@
"# Price histogram (clipped to 2nd-98th percentile)\n", "# Price histogram (clipped to 2nd-98th percentile)\n",
"lo, hi = price.quantile(0.02), price.quantile(0.98)\n", "lo, hi = price.quantile(0.02), price.quantile(0.98)\n",
"clipped = clean.filter((pl.col(\"price\") >= lo) & (pl.col(\"price\") <= hi))\n", "clipped = clean.filter((pl.col(\"price\") >= lo) & (pl.col(\"price\") <= hi))\n",
"fig = px.histogram(clipped.to_pandas(), x=\"price\", nbins=80,\n", "fig = px.histogram(\n",
" title=f\"Asking Price Distribution (£{lo:,.0f} - £{hi:,.0f}, 2nd-98th pctl)\")\n", " clipped.to_pandas(),\n",
" x=\"price\",\n",
" nbins=80,\n",
" title=f\"Asking Price Distribution (£{lo:,.0f} - £{hi:,.0f}, 2nd-98th pctl)\",\n",
")\n",
"fig.update_layout(height=400, xaxis_title=\"Asking Price (£)\", yaxis_title=\"Count\")\n", "fig.update_layout(height=400, xaxis_title=\"Asking Price (£)\", yaxis_title=\"Count\")\n",
"fig.show()" "fig.show()"
] ]
@ -439978,9 +439990,13 @@
], ],
"source": [ "source": [
"# Price by property type\n", "# Price by property type\n",
"fig = px.box(clean.filter(pl.col(\"price\") <= 2_000_000).to_pandas(),\n", "fig = px.box(\n",
" x=\"property_type\", y=\"price\", color=\"property_type\",\n", " clean.filter(pl.col(\"price\") <= 2_000_000).to_pandas(),\n",
" title=\"Price by Property Type (capped at £2M for readability)\")\n", " x=\"property_type\",\n",
" y=\"price\",\n",
" color=\"property_type\",\n",
" title=\"Price by Property Type (capped at £2M for readability)\",\n",
")\n",
"fig.update_layout(height=500, showlegend=False, yaxis_title=\"Price (£)\")\n", "fig.update_layout(height=500, showlegend=False, yaxis_title=\"Price (£)\")\n",
"fig.show()" "fig.show()"
] ]
@ -440079,9 +440095,7 @@
"source": [ "source": [
"# Price qualifier breakdown\n", "# Price qualifier breakdown\n",
"pq = clean[\"price_qualifier\"].value_counts().sort(\"count\", descending=True)\n", "pq = clean[\"price_qualifier\"].value_counts().sort(\"count\", descending=True)\n",
"pq = pq.with_columns(\n", "pq = pq.with_columns((pl.col(\"count\") / pl.col(\"count\").sum() * 100).alias(\"pct\"))\n",
" (pl.col(\"count\") / pl.col(\"count\").sum() * 100).alias(\"pct\")\n",
")\n",
"pq" "pq"
] ]
}, },
@ -440928,8 +440942,12 @@
"source": [ "source": [
"# Property type distribution\n", "# Property type distribution\n",
"type_counts = clean[\"property_type\"].value_counts().sort(\"count\", descending=True)\n", "type_counts = clean[\"property_type\"].value_counts().sort(\"count\", descending=True)\n",
"fig = px.pie(type_counts.to_pandas(), names=\"property_type\", values=\"count\",\n", "fig = px.pie(\n",
" title=\"Property Type Distribution\")\n", " type_counts.to_pandas(),\n",
" names=\"property_type\",\n",
" values=\"count\",\n",
" title=\"Property Type Distribution\",\n",
")\n",
"fig.update_layout(height=400)\n", "fig.update_layout(height=400)\n",
"fig.show()" "fig.show()"
] ]
@ -441805,9 +441823,16 @@
], ],
"source": [ "source": [
"# Top 20 sub-types\n", "# Top 20 sub-types\n",
"sub_counts = clean[\"property_sub_type\"].value_counts().sort(\"count\", descending=True).head(20)\n", "sub_counts = (\n",
"fig = px.bar(sub_counts.to_pandas(), x=\"count\", y=\"property_sub_type\", orientation=\"h\",\n", " clean[\"property_sub_type\"].value_counts().sort(\"count\", descending=True).head(20)\n",
" title=\"Top 20 Property Sub-types\")\n", ")\n",
"fig = px.bar(\n",
" sub_counts.to_pandas(),\n",
" x=\"count\",\n",
" y=\"property_sub_type\",\n",
" orientation=\"h\",\n",
" title=\"Top 20 Property Sub-types\",\n",
")\n",
"fig.update_layout(height=600, yaxis={\"categoryorder\": \"total ascending\"})\n", "fig.update_layout(height=600, yaxis={\"categoryorder\": \"total ascending\"})\n",
"fig.show()" "fig.show()"
] ]
@ -442643,9 +442668,15 @@
], ],
"source": [ "source": [
"# Tenure split\n", "# Tenure split\n",
"tenure_counts = clean[\"tenure\"].drop_nulls().value_counts().sort(\"count\", descending=True)\n", "tenure_counts = (\n",
"fig = px.pie(tenure_counts.to_pandas(), names=\"tenure\", values=\"count\",\n", " clean[\"tenure\"].drop_nulls().value_counts().sort(\"count\", descending=True)\n",
" title=f\"Tenure Split ({clean['tenure'].null_count():,} unknown / {clean['tenure'].null_count()/len(clean)*100:.1f}% missing)\")\n", ")\n",
"fig = px.pie(\n",
" tenure_counts.to_pandas(),\n",
" names=\"tenure\",\n",
" values=\"count\",\n",
" title=f\"Tenure Split ({clean['tenure'].null_count():,} unknown / {clean['tenure'].null_count() / len(clean) * 100:.1f}% missing)\",\n",
")\n",
"fig.update_layout(height=400)\n", "fig.update_layout(height=400)\n",
"fig.show()" "fig.show()"
] ]
@ -443546,8 +443577,14 @@
" .agg(pl.len().alias(\"count\"))\n", " .agg(pl.len().alias(\"count\"))\n",
" .sort(\"property_type\")\n", " .sort(\"property_type\")\n",
")\n", ")\n",
"fig = px.bar(tenure_by_type.to_pandas(), x=\"property_type\", y=\"count\", color=\"tenure\",\n", "fig = px.bar(\n",
" barmode=\"group\", title=\"Tenure by Property Type\")\n", " tenure_by_type.to_pandas(),\n",
" x=\"property_type\",\n",
" y=\"count\",\n",
" color=\"tenure\",\n",
" barmode=\"group\",\n",
" title=\"Tenure by Property Type\",\n",
")\n",
"fig.update_layout(height=400)\n", "fig.update_layout(height=400)\n",
"fig.show()" "fig.show()"
] ]
@ -444412,9 +444449,12 @@
], ],
"source": [ "source": [
"# Bedroom distribution\n", "# Bedroom distribution\n",
"bed_counts = clean.filter(pl.col(\"bedrooms\") <= 10)[\"bedrooms\"].value_counts().sort(\"bedrooms\")\n", "bed_counts = (\n",
"fig = px.bar(bed_counts.to_pandas(), x=\"bedrooms\", y=\"count\",\n", " clean.filter(pl.col(\"bedrooms\") <= 10)[\"bedrooms\"].value_counts().sort(\"bedrooms\")\n",
" title=\"Bedroom Count Distribution\")\n", ")\n",
"fig = px.bar(\n",
" bed_counts.to_pandas(), x=\"bedrooms\", y=\"count\", title=\"Bedroom Count Distribution\"\n",
")\n",
"fig.update_layout(height=400)\n", "fig.update_layout(height=400)\n",
"fig.show()" "fig.show()"
] ]
@ -445279,16 +445319,25 @@
")\n", ")\n",
"\n", "\n",
"fig = go.Figure()\n", "fig = go.Figure()\n",
"fig.add_trace(go.Bar(\n", "fig.add_trace(\n",
" x=price_by_beds[\"bedrooms\"], y=price_by_beds[\"median_price\"],\n", " go.Bar(\n",
" name=\"Median\", error_y=dict(type=\"data\",\n", " x=price_by_beds[\"bedrooms\"],\n",
" symmetric=False,\n", " y=price_by_beds[\"median_price\"],\n",
" array=(price_by_beds[\"p75\"] - price_by_beds[\"median_price\"]).to_list(),\n", " name=\"Median\",\n",
" arrayminus=(price_by_beds[\"median_price\"] - price_by_beds[\"p25\"]).to_list()\n", " error_y=dict(\n",
" type=\"data\",\n",
" symmetric=False,\n",
" array=(price_by_beds[\"p75\"] - price_by_beds[\"median_price\"]).to_list(),\n",
" arrayminus=(price_by_beds[\"median_price\"] - price_by_beds[\"p25\"]).to_list(),\n",
" ),\n",
" )\n", " )\n",
"))\n", ")\n",
"fig.update_layout(title=\"Median Price by Bedrooms (with IQR)\", height=400,\n", "fig.update_layout(\n",
" xaxis_title=\"Bedrooms\", yaxis_title=\"Price (£)\")\n", " title=\"Median Price by Bedrooms (with IQR)\",\n",
" height=400,\n",
" xaxis_title=\"Bedrooms\",\n",
" yaxis_title=\"Price (£)\",\n",
")\n",
"fig.show()" "fig.show()"
] ]
}, },
@ -446263,8 +446312,14 @@
" .agg(pl.len().alias(\"count\"))\n", " .agg(pl.len().alias(\"count\"))\n",
" .sort(\"property_type\", \"bedrooms\")\n", " .sort(\"property_type\", \"bedrooms\")\n",
")\n", ")\n",
"fig = px.bar(beds_by_type.to_pandas(), x=\"bedrooms\", y=\"count\", color=\"property_type\",\n", "fig = px.bar(\n",
" barmode=\"group\", title=\"Bedroom Distribution by Property Type\")\n", " beds_by_type.to_pandas(),\n",
" x=\"bedrooms\",\n",
" y=\"count\",\n",
" color=\"property_type\",\n",
" barmode=\"group\",\n",
" title=\"Bedroom Distribution by Property Type\",\n",
")\n",
"fig.update_layout(height=450)\n", "fig.update_layout(height=450)\n",
"fig.show()" "fig.show()"
] ]
@ -446323,19 +446378,26 @@
], ],
"source": [ "source": [
"# Floorspace availability by property type\n", "# Floorspace availability by property type\n",
"has_floor = clean.with_columns(pl.col(\"floorspace_sqm\").is_not_null().alias(\"has_floorspace\"))\n", "has_floor = clean.with_columns(\n",
"floor_by_type = (\n", " pl.col(\"floorspace_sqm\").is_not_null().alias(\"has_floorspace\")\n",
" has_floor.group_by(\"property_type\", \"has_floorspace\")\n", ")\n",
" .agg(pl.len().alias(\"count\"))\n", "floor_by_type = has_floor.group_by(\"property_type\", \"has_floorspace\").agg(\n",
" pl.len().alias(\"count\")\n",
")\n",
"totals = floor_by_type.group_by(\"property_type\").agg(\n",
" pl.col(\"count\").sum().alias(\"total\")\n",
")\n", ")\n",
"totals = floor_by_type.group_by(\"property_type\").agg(pl.col(\"count\").sum().alias(\"total\"))\n",
"floor_pct = (\n", "floor_pct = (\n",
" floor_by_type.filter(pl.col(\"has_floorspace\"))\n", " floor_by_type.filter(pl.col(\"has_floorspace\"))\n",
" .join(totals, on=\"property_type\")\n", " .join(totals, on=\"property_type\")\n",
" .with_columns((pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"pct_with_floorspace\"))\n", " .with_columns(\n",
" (pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"pct_with_floorspace\")\n",
" )\n",
" .sort(\"pct_with_floorspace\", descending=True)\n", " .sort(\"pct_with_floorspace\", descending=True)\n",
")\n", ")\n",
"print(f\"Overall floorspace availability: {clean['floorspace_sqm'].drop_nulls().len():,} / {len(clean):,} ({clean['floorspace_sqm'].drop_nulls().len()/len(clean)*100:.1f}%)\")\n", "print(\n",
" f\"Overall floorspace availability: {clean['floorspace_sqm'].drop_nulls().len():,} / {len(clean):,} ({clean['floorspace_sqm'].drop_nulls().len() / len(clean) * 100:.1f}%)\"\n",
")\n",
"floor_pct.select(\"property_type\", \"count\", \"total\", \"pct_with_floorspace\")" "floor_pct.select(\"property_type\", \"count\", \"total\", \"pct_with_floorspace\")"
] ]
}, },
@ -447298,8 +447360,13 @@
")\n", ")\n",
"print(f\"Properties with reasonable floorspace (10-1000 sqm): {len(with_floor):,}\")\n", "print(f\"Properties with reasonable floorspace (10-1000 sqm): {len(with_floor):,}\")\n",
"\n", "\n",
"fig = px.histogram(with_floor.to_pandas(), x=\"floorspace_sqm\", nbins=80, color=\"property_type\",\n", "fig = px.histogram(\n",
" title=\"Floorspace Distribution by Property Type\")\n", " with_floor.to_pandas(),\n",
" x=\"floorspace_sqm\",\n",
" nbins=80,\n",
" color=\"property_type\",\n",
" title=\"Floorspace Distribution by Property Type\",\n",
")\n",
"fig.update_layout(height=450, xaxis_title=\"Floorspace (sqm)\", barmode=\"overlay\")\n", "fig.update_layout(height=450, xaxis_title=\"Floorspace (sqm)\", barmode=\"overlay\")\n",
"fig.update_traces(opacity=0.6)\n", "fig.update_traces(opacity=0.6)\n",
"fig.show()" "fig.show()"
@ -448176,8 +448243,12 @@
"print(f\" P25: £{s.quantile(0.25):,.0f}/sqm\")\n", "print(f\" P25: £{s.quantile(0.25):,.0f}/sqm\")\n",
"print(f\" P75: £{s.quantile(0.75):,.0f}/sqm\")\n", "print(f\" P75: £{s.quantile(0.75):,.0f}/sqm\")\n",
"\n", "\n",
"fig = px.histogram(ppsqm.to_pandas(), x=\"price_per_sqm\", nbins=80,\n", "fig = px.histogram(\n",
" title=\"Price per Square Metre Distribution\")\n", " ppsqm.to_pandas(),\n",
" x=\"price_per_sqm\",\n",
" nbins=80,\n",
" title=\"Price per Square Metre Distribution\",\n",
")\n",
"fig.update_layout(height=400, xaxis_title=\"Price per sqm (£)\")\n", "fig.update_layout(height=400, xaxis_title=\"Price per sqm (£)\")\n",
"fig.show()" "fig.show()"
] ]
@ -584906,8 +584977,13 @@
} }
], ],
"source": [ "source": [
"fig = px.box(ppsqm.to_pandas(), x=\"property_type\", y=\"price_per_sqm\", color=\"property_type\",\n", "fig = px.box(\n",
" title=\"Price per sqm by Property Type\")\n", " ppsqm.to_pandas(),\n",
" x=\"property_type\",\n",
" y=\"price_per_sqm\",\n",
" color=\"property_type\",\n",
" title=\"Price per sqm by Property Type\",\n",
")\n",
"fig.update_layout(height=450, showlegend=False, yaxis_title=\"£ per sqm\")\n", "fig.update_layout(height=450, showlegend=False, yaxis_title=\"£ per sqm\")\n",
"fig.show()" "fig.show()"
] ]
@ -585865,9 +585941,15 @@
")\n", ")\n",
"\n", "\n",
"top30 = outcode_stats.head(30)\n", "top30 = outcode_stats.head(30)\n",
"fig = px.bar(top30.to_pandas(), x=\"count\", y=\"outcode\", orientation=\"h\",\n", "fig = px.bar(\n",
" color=\"median_price\", color_continuous_scale=\"Viridis\",\n", " top30.to_pandas(),\n",
" title=\"Top 30 Outcodes by Listing Volume\")\n", " x=\"count\",\n",
" y=\"outcode\",\n",
" orientation=\"h\",\n",
" color=\"median_price\",\n",
" color_continuous_scale=\"Viridis\",\n",
" title=\"Top 30 Outcodes by Listing Volume\",\n",
")\n",
"fig.update_layout(height=700, yaxis={\"categoryorder\": \"total ascending\"})\n", "fig.update_layout(height=700, yaxis={\"categoryorder\": \"total ascending\"})\n",
"fig.show()" "fig.show()"
] ]
@ -587400,11 +587482,25 @@
], ],
"source": [ "source": [
"# Most expensive outcodes (min 50 listings)\n", "# Most expensive outcodes (min 50 listings)\n",
"expensive = outcode_stats.filter(pl.col(\"count\") >= 50).sort(\"median_price\", descending=True).head(30)\n", "expensive = (\n",
"fig = px.bar(expensive.to_pandas(), x=\"median_price\", y=\"outcode\", orientation=\"h\",\n", " outcode_stats.filter(pl.col(\"count\") >= 50)\n",
" color=\"count\", color_continuous_scale=\"Blues\",\n", " .sort(\"median_price\", descending=True)\n",
" title=\"Top 30 Most Expensive Outcodes (min 50 listings, by median price)\")\n", " .head(30)\n",
"fig.update_layout(height=700, yaxis={\"categoryorder\": \"total ascending\"}, xaxis_title=\"Median Price (£)\")\n", ")\n",
"fig = px.bar(\n",
" expensive.to_pandas(),\n",
" x=\"median_price\",\n",
" y=\"outcode\",\n",
" orientation=\"h\",\n",
" color=\"count\",\n",
" color_continuous_scale=\"Blues\",\n",
" title=\"Top 30 Most Expensive Outcodes (min 50 listings, by median price)\",\n",
")\n",
"fig.update_layout(\n",
" height=700,\n",
" yaxis={\"categoryorder\": \"total ascending\"},\n",
" xaxis_title=\"Median Price (£)\",\n",
")\n",
"fig.show()" "fig.show()"
] ]
}, },
@ -588914,10 +589010,20 @@
"source": [ "source": [
"# Cheapest outcodes (min 50 listings)\n", "# Cheapest outcodes (min 50 listings)\n",
"cheapest = outcode_stats.filter(pl.col(\"count\") >= 50).sort(\"median_price\").head(30)\n", "cheapest = outcode_stats.filter(pl.col(\"count\") >= 50).sort(\"median_price\").head(30)\n",
"fig = px.bar(cheapest.to_pandas(), x=\"median_price\", y=\"outcode\", orientation=\"h\",\n", "fig = px.bar(\n",
" color=\"count\", color_continuous_scale=\"Blues\",\n", " cheapest.to_pandas(),\n",
" title=\"Top 30 Cheapest Outcodes (min 50 listings, by median price)\")\n", " x=\"median_price\",\n",
"fig.update_layout(height=700, yaxis={\"categoryorder\": \"total descending\"}, xaxis_title=\"Median Price (£)\")\n", " y=\"outcode\",\n",
" orientation=\"h\",\n",
" color=\"count\",\n",
" color_continuous_scale=\"Blues\",\n",
" title=\"Top 30 Cheapest Outcodes (min 50 listings, by median price)\",\n",
")\n",
"fig.update_layout(\n",
" height=700,\n",
" yaxis={\"categoryorder\": \"total descending\"},\n",
" xaxis_title=\"Median Price (£)\",\n",
")\n",
"fig.show()" "fig.show()"
] ]
}, },
@ -589828,14 +589934,19 @@
"source": [ "source": [
"# Geographic scatter of listings (sample for performance)\n", "# Geographic scatter of listings (sample for performance)\n",
"sample = clean.sample(n=min(20_000, len(clean)), seed=42)\n", "sample = clean.sample(n=min(20_000, len(clean)), seed=42)\n",
"fig = px.scatter_map(sample.to_pandas(),\n", "fig = px.scatter_map(\n",
" lat=\"latitude\", lon=\"longitude\",\n", " sample.to_pandas(),\n",
" color=\"price\", size_max=4,\n", " lat=\"latitude\",\n",
" color_continuous_scale=\"Viridis\",\n", " lon=\"longitude\",\n",
" range_color=[100_000, 1_500_000],\n", " color=\"price\",\n",
" zoom=5, center={\"lat\": 52.5, \"lon\": -1.5},\n", " size_max=4,\n",
" title=\"Listing Locations (20k sample, colored by price)\",\n", " color_continuous_scale=\"Viridis\",\n",
" opacity=0.4)\n", " range_color=[100_000, 1_500_000],\n",
" zoom=5,\n",
" center={\"lat\": 52.5, \"lon\": -1.5},\n",
" title=\"Listing Locations (20k sample, colored by price)\",\n",
" opacity=0.4,\n",
")\n",
"fig.update_layout(height=700)\n", "fig.update_layout(height=700)\n",
"fig.show()" "fig.show()"
] ]
@ -589864,7 +589975,9 @@
"source": [ "source": [
"# Parse dates and look at listing age\n", "# Parse dates and look at listing age\n",
"with_dates = clean.with_columns(\n", "with_dates = clean.with_columns(\n",
" pl.col(\"first_visible_date\").str.to_datetime(\"%Y-%m-%dT%H:%M:%SZ\").alias(\"listed_at\"),\n", " pl.col(\"first_visible_date\")\n",
" .str.to_datetime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
" .alias(\"listed_at\"),\n",
")\n", ")\n",
"\n", "\n",
"print(f\"Date range: {with_dates['listed_at'].min()} to {with_dates['listed_at'].max()}\")" "print(f\"Date range: {with_dates['listed_at'].min()} to {with_dates['listed_at'].max()}\")"
@ -590856,8 +590969,9 @@
" .sort(\"month\")\n", " .sort(\"month\")\n",
")\n", ")\n",
"\n", "\n",
"fig = px.bar(monthly.to_pandas(), x=\"month\", y=\"count\",\n", "fig = px.bar(\n",
" title=\"Listings by Month Listed\")\n", " monthly.to_pandas(), x=\"month\", y=\"count\", title=\"Listings by Month Listed\"\n",
")\n",
"fig.update_layout(height=400, xaxis_title=\"Month\", yaxis_title=\"Listings\")\n", "fig.update_layout(height=400, xaxis_title=\"Month\", yaxis_title=\"Listings\")\n",
"fig.show()" "fig.show()"
] ]
@ -590884,6 +590998,7 @@
"source": [ "source": [
"# How old are current listings? (days since first visible)\n", "# How old are current listings? (days since first visible)\n",
"import datetime\n", "import datetime\n",
"\n",
"now = datetime.datetime(2026, 2, 14)\n", "now = datetime.datetime(2026, 2, 14)\n",
"with_age = with_dates.with_columns(\n", "with_age = with_dates.with_columns(\n",
" ((pl.lit(now) - pl.col(\"listed_at\")).dt.total_days()).alias(\"days_on_market\")\n", " ((pl.lit(now) - pl.col(\"listed_at\")).dt.total_days()).alias(\"days_on_market\")\n",
@ -590896,7 +591011,7 @@
"print(f\" P25: {age.quantile(0.25):.0f} days\")\n", "print(f\" P25: {age.quantile(0.25):.0f} days\")\n",
"print(f\" P75: {age.quantile(0.75):.0f} days\")\n", "print(f\" P75: {age.quantile(0.75):.0f} days\")\n",
"print(f\" P95: {age.quantile(0.95):.0f} days\")\n", "print(f\" P95: {age.quantile(0.95):.0f} days\")\n",
"print(f\" Max: {age.max():.0f} days ({age.max()/365:.1f} years)\")" "print(f\" Max: {age.max():.0f} days ({age.max() / 365:.1f} years)\")"
] ]
}, },
{ {
@ -591749,8 +591864,12 @@
"source": [ "source": [
"# Days on market distribution (cap at 2 years for readability)\n", "# Days on market distribution (cap at 2 years for readability)\n",
"capped = with_age.filter(pl.col(\"days_on_market\") <= 730)\n", "capped = with_age.filter(pl.col(\"days_on_market\") <= 730)\n",
"fig = px.histogram(capped.to_pandas(), x=\"days_on_market\", nbins=100,\n", "fig = px.histogram(\n",
" title=\"Days on Market Distribution (capped at 2 years)\")\n", " capped.to_pandas(),\n",
" x=\"days_on_market\",\n",
" nbins=100,\n",
" title=\"Days on Market Distribution (capped at 2 years)\",\n",
")\n",
"fig.update_layout(height=400, xaxis_title=\"Days on Market\", yaxis_title=\"Count\")\n", "fig.update_layout(height=400, xaxis_title=\"Days on Market\", yaxis_title=\"Count\")\n",
"fig.show()" "fig.show()"
] ]
@ -591883,11 +592002,13 @@
"# Explode features list and count most common\n", "# Explode features list and count most common\n",
"features_exploded = clean.select(\"features\").explode(\"features\").drop_nulls()\n", "features_exploded = clean.select(\"features\").explode(\"features\").drop_nulls()\n",
"print(f\"Total feature entries: {len(features_exploded):,}\")\n", "print(f\"Total feature entries: {len(features_exploded):,}\")\n",
"print(f\"Features per listing: {len(features_exploded)/len(clean):.1f} avg\")\n", "print(f\"Features per listing: {len(features_exploded) / len(clean):.1f} avg\")\n",
"\n", "\n",
"# Most common features (lowercased for grouping)\n", "# Most common features (lowercased for grouping)\n",
"feature_counts = (\n", "feature_counts = (\n",
" features_exploded.with_columns(pl.col(\"features\").str.to_lowercase().str.strip_chars().alias(\"feature_lower\"))\n", " features_exploded.with_columns(\n",
" pl.col(\"features\").str.to_lowercase().str.strip_chars().alias(\"feature_lower\")\n",
" )\n",
" .group_by(\"feature_lower\")\n", " .group_by(\"feature_lower\")\n",
" .agg(pl.len().alias(\"count\"))\n", " .agg(pl.len().alias(\"count\"))\n",
" .sort(\"count\", descending=True)\n", " .sort(\"count\", descending=True)\n",
@ -592794,16 +592915,64 @@
"all_features = features_exploded[\"features\"].to_list()\n", "all_features = features_exploded[\"features\"].to_list()\n",
"word_counter = Counter()\n", "word_counter = Counter()\n",
"for feat in all_features:\n", "for feat in all_features:\n",
" words = re.findall(r'[a-z]+', feat.lower())\n", " words = re.findall(r\"[a-z]+\", feat.lower())\n",
" word_counter.update(words)\n", " word_counter.update(words)\n",
"\n", "\n",
"# Filter out very short/common words\n", "# Filter out very short/common words\n",
"stop_words = {'the', 'a', 'an', 'and', 'or', 'of', 'to', 'in', 'with', 'for', 'on', 'at', 'by', 'is', 'it', 'from', 'as', 'be', 'this', 'that', 'are', 'was', 'has', 'have', 'not', 'but', 'all', 'can', 'had', 'her', 'his', 'one', 'our', 'out', 'you', 'will'}\n", "stop_words = {\n",
"keywords = [(w, c) for w, c in word_counter.most_common(100) if w not in stop_words and len(w) > 2]\n", " \"the\",\n",
"kw_df = pl.DataFrame({\"word\": [w for w,c in keywords[:40]], \"count\": [c for w,c in keywords[:40]]})\n", " \"a\",\n",
" \"an\",\n",
" \"and\",\n",
" \"or\",\n",
" \"of\",\n",
" \"to\",\n",
" \"in\",\n",
" \"with\",\n",
" \"for\",\n",
" \"on\",\n",
" \"at\",\n",
" \"by\",\n",
" \"is\",\n",
" \"it\",\n",
" \"from\",\n",
" \"as\",\n",
" \"be\",\n",
" \"this\",\n",
" \"that\",\n",
" \"are\",\n",
" \"was\",\n",
" \"has\",\n",
" \"have\",\n",
" \"not\",\n",
" \"but\",\n",
" \"all\",\n",
" \"can\",\n",
" \"had\",\n",
" \"her\",\n",
" \"his\",\n",
" \"one\",\n",
" \"our\",\n",
" \"out\",\n",
" \"you\",\n",
" \"will\",\n",
"}\n",
"keywords = [\n",
" (w, c)\n",
" for w, c in word_counter.most_common(100)\n",
" if w not in stop_words and len(w) > 2\n",
"]\n",
"kw_df = pl.DataFrame(\n",
" {\"word\": [w for w, c in keywords[:40]], \"count\": [c for w, c in keywords[:40]]}\n",
")\n",
"\n", "\n",
"fig = px.bar(kw_df.to_pandas(), x=\"count\", y=\"word\", orientation=\"h\",\n", "fig = px.bar(\n",
" title=\"Most Common Words in Feature Descriptions\")\n", " kw_df.to_pandas(),\n",
" x=\"count\",\n",
" y=\"word\",\n",
" orientation=\"h\",\n",
" title=\"Most Common Words in Feature Descriptions\",\n",
")\n",
"fig.update_layout(height=800, yaxis={\"categoryorder\": \"total ascending\"})\n", "fig.update_layout(height=800, yaxis={\"categoryorder\": \"total ascending\"})\n",
"fig.show()" "fig.show()"
] ]
@ -593767,9 +593936,14 @@
" & (pl.col(\"price\") < 3_000_000)\n", " & (pl.col(\"price\") < 3_000_000)\n",
").sample(n=min(15_000, len(with_floor)), seed=42)\n", ").sample(n=min(15_000, len(with_floor)), seed=42)\n",
"\n", "\n",
"fig = px.scatter(scatter_df.to_pandas(), x=\"floorspace_sqm\", y=\"price\",\n", "fig = px.scatter(\n",
" color=\"property_type\", opacity=0.3,\n", " scatter_df.to_pandas(),\n",
" title=\"Price vs Floorspace (sample, capped at £3M / 500sqm)\")\n", " x=\"floorspace_sqm\",\n",
" y=\"price\",\n",
" color=\"property_type\",\n",
" opacity=0.3,\n",
" title=\"Price vs Floorspace (sample, capped at £3M / 500sqm)\",\n",
")\n",
"fig.update_layout(height=600, xaxis_title=\"Floorspace (sqm)\", yaxis_title=\"Price (£)\")\n", "fig.update_layout(height=600, xaxis_title=\"Floorspace (sqm)\", yaxis_title=\"Price (£)\")\n",
"fig.show()" "fig.show()"
] ]
@ -594739,8 +594913,14 @@
" .agg(pl.col(\"price\").median().alias(\"median_price\"), pl.len().alias(\"count\"))\n", " .agg(pl.col(\"price\").median().alias(\"median_price\"), pl.len().alias(\"count\"))\n",
" .sort(\"property_type\", \"bedrooms\")\n", " .sort(\"property_type\", \"bedrooms\")\n",
")\n", ")\n",
"fig = px.line(bp.to_pandas(), x=\"bedrooms\", y=\"median_price\", color=\"property_type\",\n", "fig = px.line(\n",
" markers=True, title=\"Median Price by Bedrooms and Property Type\")\n", " bp.to_pandas(),\n",
" x=\"bedrooms\",\n",
" y=\"median_price\",\n",
" color=\"property_type\",\n",
" markers=True,\n",
" title=\"Median Price by Bedrooms and Property Type\",\n",
")\n",
"fig.update_layout(height=450, xaxis_title=\"Bedrooms\", yaxis_title=\"Median Price (£)\")\n", "fig.update_layout(height=450, xaxis_title=\"Bedrooms\", yaxis_title=\"Median Price (£)\")\n",
"fig.show()" "fig.show()"
] ]
@ -594789,18 +594969,28 @@
"print(f\"Total listings: {len(clean):,}\")\n", "print(f\"Total listings: {len(clean):,}\")\n",
"print(f\"Outcodes covered: {clean['outcode'].n_unique():,}\")\n", "print(f\"Outcodes covered: {clean['outcode'].n_unique():,}\")\n",
"print(\"\")\n", "print(\"\")\n",
"print(f\"Price: median £{clean['price'].median():,.0f}, mean £{clean['price'].mean():,.0f}\")\n", "print(\n",
"print(f\"Bedrooms: median {clean['bedrooms'].median():.0f}, mean {clean['bedrooms'].mean():.1f}\")\n", " f\"Price: median £{clean['price'].median():,.0f}, mean £{clean['price'].mean():,.0f}\"\n",
")\n",
"print(\n",
" f\"Bedrooms: median {clean['bedrooms'].median():.0f}, mean {clean['bedrooms'].mean():.1f}\"\n",
")\n",
"print(\"\")\n", "print(\"\")\n",
"print(f\"Tenure known: {(len(clean) - clean['tenure'].null_count())/len(clean)*100:.1f}%\")\n", "print(\n",
" f\"Tenure known: {(len(clean) - clean['tenure'].null_count()) / len(clean) * 100:.1f}%\"\n",
")\n",
"print(f\" Freehold: {len(clean.filter(pl.col('tenure') == 'Freehold')):,}\")\n", "print(f\" Freehold: {len(clean.filter(pl.col('tenure') == 'Freehold')):,}\")\n",
"print(f\" Leasehold: {len(clean.filter(pl.col('tenure') == 'Leasehold')):,}\")\n", "print(f\" Leasehold: {len(clean.filter(pl.col('tenure') == 'Leasehold')):,}\")\n",
"print(\"\")\n", "print(\"\")\n",
"print(f\"Floorspace available: {clean['floorspace_sqm'].drop_nulls().len()/len(clean)*100:.1f}%\")\n", "print(\n",
" f\"Floorspace available: {clean['floorspace_sqm'].drop_nulls().len() / len(clean) * 100:.1f}%\"\n",
")\n",
"print(\"\")\n", "print(\"\")\n",
"print(\"Property types:\")\n", "print(\"Property types:\")\n",
"for row in clean['property_type'].value_counts().sort('count', descending=True).iter_rows():\n", "for row in (\n",
" print(f\" {row[0]}: {row[1]:,} ({row[1]/len(clean)*100:.1f}%)\")" " clean[\"property_type\"].value_counts().sort(\"count\", descending=True).iter_rows()\n",
"):\n",
" print(f\" {row[0]}: {row[1]:,} ({row[1] / len(clean) * 100:.1f}%)\")"
] ]
} }
], ],

View file

@ -52,6 +52,7 @@
"buy = pl.read_parquet(f\"{DATA}/online_listings_buy.parquet\")\n", "buy = pl.read_parquet(f\"{DATA}/online_listings_buy.parquet\")\n",
"rent = pl.read_parquet(f\"{DATA}/online_listings_rent.parquet\")\n", "rent = pl.read_parquet(f\"{DATA}/online_listings_rent.parquet\")\n",
"\n", "\n",
"\n",
"def tag_source(df: pl.DataFrame) -> pl.DataFrame:\n", "def tag_source(df: pl.DataFrame) -> pl.DataFrame:\n",
" return df.with_columns(\n", " return df.with_columns(\n",
" pl.when(pl.col(\"Listing URL\").str.contains(\"rightmove\"))\n", " pl.when(pl.col(\"Listing URL\").str.contains(\"rightmove\"))\n",
@ -62,6 +63,7 @@
" .alias(\"source\")\n", " .alias(\"source\")\n",
" )\n", " )\n",
"\n", "\n",
"\n",
"buy = tag_source(buy)\n", "buy = tag_source(buy)\n",
"rent = tag_source(rent)\n", "rent = tag_source(rent)\n",
"\n", "\n",
@ -122,7 +124,7 @@
" print(f\"\\n=== {label} ===\")\n", " print(f\"\\n=== {label} ===\")\n",
" for row in counts.iter_rows():\n", " for row in counts.iter_rows():\n",
" src, cnt = row\n", " src, cnt = row\n",
" print(f\" {src}: {cnt:,} ({cnt/len(df)*100:.1f}%)\")\n", " print(f\" {src}: {cnt:,} ({cnt / len(df) * 100:.1f}%)\")\n",
"\n", "\n",
"# Known dedup count from scraper logs\n", "# Known dedup count from scraper logs\n",
"CROSS_DEDUP_BUY = 2_220\n", "CROSS_DEDUP_BUY = 2_220\n",
@ -132,7 +134,7 @@
"print(f\"Home.co.uk scraped (before dedup): {hk_buy_total:,}\")\n", "print(f\"Home.co.uk scraped (before dedup): {hk_buy_total:,}\")\n",
"print(f\"Home.co.uk unique (after dedup): {hk_buy_unique:,}\")\n", "print(f\"Home.co.uk unique (after dedup): {hk_buy_unique:,}\")\n",
"print(f\"Cross-source duplicates removed: {CROSS_DEDUP_BUY:,}\")\n", "print(f\"Cross-source duplicates removed: {CROSS_DEDUP_BUY:,}\")\n",
"print(f\"Overlap rate: {CROSS_DEDUP_BUY/hk_buy_total*100:.1f}%\")" "print(f\"Overlap rate: {CROSS_DEDUP_BUY / hk_buy_total * 100:.1f}%\")"
] ]
}, },
{ {
@ -987,23 +989,29 @@
"# Venn-style summary\n", "# Venn-style summary\n",
"rm_buy = len(buy.filter(pl.col(\"source\") == \"Rightmove\"))\n", "rm_buy = len(buy.filter(pl.col(\"source\") == \"Rightmove\"))\n",
"\n", "\n",
"fig = go.Figure(go.Sankey(\n", "fig = go.Figure(\n",
" node=dict(\n", " go.Sankey(\n",
" label=[\n", " node=dict(\n",
" f\"Rightmove\\n{rm_buy:,}\",\n", " label=[\n",
" f\"Home.co.uk\\n{hk_buy_total:,} scraped\",\n", " f\"Rightmove\\n{rm_buy:,}\",\n",
" f\"Merged BUY\\n{len(buy):,}\",\n", " f\"Home.co.uk\\n{hk_buy_total:,} scraped\",\n",
" f\"Deduped\\n{CROSS_DEDUP_BUY:,}\",\n", " f\"Merged BUY\\n{len(buy):,}\",\n",
" ],\n", " f\"Deduped\\n{CROSS_DEDUP_BUY:,}\",\n",
" color=[\"#2563eb\", \"#10b981\", \"#6366f1\", \"#ef4444\"],\n", " ],\n",
" ),\n", " color=[\"#2563eb\", \"#10b981\", \"#6366f1\", \"#ef4444\"],\n",
" link=dict(\n", " ),\n",
" source=[0, 1, 1],\n", " link=dict(\n",
" target=[2, 2, 3],\n", " source=[0, 1, 1],\n",
" value=[rm_buy, hk_buy_unique, CROSS_DEDUP_BUY],\n", " target=[2, 2, 3],\n",
" color=[\"rgba(37,99,235,0.3)\", \"rgba(16,185,129,0.3)\", \"rgba(239,68,68,0.3)\"],\n", " value=[rm_buy, hk_buy_unique, CROSS_DEDUP_BUY],\n",
" ),\n", " color=[\n",
"))\n", " \"rgba(37,99,235,0.3)\",\n",
" \"rgba(16,185,129,0.3)\",\n",
" \"rgba(239,68,68,0.3)\",\n",
" ],\n",
" ),\n",
" )\n",
")\n",
"fig.update_layout(title=\"BUY Channel: Source Contribution Flow\", height=350)\n", "fig.update_layout(title=\"BUY Channel: Source Contribution Flow\", height=350)\n",
"fig.show()" "fig.show()"
] ]
@ -1106,8 +1114,11 @@
"oc_comparison = (\n", "oc_comparison = (\n",
" hk_by_oc.join(rm_by_oc, on=\"outcode\", how=\"left\")\n", " hk_by_oc.join(rm_by_oc, on=\"outcode\", how=\"left\")\n",
" .with_columns(\n", " .with_columns(\n",
" (pl.col(\"hk_count\") / (pl.col(\"hk_count\") + pl.col(\"rm_count\").fill_null(0)) * 100)\n", " (\n",
" .alias(\"hk_pct_of_total\")\n", " pl.col(\"hk_count\")\n",
" / (pl.col(\"hk_count\") + pl.col(\"rm_count\").fill_null(0))\n",
" * 100\n",
" ).alias(\"hk_pct_of_total\")\n",
" )\n", " )\n",
" .sort(\"hk_count\", descending=True)\n", " .sort(\"hk_count\", descending=True)\n",
")\n", ")\n",
@ -2215,18 +2226,28 @@
"source": [ "source": [
"# Bar chart: home.co.uk vs Rightmove counts per outcode\n", "# Bar chart: home.co.uk vs Rightmove counts per outcode\n",
"fig = go.Figure()\n", "fig = go.Figure()\n",
"fig.add_trace(go.Bar(\n", "fig.add_trace(\n",
" x=oc_comparison[\"outcode\"], y=oc_comparison[\"rm_count\"],\n", " go.Bar(\n",
" name=\"Rightmove\", marker_color=\"#2563eb\",\n", " x=oc_comparison[\"outcode\"],\n",
"))\n", " y=oc_comparison[\"rm_count\"],\n",
"fig.add_trace(go.Bar(\n", " name=\"Rightmove\",\n",
" x=oc_comparison[\"outcode\"], y=oc_comparison[\"hk_count\"],\n", " marker_color=\"#2563eb\",\n",
" name=\"Home.co.uk\", marker_color=\"#10b981\",\n", " )\n",
"))\n", ")\n",
"fig.add_trace(\n",
" go.Bar(\n",
" x=oc_comparison[\"outcode\"],\n",
" y=oc_comparison[\"hk_count\"],\n",
" name=\"Home.co.uk\",\n",
" marker_color=\"#10b981\",\n",
" )\n",
")\n",
"fig.update_layout(\n", "fig.update_layout(\n",
" barmode=\"group\", height=400,\n", " barmode=\"group\",\n",
" height=400,\n",
" title=\"Listings per Outcode: Rightmove vs Home.co.uk (outcodes with HK coverage)\",\n", " title=\"Listings per Outcode: Rightmove vs Home.co.uk (outcodes with HK coverage)\",\n",
" xaxis_title=\"Outcode\", yaxis_title=\"Listings\",\n", " xaxis_title=\"Outcode\",\n",
" yaxis_title=\"Listings\",\n",
")\n", ")\n",
"fig.show()" "fig.show()"
] ]
@ -3121,10 +3142,14 @@
"sample = covered.sample(n=min(30_000, len(covered)), seed=42)\n", "sample = covered.sample(n=min(30_000, len(covered)), seed=42)\n",
"\n", "\n",
"fig = px.scatter_map(\n", "fig = px.scatter_map(\n",
" sample.to_pandas(), lat=\"lat\", lon=\"lon\",\n", " sample.to_pandas(),\n",
" lat=\"lat\",\n",
" lon=\"lon\",\n",
" color=\"source\",\n", " color=\"source\",\n",
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n", " color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
" zoom=7, opacity=0.4, size_max=4,\n", " zoom=7,\n",
" opacity=0.4,\n",
" size_max=4,\n",
" title=\"Listing Locations in Covered Outcodes (by source)\",\n", " title=\"Listing Locations in Covered Outcodes (by source)\",\n",
")\n", ")\n",
"fig.update_layout(height=600)\n", "fig.update_layout(height=600)\n",
@ -3188,15 +3213,41 @@
"# For covered outcodes, compare home.co.uk listings against Rightmove\n", "# For covered outcodes, compare home.co.uk listings against Rightmove\n",
"# to find near-matches (same postcode, same beds, price within 5%)\n", "# to find near-matches (same postcode, same beds, price within 5%)\n",
"\n", "\n",
"hk = buy_oc.filter(pl.col(\"source\") == \"Home.co.uk\").select(\n", "hk = (\n",
" \"Postcode\", \"Bedrooms\", \"Asking price\", \"Property type\", \"Address per Property Register\"\n", " buy_oc.filter(pl.col(\"source\") == \"Home.co.uk\")\n",
").rename({\"Asking price\": \"hk_price\", \"Property type\": \"hk_type\", \"Address per Property Register\": \"hk_addr\"})\n", " .select(\n",
" \"Postcode\",\n",
" \"Bedrooms\",\n",
" \"Asking price\",\n",
" \"Property type\",\n",
" \"Address per Property Register\",\n",
" )\n",
" .rename(\n",
" {\n",
" \"Asking price\": \"hk_price\",\n",
" \"Property type\": \"hk_type\",\n",
" \"Address per Property Register\": \"hk_addr\",\n",
" }\n",
" )\n",
")\n",
"\n", "\n",
"rm = buy_oc.filter(\n", "rm = (\n",
" pl.col(\"source\") == \"Rightmove\"\n", " buy_oc.filter(pl.col(\"source\") == \"Rightmove\")\n",
").select(\n", " .select(\n",
" \"Postcode\", \"Bedrooms\", \"Asking price\", \"Property type\", \"Address per Property Register\"\n", " \"Postcode\",\n",
").rename({\"Asking price\": \"rm_price\", \"Property type\": \"rm_type\", \"Address per Property Register\": \"rm_addr\"})\n", " \"Bedrooms\",\n",
" \"Asking price\",\n",
" \"Property type\",\n",
" \"Address per Property Register\",\n",
" )\n",
" .rename(\n",
" {\n",
" \"Asking price\": \"rm_price\",\n",
" \"Property type\": \"rm_type\",\n",
" \"Address per Property Register\": \"rm_addr\",\n",
" }\n",
" )\n",
")\n",
"\n", "\n",
"# Join on postcode + bedrooms\n", "# Join on postcode + bedrooms\n",
"joined = hk.join(rm, on=[\"Postcode\", \"Bedrooms\"], how=\"inner\")\n", "joined = hk.join(rm, on=[\"Postcode\", \"Bedrooms\"], how=\"inner\")\n",
@ -3213,16 +3264,24 @@
"exact = joined.filter(pl.col(\"hk_price\") == pl.col(\"rm_price\"))\n", "exact = joined.filter(pl.col(\"hk_price\") == pl.col(\"rm_price\"))\n",
"\n", "\n",
"print(f\"Home.co.uk listings (unique, in file): {len(hk):,}\")\n", "print(f\"Home.co.uk listings (unique, in file): {len(hk):,}\")\n",
"print(f\"Rightmove listings in covered outcodes: {len(rm.filter(pl.col('Postcode').is_in(hk['Postcode']))):,}\")\n", "print(\n",
" f\"Rightmove listings in covered outcodes: {len(rm.filter(pl.col('Postcode').is_in(hk['Postcode']))):,}\"\n",
")\n",
"print()\n", "print()\n",
"print(f\"Joined on (postcode, bedrooms): {len(joined):,} candidate pairs\")\n", "print(f\"Joined on (postcode, bedrooms): {len(joined):,} candidate pairs\")\n",
"print(f\" Exact price match: {len(exact):,} pairs (likely same property, different beds or already deduped)\")\n", "print(\n",
"print(f\" Price within 5%: {len(near):,} pairs (probable duplicates with price rounding)\")\n", " f\" Exact price match: {len(exact):,} pairs (likely same property, different beds or already deduped)\"\n",
")\n",
"print(\n",
" f\" Price within 5%: {len(near):,} pairs (probable duplicates with price rounding)\"\n",
")\n",
"print()\n", "print()\n",
"# Unique hk listings that have at least one near-match\n", "# Unique hk listings that have at least one near-match\n",
"hk_with_near = near.select(\"hk_price\", \"hk_addr\", \"Postcode\").unique()\n", "hk_with_near = near.select(\"hk_price\", \"hk_addr\", \"Postcode\").unique()\n",
"print(f\"Home.co.uk listings with a near-match in RM: ~{len(hk_with_near):,}\")\n", "print(f\"Home.co.uk listings with a near-match in RM: ~{len(hk_with_near):,}\")\n",
"print(f\"Estimated additional overlap: ~{len(hk_with_near)/len(hk)*100:.1f}% of unique HK listings\")" "print(\n",
" f\"Estimated additional overlap: ~{len(hk_with_near) / len(hk) * 100:.1f}% of unique HK listings\"\n",
")"
] ]
}, },
{ {
@ -4178,9 +4237,13 @@
")\n", ")\n",
"\n", "\n",
"fig = px.histogram(\n", "fig = px.histogram(\n",
" clipped.to_pandas(), x=\"Asking price\", color=\"source\", nbins=80,\n", " clipped.to_pandas(),\n",
" x=\"Asking price\",\n",
" color=\"source\",\n",
" nbins=80,\n",
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n", " color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
" barmode=\"overlay\", histnorm=\"probability density\",\n", " barmode=\"overlay\",\n",
" histnorm=\"probability density\",\n",
" title=\"Price Distribution by Source (normalised, £50k£2M)\",\n", " title=\"Price Distribution by Source (normalised, £50k£2M)\",\n",
")\n", ")\n",
"fig.update_traces(opacity=0.6)\n", "fig.update_traces(opacity=0.6)\n",
@ -5095,10 +5158,7 @@
], ],
"source": [ "source": [
"# Property type distribution by source\n", "# Property type distribution by source\n",
"type_by_src = (\n", "type_by_src = buy.group_by(\"source\", \"Property type\").agg(pl.len().alias(\"count\"))\n",
" buy.group_by(\"source\", \"Property type\")\n",
" .agg(pl.len().alias(\"count\"))\n",
")\n",
"# Normalise within each source\n", "# Normalise within each source\n",
"totals = type_by_src.group_by(\"source\").agg(pl.col(\"count\").sum().alias(\"total\"))\n", "totals = type_by_src.group_by(\"source\").agg(pl.col(\"count\").sum().alias(\"total\"))\n",
"type_by_src = type_by_src.join(totals, on=\"source\").with_columns(\n", "type_by_src = type_by_src.join(totals, on=\"source\").with_columns(\n",
@ -5107,7 +5167,10 @@
"\n", "\n",
"fig = px.bar(\n", "fig = px.bar(\n",
" type_by_src.sort(\"Property type\").to_pandas(),\n", " type_by_src.sort(\"Property type\").to_pandas(),\n",
" x=\"Property type\", y=\"pct\", color=\"source\", barmode=\"group\",\n", " x=\"Property type\",\n",
" y=\"pct\",\n",
" color=\"source\",\n",
" barmode=\"group\",\n",
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n", " color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
" title=\"Property Type Distribution by Source (%)\",\n", " title=\"Property Type Distribution by Source (%)\",\n",
")\n", ")\n",
@ -5186,7 +5249,9 @@
"# Property sub-type comparison — top home.co.uk sub-types\n", "# Property sub-type comparison — top home.co.uk sub-types\n",
"hk_subtypes = (\n", "hk_subtypes = (\n",
" buy.filter(pl.col(\"source\") == \"Home.co.uk\")[\"Property sub-type\"]\n", " buy.filter(pl.col(\"source\") == \"Home.co.uk\")[\"Property sub-type\"]\n",
" .value_counts().sort(\"count\", descending=True).head(20)\n", " .value_counts()\n",
" .sort(\"count\", descending=True)\n",
" .head(20)\n",
")\n", ")\n",
"print(\"Top 20 Home.co.uk property sub-types:\")\n", "print(\"Top 20 Home.co.uk property sub-types:\")\n",
"hk_subtypes" "hk_subtypes"
@ -5263,9 +5328,16 @@
"source": [ "source": [
"# Field completeness by source\n", "# Field completeness by source\n",
"fields = [\n", "fields = [\n",
" \"Bedrooms\", \"Bathrooms\", \"Postcode\", \"Address per Property Register\",\n", " \"Bedrooms\",\n",
" \"Leasehold/Freehold\", \"Property type\", \"Total floor area (sqm)\",\n", " \"Bathrooms\",\n",
" \"Listing date\", \"Asking price\", \"Price qualifier\",\n", " \"Postcode\",\n",
" \"Address per Property Register\",\n",
" \"Leasehold/Freehold\",\n",
" \"Property type\",\n",
" \"Total floor area (sqm)\",\n",
" \"Listing date\",\n",
" \"Asking price\",\n",
" \"Price qualifier\",\n",
"]\n", "]\n",
"\n", "\n",
"rows = []\n", "rows = []\n",
@ -5276,17 +5348,19 @@
" non_null = n - subset[f].null_count()\n", " non_null = n - subset[f].null_count()\n",
" # Also count empty strings as missing for string fields\n", " # Also count empty strings as missing for string fields\n",
" if subset[f].dtype == pl.Utf8:\n", " if subset[f].dtype == pl.Utf8:\n",
" non_null = len(subset.filter(\n", " non_null = len(\n",
" pl.col(f).is_not_null() & (pl.col(f).str.len_chars() > 0)\n", " subset.filter(pl.col(f).is_not_null() & (pl.col(f).str.len_chars() > 0))\n",
" ))\n", " )\n",
" rows.append({\"source\": src, \"field\": f, \"pct_available\": non_null / n * 100})\n", " rows.append({\"source\": src, \"field\": f, \"pct_available\": non_null / n * 100})\n",
"\n", "\n",
"completeness = pl.DataFrame(rows)\n", "completeness = pl.DataFrame(rows)\n",
"pivot = completeness.pivot(on=\"source\", index=\"field\", values=\"pct_available\")\n", "pivot = completeness.pivot(on=\"source\", index=\"field\", values=\"pct_available\")\n",
"pivot = pivot.with_columns([\n", "pivot = pivot.with_columns(\n",
" pl.col(\"Rightmove\").round(1),\n", " [\n",
" pl.col(\"Home.co.uk\").round(1),\n", " pl.col(\"Rightmove\").round(1),\n",
"])\n", " pl.col(\"Home.co.uk\").round(1),\n",
" ]\n",
")\n",
"print(\"Field completeness (% non-null/non-empty):\")\n", "print(\"Field completeness (% non-null/non-empty):\")\n",
"pivot" "pivot"
] ]
@ -6198,19 +6272,26 @@
"# Bedroom distribution comparison\n", "# Bedroom distribution comparison\n",
"fig = make_subplots(rows=1, cols=2, subplot_titles=(\"Rightmove\", \"Home.co.uk\"))\n", "fig = make_subplots(rows=1, cols=2, subplot_titles=(\"Rightmove\", \"Home.co.uk\"))\n",
"for i, src in enumerate([\"Rightmove\", \"Home.co.uk\"], 1):\n", "for i, src in enumerate([\"Rightmove\", \"Home.co.uk\"], 1):\n",
" beds = buy.filter(\n", " beds = (\n",
" (pl.col(\"source\") == src) & (pl.col(\"Bedrooms\") <= 8)\n", " buy.filter((pl.col(\"source\") == src) & (pl.col(\"Bedrooms\") <= 8))[\"Bedrooms\"]\n",
" )[\"Bedrooms\"].value_counts().sort(\"Bedrooms\")\n", " .value_counts()\n",
" .sort(\"Bedrooms\")\n",
" )\n",
" # Normalise\n", " # Normalise\n",
" total = beds[\"count\"].sum()\n", " total = beds[\"count\"].sum()\n",
" fig.add_trace(\n", " fig.add_trace(\n",
" go.Bar(\n", " go.Bar(\n",
" x=beds[\"Bedrooms\"], y=beds[\"count\"] / total * 100,\n", " x=beds[\"Bedrooms\"],\n",
" y=beds[\"count\"] / total * 100,\n",
" name=src,\n", " name=src,\n",
" marker_color=\"#2563eb\" if src == \"Rightmove\" else \"#10b981\",\n", " marker_color=\"#2563eb\" if src == \"Rightmove\" else \"#10b981\",\n",
" ), row=1, col=i,\n", " ),\n",
" row=1,\n",
" col=i,\n",
" )\n", " )\n",
"fig.update_layout(height=350, title=\"Bedroom Distribution by Source (%)\", showlegend=False)\n", "fig.update_layout(\n",
" height=350, title=\"Bedroom Distribution by Source (%)\", showlegend=False\n",
")\n",
"fig.update_yaxes(title_text=\"%\", row=1, col=1)\n", "fig.update_yaxes(title_text=\"%\", row=1, col=1)\n",
"fig.show()" "fig.show()"
] ]
@ -6287,17 +6368,23 @@
"\n", "\n",
"comparison_rows = []\n", "comparison_rows = []\n",
"for ptype in [\"Detached\", \"Semi-Detached\", \"Terraced\", \"Flats/Maisonettes\", \"Other\"]:\n", "for ptype in [\"Detached\", \"Semi-Detached\", \"Terraced\", \"Flats/Maisonettes\", \"Other\"]:\n",
" rm_p = rm_covered.filter(pl.col(\"Property type\") == ptype)[\"Asking price\"].drop_nulls()\n", " rm_p = rm_covered.filter(pl.col(\"Property type\") == ptype)[\n",
" \"Asking price\"\n",
" ].drop_nulls()\n",
" hk_p = hk_only.filter(pl.col(\"Property type\") == ptype)[\"Asking price\"].drop_nulls()\n", " hk_p = hk_only.filter(pl.col(\"Property type\") == ptype)[\"Asking price\"].drop_nulls()\n",
" if len(rm_p) > 0 and len(hk_p) > 0:\n", " if len(rm_p) > 0 and len(hk_p) > 0:\n",
" comparison_rows.append({\n", " comparison_rows.append(\n",
" \"Property type\": ptype,\n", " {\n",
" \"RM count\": len(rm_p),\n", " \"Property type\": ptype,\n",
" \"RM median £\": int(rm_p.median()),\n", " \"RM count\": len(rm_p),\n",
" \"HK count\": len(hk_p),\n", " \"RM median £\": int(rm_p.median()),\n",
" \"HK median £\": int(hk_p.median()),\n", " \"HK count\": len(hk_p),\n",
" \"HK premium %\": round((hk_p.median() - rm_p.median()) / rm_p.median() * 100, 1),\n", " \"HK median £\": int(hk_p.median()),\n",
" })\n", " \"HK premium %\": round(\n",
" (hk_p.median() - rm_p.median()) / rm_p.median() * 100, 1\n",
" ),\n",
" }\n",
" )\n",
"\n", "\n",
"comp = pl.DataFrame(comparison_rows)\n", "comp = pl.DataFrame(comparison_rows)\n",
"print(\"Price comparison in covered outcodes (Home.co.uk unique listings vs Rightmove):\")\n", "print(\"Price comparison in covered outcodes (Home.co.uk unique listings vs Rightmove):\")\n",
@ -7245,9 +7332,13 @@
"# Listing age histogram comparison\n", "# Listing age histogram comparison\n",
"age_plot = with_age.filter(pl.col(\"days_on_market\") <= 730) # cap at 2 years\n", "age_plot = with_age.filter(pl.col(\"days_on_market\") <= 730) # cap at 2 years\n",
"fig = px.histogram(\n", "fig = px.histogram(\n",
" age_plot.to_pandas(), x=\"days_on_market\", color=\"source\", nbins=60,\n", " age_plot.to_pandas(),\n",
" x=\"days_on_market\",\n",
" color=\"source\",\n",
" nbins=60,\n",
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n", " color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
" barmode=\"overlay\", histnorm=\"probability density\",\n", " barmode=\"overlay\",\n",
" histnorm=\"probability density\",\n",
" title=\"Days on Market Distribution by Source (normalised, capped at 2 years)\",\n", " title=\"Days on Market Distribution by Source (normalised, capped at 2 years)\",\n",
")\n", ")\n",
"fig.update_traces(opacity=0.6)\n", "fig.update_traces(opacity=0.6)\n",
@ -7330,7 +7421,9 @@
"print(f\" Projected home.co.uk total: ~{projected_hk:,}\")\n", "print(f\" Projected home.co.uk total: ~{projected_hk:,}\")\n",
"print(f\" Projected cross-dedup: ~{projected_dedup:,}\")\n", "print(f\" Projected cross-dedup: ~{projected_dedup:,}\")\n",
"print(f\" Projected unique additions: ~{projected_unique:,}\")\n", "print(f\" Projected unique additions: ~{projected_unique:,}\")\n",
"print(f\" Projected merged dataset: ~{rm_buy + projected_unique:,} ({projected_unique/rm_buy*100:.1f}% increase)\")\n", "print(\n",
" f\" Projected merged dataset: ~{rm_buy + projected_unique:,} ({projected_unique / rm_buy * 100:.1f}% increase)\"\n",
")\n",
"print()\n", "print()\n",
"print(\"⚠️ These are rough estimates — the covered outcodes may not be representative\")" "print(\"⚠️ These are rough estimates — the covered outcodes may not be representative\")"
] ]

View file

@ -54,11 +54,15 @@
} }
], ],
"source": [ "source": [
"r5_bank = pl.read_parquet(\"../property-data/travel-times/transit/000000-bank-tube-station.parquet\")\n", "r5_bank = pl.read_parquet(\n",
" \"../property-data/travel-times/transit/000000-bank-tube-station.parquet\"\n",
")\n",
"manual_bank = pl.read_parquet(\"../manual-data/journey_times_bank.parquet\")\n", "manual_bank = pl.read_parquet(\"../manual-data/journey_times_bank.parquet\")\n",
"\n", "\n",
"print(f\"R5 Bank: {r5_bank.shape[0]:,} postcodes\")\n", "print(f\"R5 Bank: {r5_bank.shape[0]:,} postcodes\")\n",
"print(f\"Manual Bank: {manual_bank.shape[0]:,} postcodes ({manual_bank['public_transport_easy_minutes'].null_count():,} null easy)\")" "print(\n",
" f\"Manual Bank: {manual_bank.shape[0]:,} postcodes ({manual_bank['public_transport_easy_minutes'].null_count():,} null easy)\"\n",
")"
] ]
}, },
{ {
@ -116,25 +120,49 @@
"source": [ "source": [
"# Join on postcode, keep only rows where both sources have values\n", "# Join on postcode, keep only rows where both sources have values\n",
"bank = (\n", "bank = (\n",
" r5_bank\n", " r5_bank.join(manual_bank, left_on=\"pcds\", right_on=\"postcode\", how=\"inner\")\n",
" .join(manual_bank, left_on=\"pcds\", right_on=\"postcode\", how=\"inner\")\n",
" .filter(\n", " .filter(\n",
" pl.col(\"public_transport_easy_minutes\").is_not_null()\n", " pl.col(\"public_transport_easy_minutes\").is_not_null()\n",
" & pl.col(\"public_transport_quick_minutes\").is_not_null()\n", " & pl.col(\"public_transport_quick_minutes\").is_not_null()\n",
" )\n", " )\n",
" .with_columns([\n", " .with_columns(\n",
" # Signed error: R5 - Manual (positive = R5 is slower)\n", " [\n",
" (pl.col(\"travel_minutes\").cast(pl.Float64) - pl.col(\"public_transport_easy_minutes\").cast(pl.Float64)).alias(\"error_easy\"),\n", " # Signed error: R5 - Manual (positive = R5 is slower)\n",
" (pl.col(\"best_minutes\").cast(pl.Float64) - pl.col(\"public_transport_quick_minutes\").cast(pl.Float64)).alias(\"error_quick\"),\n", " (\n",
" # Absolute error\n", " pl.col(\"travel_minutes\").cast(pl.Float64)\n",
" (pl.col(\"travel_minutes\").cast(pl.Float64) - pl.col(\"public_transport_easy_minutes\").cast(pl.Float64)).abs().alias(\"abs_error_easy\"),\n", " - pl.col(\"public_transport_easy_minutes\").cast(pl.Float64)\n",
" (pl.col(\"best_minutes\").cast(pl.Float64) - pl.col(\"public_transport_quick_minutes\").cast(pl.Float64)).abs().alias(\"abs_error_quick\"),\n", " ).alias(\"error_easy\"),\n",
" ])\n", " (\n",
" pl.col(\"best_minutes\").cast(pl.Float64)\n",
" - pl.col(\"public_transport_quick_minutes\").cast(pl.Float64)\n",
" ).alias(\"error_quick\"),\n",
" # Absolute error\n",
" (\n",
" pl.col(\"travel_minutes\").cast(pl.Float64)\n",
" - pl.col(\"public_transport_easy_minutes\").cast(pl.Float64)\n",
" )\n",
" .abs()\n",
" .alias(\"abs_error_easy\"),\n",
" (\n",
" pl.col(\"best_minutes\").cast(pl.Float64)\n",
" - pl.col(\"public_transport_quick_minutes\").cast(pl.Float64)\n",
" )\n",
" .abs()\n",
" .alias(\"abs_error_quick\"),\n",
" ]\n",
" )\n",
")\n", ")\n",
"\n", "\n",
"print(f\"Joined (non-null): {bank.shape[0]:,} postcodes\")\n", "print(f\"Joined (non-null): {bank.shape[0]:,} postcodes\")\n",
"bank.select(\"pcds\", \"travel_minutes\", \"public_transport_easy_minutes\", \"error_easy\",\n", "bank.select(\n",
" \"best_minutes\", \"public_transport_quick_minutes\", \"error_quick\").head(10)" " \"pcds\",\n",
" \"travel_minutes\",\n",
" \"public_transport_easy_minutes\",\n",
" \"error_easy\",\n",
" \"best_minutes\",\n",
" \"public_transport_quick_minutes\",\n",
" \"error_quick\",\n",
").head(10)"
] ]
}, },
{ {
@ -196,18 +224,23 @@
" percentiles = [5, 25, 50, 80, 90, 95, 99]\n", " percentiles = [5, 25, 50, 80, 90, 95, 99]\n",
" rows = []\n", " rows = []\n",
" for p in percentiles:\n", " for p in percentiles:\n",
" rows.append({\n", " rows.append(\n",
" \"percentile\": f\"p{p}\",\n", " {\n",
" f\"{label} signed error\": round(float(np.percentile(col, p)), 1),\n", " \"percentile\": f\"p{p}\",\n",
" f\"{label} absolute error\": round(float(np.percentile(abs_col, p)), 1),\n", " f\"{label} signed error\": round(float(np.percentile(col, p)), 1),\n",
" })\n", " f\"{label} absolute error\": round(float(np.percentile(abs_col, p)), 1),\n",
" rows.append({\n", " }\n",
" \"percentile\": \"mean\",\n", " )\n",
" f\"{label} signed error\": round(float(np.mean(col)), 1),\n", " rows.append(\n",
" f\"{label} absolute error\": round(float(np.mean(abs_col)), 1),\n", " {\n",
" })\n", " \"percentile\": \"mean\",\n",
" f\"{label} signed error\": round(float(np.mean(col)), 1),\n",
" f\"{label} absolute error\": round(float(np.mean(abs_col)), 1),\n",
" }\n",
" )\n",
" return pl.DataFrame(rows)\n", " return pl.DataFrame(rows)\n",
"\n", "\n",
"\n",
"stats_easy = percentile_stats(\"error_easy\", \"Median (easy)\")\n", "stats_easy = percentile_stats(\"error_easy\", \"Median (easy)\")\n",
"stats_quick = percentile_stats(\"error_quick\", \"Best (quick)\")\n", "stats_quick = percentile_stats(\"error_quick\", \"Best (quick)\")\n",
"\n", "\n",
@ -1120,24 +1153,42 @@
} }
], ],
"source": [ "source": [
"fig = make_subplots(rows=1, cols=2, subplot_titles=[\n", "fig = make_subplots(\n",
" \"Median transit time error (R5 TfL)\",\n", " rows=1,\n",
" \"Best transit time error (R5 TfL)\"\n", " cols=2,\n",
"])\n", " subplot_titles=[\n",
" \"Median transit time error (R5 TfL)\",\n",
" \"Best transit time error (R5 TfL)\",\n",
" ],\n",
")\n",
"\n", "\n",
"# Clip for readability\n", "# Clip for readability\n",
"easy_clipped = bank[\"error_easy\"].clip(-60, 60).to_numpy()\n", "easy_clipped = bank[\"error_easy\"].clip(-60, 60).to_numpy()\n",
"quick_clipped = bank[\"error_quick\"].clip(-60, 60).to_numpy()\n", "quick_clipped = bank[\"error_quick\"].clip(-60, 60).to_numpy()\n",
"\n", "\n",
"fig.add_trace(go.Histogram(x=easy_clipped, nbinsx=120, name=\"Median (easy)\",\n", "fig.add_trace(\n",
" marker_color=\"#0d9488\"), row=1, col=1)\n", " go.Histogram(\n",
"fig.add_trace(go.Histogram(x=quick_clipped, nbinsx=120, name=\"Best (quick)\",\n", " x=easy_clipped, nbinsx=120, name=\"Median (easy)\", marker_color=\"#0d9488\"\n",
" marker_color=\"#f59e0b\"), row=1, col=2)\n", " ),\n",
" row=1,\n",
" col=1,\n",
")\n",
"fig.add_trace(\n",
" go.Histogram(\n",
" x=quick_clipped, nbinsx=120, name=\"Best (quick)\", marker_color=\"#f59e0b\"\n",
" ),\n",
" row=1,\n",
" col=2,\n",
")\n",
"\n", "\n",
"fig.update_xaxes(title_text=\"Error (minutes)\", row=1, col=1)\n", "fig.update_xaxes(title_text=\"Error (minutes)\", row=1, col=1)\n",
"fig.update_xaxes(title_text=\"Error (minutes)\", row=1, col=2)\n", "fig.update_xaxes(title_text=\"Error (minutes)\", row=1, col=2)\n",
"fig.update_yaxes(title_text=\"Count\", row=1, col=1)\n", "fig.update_yaxes(title_text=\"Count\", row=1, col=1)\n",
"fig.update_layout(height=400, showlegend=False, title_text=\"Bank: Error Distribution (clipped ±60 min)\")\n", "fig.update_layout(\n",
" height=400,\n",
" showlegend=False,\n",
" title_text=\"Bank: Error Distribution (clipped ±60 min)\",\n",
")\n",
"fig.show()" "fig.show()"
] ]
}, },
@ -2104,34 +2155,55 @@
"# Sample for scatter plot performance\n", "# Sample for scatter plot performance\n",
"sample = bank.sample(n=min(20_000, bank.shape[0]), seed=42)\n", "sample = bank.sample(n=min(20_000, bank.shape[0]), seed=42)\n",
"\n", "\n",
"fig = make_subplots(rows=1, cols=2, subplot_titles=[\n", "fig = make_subplots(\n",
" \"Median: R5 vs TfL (easy)\",\n", " rows=1,\n",
" \"Best: R5 vs TfL (quick)\"\n", " cols=2,\n",
"])\n", " subplot_titles=[\"Median: R5 vs TfL (easy)\", \"Best: R5 vs TfL (quick)\"],\n",
")\n",
"\n", "\n",
"fig.add_trace(go.Scattergl(\n", "fig.add_trace(\n",
" x=sample[\"public_transport_easy_minutes\"].to_numpy(),\n", " go.Scattergl(\n",
" y=sample[\"travel_minutes\"].cast(pl.Float64).to_numpy(),\n", " x=sample[\"public_transport_easy_minutes\"].to_numpy(),\n",
" mode=\"markers\", marker=dict(size=2, opacity=0.3, color=\"#0d9488\"),\n", " y=sample[\"travel_minutes\"].cast(pl.Float64).to_numpy(),\n",
" name=\"Median\"\n", " mode=\"markers\",\n",
"), row=1, col=1)\n", " marker=dict(size=2, opacity=0.3, color=\"#0d9488\"),\n",
" name=\"Median\",\n",
" ),\n",
" row=1,\n",
" col=1,\n",
")\n",
"\n", "\n",
"fig.add_trace(go.Scattergl(\n", "fig.add_trace(\n",
" x=sample[\"public_transport_quick_minutes\"].to_numpy(),\n", " go.Scattergl(\n",
" y=sample[\"best_minutes\"].cast(pl.Float64).to_numpy(),\n", " x=sample[\"public_transport_quick_minutes\"].to_numpy(),\n",
" mode=\"markers\", marker=dict(size=2, opacity=0.3, color=\"#f59e0b\"),\n", " y=sample[\"best_minutes\"].cast(pl.Float64).to_numpy(),\n",
" name=\"Best\"\n", " mode=\"markers\",\n",
"), row=1, col=2)\n", " marker=dict(size=2, opacity=0.3, color=\"#f59e0b\"),\n",
" name=\"Best\",\n",
" ),\n",
" row=1,\n",
" col=2,\n",
")\n",
"\n", "\n",
"# Perfect agreement line\n", "# Perfect agreement line\n",
"for col in [1, 2]:\n", "for col in [1, 2]:\n",
" fig.add_trace(go.Scatter(x=[0, 200], y=[0, 200], mode=\"lines\",\n", " fig.add_trace(\n",
" line=dict(color=\"red\", dash=\"dash\", width=1),\n", " go.Scatter(\n",
" showlegend=False), row=1, col=col)\n", " x=[0, 200],\n",
" y=[0, 200],\n",
" mode=\"lines\",\n",
" line=dict(color=\"red\", dash=\"dash\", width=1),\n",
" showlegend=False,\n",
" ),\n",
" row=1,\n",
" col=col,\n",
" )\n",
" fig.update_xaxes(title_text=\"TfL API (minutes)\", row=1, col=col)\n", " fig.update_xaxes(title_text=\"TfL API (minutes)\", row=1, col=col)\n",
" fig.update_yaxes(title_text=\"R5 (minutes)\", row=1, col=col)\n", " fig.update_yaxes(title_text=\"R5 (minutes)\", row=1, col=col)\n",
"\n", "\n",
"fig.update_layout(height=500, showlegend=False, title_text=\"Bank: R5 vs TfL API (20k sample)\")\n", "fig.update_layout(\n",
" height=500, showlegend=False, title_text=\"Bank: R5 vs TfL API (20k sample)\"\n",
")\n",
"fig.show()" "fig.show()"
] ]
}, },
@ -403063,7 +403135,8 @@
"\n", "\n",
"fig = px.scatter_map(\n", "fig = px.scatter_map(\n",
" map_sample.to_pandas(),\n", " map_sample.to_pandas(),\n",
" lat=\"lat\", lon=\"long\",\n", " lat=\"lat\",\n",
" lon=\"long\",\n",
" color=\"error_easy\",\n", " color=\"error_easy\",\n",
" color_continuous_scale=\"RdBu_r\", # red=positive (R5 slower), blue=negative (R5 faster)\n", " color_continuous_scale=\"RdBu_r\", # red=positive (R5 slower), blue=negative (R5 faster)\n",
" range_color=[-30, 30],\n", " range_color=[-30, 30],\n",
@ -403071,8 +403144,14 @@
" center={\"lat\": 51.5, \"lon\": -0.1},\n", " center={\"lat\": 51.5, \"lon\": -0.1},\n",
" opacity=0.5,\n", " opacity=0.5,\n",
" title=\"Bank — Median transit error (R5 TfL easy), minutes\",\n", " title=\"Bank — Median transit error (R5 TfL easy), minutes\",\n",
" hover_data={\"pcds\": True, \"travel_minutes\": True, \"public_transport_easy_minutes\": True,\n", " hover_data={\n",
" \"error_easy\": \":.0f\", \"lat\": False, \"long\": False},\n", " \"pcds\": True,\n",
" \"travel_minutes\": True,\n",
" \"public_transport_easy_minutes\": True,\n",
" \"error_easy\": \":.0f\",\n",
" \"lat\": False,\n",
" \"long\": False,\n",
" },\n",
" height=700,\n", " height=700,\n",
")\n", ")\n",
"fig.update_layout(map_style=\"carto-positron\")\n", "fig.update_layout(map_style=\"carto-positron\")\n",
@ -803994,7 +804073,8 @@
"source": [ "source": [
"fig = px.scatter_map(\n", "fig = px.scatter_map(\n",
" map_sample.to_pandas(),\n", " map_sample.to_pandas(),\n",
" lat=\"lat\", lon=\"long\",\n", " lat=\"lat\",\n",
" lon=\"long\",\n",
" color=\"error_quick\",\n", " color=\"error_quick\",\n",
" color_continuous_scale=\"RdBu_r\",\n", " color_continuous_scale=\"RdBu_r\",\n",
" range_color=[-30, 30],\n", " range_color=[-30, 30],\n",
@ -804002,8 +804082,14 @@
" center={\"lat\": 51.5, \"lon\": -0.1},\n", " center={\"lat\": 51.5, \"lon\": -0.1},\n",
" opacity=0.5,\n", " opacity=0.5,\n",
" title=\"Bank — Best transit error (R5 TfL quick), minutes\",\n", " title=\"Bank — Best transit error (R5 TfL quick), minutes\",\n",
" hover_data={\"pcds\": True, \"best_minutes\": True, \"public_transport_quick_minutes\": True,\n", " hover_data={\n",
" \"error_quick\": \":.0f\", \"lat\": False, \"long\": False},\n", " \"pcds\": True,\n",
" \"best_minutes\": True,\n",
" \"public_transport_quick_minutes\": True,\n",
" \"error_quick\": \":.0f\",\n",
" \"lat\": False,\n",
" \"long\": False,\n",
" },\n",
" height=700,\n", " height=700,\n",
")\n", ")\n",
"fig.update_layout(map_style=\"carto-positron\")\n", "fig.update_layout(map_style=\"carto-positron\")\n",
@ -1204925,7 +1205011,8 @@
"source": [ "source": [
"fig = px.scatter_map(\n", "fig = px.scatter_map(\n",
" map_sample.to_pandas(),\n", " map_sample.to_pandas(),\n",
" lat=\"lat\", lon=\"long\",\n", " lat=\"lat\",\n",
" lon=\"long\",\n",
" color=\"abs_error_easy\",\n", " color=\"abs_error_easy\",\n",
" color_continuous_scale=\"YlOrRd\",\n", " color_continuous_scale=\"YlOrRd\",\n",
" range_color=[0, 30],\n", " range_color=[0, 30],\n",
@ -1204933,8 +1205020,14 @@
" center={\"lat\": 51.5, \"lon\": -0.1},\n", " center={\"lat\": 51.5, \"lon\": -0.1},\n",
" opacity=0.5,\n", " opacity=0.5,\n",
" title=\"Bank — Absolute median transit error |R5 TfL easy|, minutes\",\n", " title=\"Bank — Absolute median transit error |R5 TfL easy|, minutes\",\n",
" hover_data={\"pcds\": True, \"travel_minutes\": True, \"public_transport_easy_minutes\": True,\n", " hover_data={\n",
" \"abs_error_easy\": \":.0f\", \"lat\": False, \"long\": False},\n", " \"pcds\": True,\n",
" \"travel_minutes\": True,\n",
" \"public_transport_easy_minutes\": True,\n",
" \"abs_error_easy\": \":.0f\",\n",
" \"lat\": False,\n",
" \"long\": False,\n",
" },\n",
" height=700,\n", " height=700,\n",
")\n", ")\n",
"fig.update_layout(map_style=\"carto-positron\")\n", "fig.update_layout(map_style=\"carto-positron\")\n",
@ -1204998,9 +1205091,15 @@
], ],
"source": [ "source": [
"bank.sort(\"abs_error_easy\", descending=True).select(\n", "bank.sort(\"abs_error_easy\", descending=True).select(\n",
" \"pcds\", \"lat\", \"long\",\n", " \"pcds\",\n",
" \"travel_minutes\", \"public_transport_easy_minutes\", \"error_easy\",\n", " \"lat\",\n",
" \"best_minutes\", \"public_transport_quick_minutes\", \"error_quick\",\n", " \"long\",\n",
" \"travel_minutes\",\n",
" \"public_transport_easy_minutes\",\n",
" \"error_easy\",\n",
" \"best_minutes\",\n",
" \"public_transport_quick_minutes\",\n",
" \"error_quick\",\n",
").head(30)" ").head(30)"
] ]
}, },
@ -1205945,45 +1206044,75 @@
"\n", "\n",
"dist_df = bank.with_columns(\n", "dist_df = bank.with_columns(\n",
" # Rough km distance using Haversine approximation\n", " # Rough km distance using Haversine approximation\n",
" ((((pl.col(\"lat\") - BANK_LAT) * 111.32) ** 2 +\n", " (\n",
" ((pl.col(\"long\") - BANK_LON) * 111.32 * np.cos(np.radians(BANK_LAT))) ** 2) ** 0.5\n", " (\n",
" ((pl.col(\"lat\") - BANK_LAT) * 111.32) ** 2\n",
" + ((pl.col(\"long\") - BANK_LON) * 111.32 * np.cos(np.radians(BANK_LAT))) ** 2\n",
" )\n",
" ** 0.5\n",
" ).alias(\"dist_km\")\n", " ).alias(\"dist_km\")\n",
")\n", ")\n",
"\n", "\n",
"# Bin by 5km\n", "# Bin by 5km\n",
"binned = (\n", "binned = (\n",
" dist_df\n", " dist_df.with_columns((pl.col(\"dist_km\") / 5).floor() * 5)\n",
" .with_columns((pl.col(\"dist_km\") / 5).floor() * 5)\n",
" .group_by(\"dist_km\")\n", " .group_by(\"dist_km\")\n",
" .agg([\n", " .agg(\n",
" pl.col(\"error_easy\").median().alias(\"median_error_easy\"),\n", " [\n",
" pl.col(\"error_quick\").median().alias(\"median_error_quick\"),\n", " pl.col(\"error_easy\").median().alias(\"median_error_easy\"),\n",
" pl.col(\"abs_error_easy\").median().alias(\"median_abs_error_easy\"),\n", " pl.col(\"error_quick\").median().alias(\"median_error_quick\"),\n",
" pl.len().alias(\"count\"),\n", " pl.col(\"abs_error_easy\").median().alias(\"median_abs_error_easy\"),\n",
" ])\n", " pl.len().alias(\"count\"),\n",
" ]\n",
" )\n",
" .sort(\"dist_km\")\n", " .sort(\"dist_km\")\n",
" .filter(pl.col(\"count\") > 50)\n", " .filter(pl.col(\"count\") > 50)\n",
")\n", ")\n",
"\n", "\n",
"fig = make_subplots(rows=1, cols=2, subplot_titles=[\n", "fig = make_subplots(\n",
" \"Median signed error by distance\",\n", " rows=1,\n",
" \"Median absolute error by distance\"\n", " cols=2,\n",
"])\n", " subplot_titles=[\n",
" \"Median signed error by distance\",\n",
" \"Median absolute error by distance\",\n",
" ],\n",
")\n",
"\n", "\n",
"fig.add_trace(go.Scatter(\n", "fig.add_trace(\n",
" x=binned[\"dist_km\"].to_numpy(), y=binned[\"median_error_easy\"].to_numpy(),\n", " go.Scatter(\n",
" mode=\"lines+markers\", name=\"Easy\", line=dict(color=\"#0d9488\")\n", " x=binned[\"dist_km\"].to_numpy(),\n",
"), row=1, col=1)\n", " y=binned[\"median_error_easy\"].to_numpy(),\n",
"fig.add_trace(go.Scatter(\n", " mode=\"lines+markers\",\n",
" x=binned[\"dist_km\"].to_numpy(), y=binned[\"median_error_quick\"].to_numpy(),\n", " name=\"Easy\",\n",
" mode=\"lines+markers\", name=\"Quick\", line=dict(color=\"#f59e0b\")\n", " line=dict(color=\"#0d9488\"),\n",
"), row=1, col=1)\n", " ),\n",
" row=1,\n",
" col=1,\n",
")\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=binned[\"dist_km\"].to_numpy(),\n",
" y=binned[\"median_error_quick\"].to_numpy(),\n",
" mode=\"lines+markers\",\n",
" name=\"Quick\",\n",
" line=dict(color=\"#f59e0b\"),\n",
" ),\n",
" row=1,\n",
" col=1,\n",
")\n",
"\n", "\n",
"fig.add_trace(go.Scatter(\n", "fig.add_trace(\n",
" x=binned[\"dist_km\"].to_numpy(), y=binned[\"median_abs_error_easy\"].to_numpy(),\n", " go.Scatter(\n",
" mode=\"lines+markers\", name=\"|Easy|\", line=dict(color=\"#0d9488\"),\n", " x=binned[\"dist_km\"].to_numpy(),\n",
" showlegend=False\n", " y=binned[\"median_abs_error_easy\"].to_numpy(),\n",
"), row=1, col=2)\n", " mode=\"lines+markers\",\n",
" name=\"|Easy|\",\n",
" line=dict(color=\"#0d9488\"),\n",
" showlegend=False,\n",
" ),\n",
" row=1,\n",
" col=2,\n",
")\n",
"\n", "\n",
"for col in [1, 2]:\n", "for col in [1, 2]:\n",
" fig.update_xaxes(title_text=\"Distance from Bank (km)\", row=1, col=col)\n", " fig.update_xaxes(title_text=\"Distance from Bank (km)\", row=1, col=col)\n",

View file

@ -16,9 +16,21 @@ SCHEDULE_HOUR = int(os.environ.get("SCHEDULE_HOUR", "3"))
# Whether to run a scrape immediately on startup # Whether to run a scrape immediately on startup
RUN_ON_STARTUP = os.environ.get("RUN_ON_STARTUP", "").lower() in ("1", "true", "yes") RUN_ON_STARTUP = os.environ.get("RUN_ON_STARTUP", "").lower() in ("1", "true", "yes")
# Enable/disable individual sources # Enable/disable individual sources
SCRAPE_RIGHTMOVE = os.environ.get("SCRAPE_RIGHTMOVE", "true").lower() in ("1", "true", "yes") SCRAPE_RIGHTMOVE = os.environ.get("SCRAPE_RIGHTMOVE", "true").lower() in (
SCRAPE_HOMECOUK = os.environ.get("SCRAPE_HOMECOUK", "true").lower() in ("1", "true", "yes") "1",
SCRAPE_OPENRENT = os.environ.get("SCRAPE_OPENRENT", "true").lower() in ("1", "true", "yes") "true",
"yes",
)
SCRAPE_HOMECOUK = os.environ.get("SCRAPE_HOMECOUK", "true").lower() in (
"1",
"true",
"yes",
)
SCRAPE_OPENRENT = os.environ.get("SCRAPE_OPENRENT", "true").lower() in (
"1",
"true",
"yes",
)
TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead" TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
SEARCH_URL = "https://www.rightmove.co.uk/api/property-search/listing/search" SEARCH_URL = "https://www.rightmove.co.uk/api/property-search/listing/search"

View file

@ -86,7 +86,8 @@ def solve_cloudflare() -> tuple[dict[str, str], str] | None:
log.info( log.info(
"Cloudflare solved — got %d cookies, UA: %s", "Cloudflare solved — got %d cookies, UA: %s",
len(cookies), user_agent[:60], len(cookies),
user_agent[:60],
) )
flaresolverr_attempts_total.labels(result="success").inc() flaresolverr_attempts_total.labels(result="success").inc()
return cookies, user_agent return cookies, user_agent
@ -129,11 +130,13 @@ def make_client(cookies: dict[str, str], user_agent: str) -> Session:
Uses Chrome TLS impersonation so cf_clearance cookies (which are bound Uses Chrome TLS impersonation so cf_clearance cookies (which are bound
to Chrome's JA3 fingerprint from FlareSolverr) remain valid.""" to Chrome's JA3 fingerprint from FlareSolverr) remain valid."""
session = Session(impersonate="chrome") session = Session(impersonate="chrome")
session.headers.update({ session.headers.update(
"User-Agent": user_agent, {
"Accept": "application/json, text/plain, */*", "User-Agent": user_agent,
"x-requested-with": "XMLHttpRequest", "Accept": "application/json, text/plain, */*",
}) "x-requested-with": "XMLHttpRequest",
}
)
# Laravel CSRF: the XSRF-TOKEN cookie value must also be sent as the # Laravel CSRF: the XSRF-TOKEN cookie value must also be sent as the
# X-XSRF-TOKEN request header (URL-decoded). Without this header, the # X-XSRF-TOKEN request header (URL-decoded). Without this header, the
# server rejects every request with 419/403. # server rejects every request with 419/403.
@ -165,7 +168,11 @@ def fetch_page(
return resp.json() return resp.json()
except json.JSONDecodeError: except json.JSONDecodeError:
homecouk_errors_total.labels(type="json_decode").inc() homecouk_errors_total.labels(type="json_decode").inc()
log.error("Non-JSON response from %s (got %s)", url, resp.headers.get("content-type", "?")) log.error(
"Non-JSON response from %s (got %s)",
url,
resp.headers.get("content-type", "?"),
)
return None return None
if resp.status_code == 403: if resp.status_code == 403:
raise CookiesExpiredError("HTTP 403 — cookies likely expired") raise CookiesExpiredError("HTTP 403 — cookies likely expired")
@ -173,7 +180,11 @@ def fetch_page(
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1) delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
log.warning( log.warning(
"HTTP %d from %s, retry %d/%d in %.1fs", "HTTP %d from %s, retry %d/%d in %.1fs",
resp.status_code, url, attempt + 1, max_retries, delay, resp.status_code,
url,
attempt + 1,
max_retries,
delay,
) )
time.sleep(delay) time.sleep(delay)
continue continue
@ -186,7 +197,11 @@ def fetch_page(
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1) delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
log.warning( log.warning(
"%s from %s, retry %d/%d in %.1fs", "%s from %s, retry %d/%d in %.1fs",
type(e).__name__, url, attempt + 1, max_retries, delay, type(e).__name__,
url,
attempt + 1,
max_retries,
delay,
) )
time.sleep(delay) time.sleep(delay)
homecouk_errors_total.labels(type="retry_exhausted").inc() homecouk_errors_total.labels(type="retry_exhausted").inc()
@ -218,7 +233,12 @@ def map_property_type(raw_type: str | None) -> str:
# Home.co.uk uses types like "House", "Flat", "Apartment", "Detached", etc. # Home.co.uk uses types like "House", "Flat", "Apartment", "Detached", etc.
# Try common patterns # Try common patterns
lower = raw_type.lower() lower = raw_type.lower()
if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower: if (
"flat" in lower
or "apartment" in lower
or "maisonette" in lower
or "studio" in lower
):
return "Flats/Maisonettes" return "Flats/Maisonettes"
if "detached" in lower and "semi" not in lower: if "detached" in lower and "semi" not in lower:
return "Detached" return "Detached"
@ -231,7 +251,9 @@ def map_property_type(raw_type: str | None) -> str:
def transform_property( def transform_property(
prop: dict, channel: str, pc_index: PostcodeSpatialIndex, prop: dict,
channel: str,
pc_index: PostcodeSpatialIndex,
) -> dict | None: ) -> dict | None:
"""Transform a raw home.co.uk property dict into our output schema.""" """Transform a raw home.co.uk property dict into our output schema."""
lat = prop.get("latitude") lat = prop.get("latitude")

View file

@ -11,7 +11,9 @@ from metrics import http_errors_total, http_requests_total, ip_rotations_total
log = logging.getLogger("rightmove") log = logging.getLogger("rightmove")
_ua = UserAgent(browsers=["Chrome", "Edge"], os=["Windows", "Mac OS X"], min_version=120.0) _ua = UserAgent(
browsers=["Chrome", "Edge"], os=["Windows", "Mac OS X"], min_version=120.0
)
def _endpoint_label(url: str) -> str: def _endpoint_label(url: str) -> str:
@ -27,6 +29,7 @@ def _status_label(code: int) -> str:
return "5xx" return "5xx"
return str(code) return str(code)
# Gluetun control API — runs on port 8000 inside the gluetun container. # Gluetun control API — runs on port 8000 inside the gluetun container.
# Since finder uses network_mode: service:gluetun, localhost IS gluetun. # Since finder uses network_mode: service:gluetun, localhost IS gluetun.
GLUETUN_API = "http://127.0.0.1:8000" GLUETUN_API = "http://127.0.0.1:8000"
@ -42,17 +45,25 @@ def rotate_ip() -> bool:
# Get current IP # Get current IP
with httpx.Client(timeout=10) as ctl: with httpx.Client(timeout=10) as ctl:
old_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip") old_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
old_ip = old_ip_resp.json().get("public_ip", "unknown") if old_ip_resp.status_code == 200 else "unknown" old_ip = (
old_ip_resp.json().get("public_ip", "unknown")
if old_ip_resp.status_code == 200
else "unknown"
)
log.info("Current IP: %s", old_ip) log.info("Current IP: %s", old_ip)
# Trigger server change — PUT with empty JSON body picks a random server # Trigger server change — PUT with empty JSON body picks a random server
resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"}) resp = ctl.put(
f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"}
)
if resp.status_code != 200: if resp.status_code != 200:
log.error("Failed to stop VPN: %d %s", resp.status_code, resp.text) log.error("Failed to stop VPN: %d %s", resp.status_code, resp.text)
return False return False
time.sleep(2) time.sleep(2)
resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"}) resp = ctl.put(
f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"}
)
if resp.status_code != 200: if resp.status_code != 200:
log.error("Failed to start VPN: %d %s", resp.status_code, resp.text) log.error("Failed to start VPN: %d %s", resp.status_code, resp.text)
return False return False
@ -99,7 +110,9 @@ def fetch_with_retry(
for attempt in range(MAX_RETRIES): for attempt in range(MAX_RETRIES):
try: try:
resp = client.get(url, params=params) resp = client.get(url, params=params)
http_requests_total.labels(status=_status_label(resp.status_code), endpoint=endpoint).inc() http_requests_total.labels(
status=_status_label(resp.status_code), endpoint=endpoint
).inc()
if resp.status_code == 200: if resp.status_code == 200:
return resp.json() return resp.json()
if resp.status_code == 403 and on_403: if resp.status_code == 403 and on_403:
@ -111,15 +124,34 @@ def fetch_with_retry(
return None return None
if resp.status_code in (429, 500, 502, 503, 504): if resp.status_code in (429, 500, 502, 503, 504):
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1) delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
log.warning("HTTP %d from %s, retry %d/%d in %.1fs", resp.status_code, url, attempt + 1, MAX_RETRIES, delay) log.warning(
"HTTP %d from %s, retry %d/%d in %.1fs",
resp.status_code,
url,
attempt + 1,
MAX_RETRIES,
delay,
)
time.sleep(delay) time.sleep(delay)
continue continue
log.error("HTTP %d from %s (non-retryable)", resp.status_code, url) log.error("HTTP %d from %s (non-retryable)", resp.status_code, url)
return None return None
except (httpx.ConnectError, httpx.ReadTimeout, httpx.WriteTimeout, httpx.PoolTimeout) as e: except (
httpx.ConnectError,
httpx.ReadTimeout,
httpx.WriteTimeout,
httpx.PoolTimeout,
) as e:
http_errors_total.labels(type=type(e).__name__).inc() http_errors_total.labels(type=type(e).__name__).inc()
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1) delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
log.warning("%s from %s, retry %d/%d in %.1fs", type(e).__name__, url, attempt + 1, MAX_RETRIES, delay) log.warning(
"%s from %s, retry %d/%d in %.1fs",
type(e).__name__,
url,
attempt + 1,
MAX_RETRIES,
delay,
)
time.sleep(delay) time.sleep(delay)
http_errors_total.labels(type="retry_exhausted").inc() http_errors_total.labels(type="retry_exhausted").inc()
log.error("All %d retries exhausted for %s", MAX_RETRIES, url) log.error("All %d retries exhausted for %s", MAX_RETRIES, url)

View file

@ -7,7 +7,14 @@ from pathlib import Path
from flask import Flask, Response, jsonify, send_from_directory from flask import Flask, Response, jsonify, send_from_directory
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
from constants import DATA_DIR, RUN_ON_STARTUP, SCHEDULE_HOUR, SCRAPE_HOMECOUK, SCRAPE_OPENRENT, SCRAPE_RIGHTMOVE from constants import (
DATA_DIR,
RUN_ON_STARTUP,
SCHEDULE_HOUR,
SCRAPE_HOMECOUK,
SCRAPE_OPENRENT,
SCRAPE_RIGHTMOVE,
)
from homecouk import load_cookies as load_homecouk_cookies from homecouk import load_cookies as load_homecouk_cookies
from openrent import load_cookies as load_openrent_cookies from openrent import load_cookies as load_openrent_cookies
from rightmove import outcode_cache from rightmove import outcode_cache
@ -49,8 +56,13 @@ log.info("Loading arcgis data...")
OUTCODES = load_outcodes() OUTCODES = load_outcodes()
PC_INDEX = build_postcode_index() PC_INDEX = build_postcode_index()
PC_COORDS = build_postcode_coords() if SCRAPE_OPENRENT else None PC_COORDS = build_postcode_coords() if SCRAPE_OPENRENT else None
log.info("Ready — %d outcodes, postcode index built (rightmove=%s, homecouk=%s, openrent=%s)", log.info(
len(OUTCODES), SCRAPE_RIGHTMOVE, SCRAPE_HOMECOUK, SCRAPE_OPENRENT) "Ready — %d outcodes, postcode index built (rightmove=%s, homecouk=%s, openrent=%s)",
len(OUTCODES),
SCRAPE_RIGHTMOVE,
SCRAPE_HOMECOUK,
SCRAPE_OPENRENT,
)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Scheduler # Scheduler
@ -63,7 +75,9 @@ def _start_scrape() -> bool:
if status.state == "running": if status.state == "running":
return False return False
status.state = "running" status.state = "running"
thread = threading.Thread(target=run_scrape, args=(OUTCODES, PC_INDEX, PC_COORDS), daemon=True) thread = threading.Thread(
target=run_scrape, args=(OUTCODES, PC_INDEX, PC_COORDS), daemon=True
)
thread.start() thread.start()
return True return True
@ -82,7 +96,9 @@ def _scheduler_loop() -> None:
log.info("Scheduler active — will run daily at %02d:00 UTC", SCHEDULE_HOUR) log.info("Scheduler active — will run daily at %02d:00 UTC", SCHEDULE_HOUR)
while True: while True:
wait = _seconds_until(SCHEDULE_HOUR) wait = _seconds_until(SCHEDULE_HOUR)
log.info("Next scheduled scrape in %.0f seconds (%.1f hours)", wait, wait / 3600) log.info(
"Next scheduled scrape in %.0f seconds (%.1f hours)", wait, wait / 3600
)
time.sleep(wait) time.sleep(wait)
log.info("Scheduled scrape triggered") log.info("Scheduled scrape triggered")
if not _start_scrape(): if not _start_scrape():
@ -144,15 +160,17 @@ def get_status():
def get_debug(): def get_debug():
hk_cookies = load_homecouk_cookies() if SCRAPE_HOMECOUK else None hk_cookies = load_homecouk_cookies() if SCRAPE_HOMECOUK else None
or_cookies = load_openrent_cookies() if SCRAPE_OPENRENT else None or_cookies = load_openrent_cookies() if SCRAPE_OPENRENT else None
return jsonify({ return jsonify(
"outcode_cache_size": len(outcode_cache), {
"outcode_cache_sample": dict(list(outcode_cache.items())[:20]), "outcode_cache_size": len(outcode_cache),
"scrape_rightmove": SCRAPE_RIGHTMOVE, "outcode_cache_sample": dict(list(outcode_cache.items())[:20]),
"scrape_homecouk": SCRAPE_HOMECOUK, "scrape_rightmove": SCRAPE_RIGHTMOVE,
"scrape_openrent": SCRAPE_OPENRENT, "scrape_homecouk": SCRAPE_HOMECOUK,
"homecouk_cookies_available": hk_cookies is not None, "scrape_openrent": SCRAPE_OPENRENT,
"openrent_cookies_available": or_cookies is not None, "homecouk_cookies_available": hk_cookies is not None,
}) "openrent_cookies_available": or_cookies is not None,
}
)
@app.route("/metrics") @app.route("/metrics")

View file

@ -79,7 +79,8 @@ def solve_waf() -> tuple[dict[str, str], str] | None:
if "AwsWafIntegration" in content: if "AwsWafIntegration" in content:
log.info("Got WAF challenge page, waiting for resolution...") log.info("Got WAF challenge page, waiting for resolution...")
page.wait_for_selector( page.wait_for_selector(
"a.pli, .pli, .search-property-card", timeout=30000, "a.pli, .pli, .search-property-card",
timeout=30000,
) )
raw_cookies = context.cookies() raw_cookies = context.cookies()
@ -94,7 +95,8 @@ def solve_waf() -> tuple[dict[str, str], str] | None:
log.info( log.info(
"AWS WAF solved — got %d cookies, UA: %s", "AWS WAF solved — got %d cookies, UA: %s",
len(cookies), user_agent[:60], len(cookies),
user_agent[:60],
) )
flaresolverr_attempts_total.labels(result="success").inc() flaresolverr_attempts_total.labels(result="success").inc()
return cookies, user_agent return cookies, user_agent
@ -130,11 +132,13 @@ def make_client(cookies: dict[str, str], user_agent: str) -> Session:
"""Create a curl_cffi Session configured for OpenRent. """Create a curl_cffi Session configured for OpenRent.
Uses Chrome TLS impersonation so AWS WAF cookies remain valid.""" Uses Chrome TLS impersonation so AWS WAF cookies remain valid."""
session = Session(impersonate="chrome") session = Session(impersonate="chrome")
session.headers.update({ session.headers.update(
"User-Agent": user_agent, {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "User-Agent": user_agent,
"Accept-Language": "en-GB,en;q=0.9", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
}) "Accept-Language": "en-GB,en;q=0.9",
}
)
for name, value in cookies.items(): for name, value in cookies.items():
session.cookies.set(name, value, domain="openrent.co.uk") session.cookies.set(name, value, domain="openrent.co.uk")
return session return session
@ -152,7 +156,9 @@ def _status_label(code: int) -> str:
def fetch_page( def fetch_page(
client: Session, url: str, max_retries: int = 3, client: Session,
url: str,
max_retries: int = 3,
) -> str | None: ) -> str | None:
"""GET HTML with retries on 429/5xx. Returns None on permanent failure. """GET HTML with retries on 429/5xx. Returns None on permanent failure.
WAF challenge (202 or 403 with challenge JS) raises WafChallengeError.""" WAF challenge (202 or 403 with challenge JS) raises WafChallengeError."""
@ -165,17 +171,25 @@ def fetch_page(
html = resp.text html = resp.text
# Detect WAF challenge page masquerading as 200 # Detect WAF challenge page masquerading as 200
if "AwsWafIntegration" in html and "challenge.js" in html: if "AwsWafIntegration" in html and "challenge.js" in html:
raise WafChallengeError("Got AWS WAF challenge page — cookies expired") raise WafChallengeError(
"Got AWS WAF challenge page — cookies expired"
)
return html return html
if resp.status_code in (202, 403): if resp.status_code in (202, 403):
raise WafChallengeError(f"HTTP {resp.status_code} — cookies likely expired") raise WafChallengeError(
f"HTTP {resp.status_code} — cookies likely expired"
)
if resp.status_code in (429, 500, 502, 503, 504): if resp.status_code in (429, 500, 502, 503, 504):
delay = RETRY_BASE_DELAY * (2 ** attempt) delay = RETRY_BASE_DELAY * (2**attempt)
log.warning( log.warning(
"HTTP %d from %s, retry %d/%d in %.1fs", "HTTP %d from %s, retry %d/%d in %.1fs",
resp.status_code, url, attempt + 1, max_retries, delay, resp.status_code,
url,
attempt + 1,
max_retries,
delay,
) )
time.sleep(delay) time.sleep(delay)
continue continue
@ -187,10 +201,14 @@ def fetch_page(
raise raise
except RequestsError as e: except RequestsError as e:
openrent_errors_total.labels(type=type(e).__name__).inc() openrent_errors_total.labels(type=type(e).__name__).inc()
delay = RETRY_BASE_DELAY * (2 ** attempt) delay = RETRY_BASE_DELAY * (2**attempt)
log.warning( log.warning(
"%s from %s, retry %d/%d in %.1fs", "%s from %s, retry %d/%d in %.1fs",
type(e).__name__, url, attempt + 1, max_retries, delay, type(e).__name__,
url,
attempt + 1,
max_retries,
delay,
) )
time.sleep(delay) time.sleep(delay)
@ -247,7 +265,9 @@ def _extract_bedrooms_from_title(title: str) -> int | None:
return None return None
def _extract_beds_baths_from_features(feature_items: list) -> tuple[int | None, int | None]: def _extract_beds_baths_from_features(
feature_items: list,
) -> tuple[int | None, int | None]:
"""Extract bedrooms and bathrooms from feature list items. """Extract bedrooms and bathrooms from feature list items.
OpenRent search cards have <ul> with items like: OpenRent search cards have <ul> with items like:
@ -442,11 +462,7 @@ def parse_search_results(html: str) -> list[dict]:
# --- Coordinates from data attributes (may not be present on cards) --- # --- Coordinates from data attributes (may not be present on cards) ---
for el in [card] + card.select("[data-lat], [data-latitude]"): for el in [card] + card.select("[data-lat], [data-latitude]"):
lat = el.get("data-lat") or el.get("data-latitude") lat = el.get("data-lat") or el.get("data-latitude")
lng = ( lng = el.get("data-lng") or el.get("data-longitude") or el.get("data-lon")
el.get("data-lng")
or el.get("data-longitude")
or el.get("data-lon")
)
if lat and lng: if lat and lng:
try: try:
prop["lat"] = float(lat) prop["lat"] = float(lat)
@ -543,9 +559,7 @@ def parse_property_detail(html: str) -> dict:
break break
# --- Description for floor area --- # --- Description for floor area ---
desc_el = soup.select_one( desc_el = soup.select_one(".description, [class*='description'], #description")
".description, [class*='description'], #description"
)
if desc_el: if desc_el:
details["description"] = desc_el.get_text(strip=True) details["description"] = desc_el.get_text(strip=True)
@ -567,7 +581,12 @@ def map_property_type(raw_type: str | None) -> str:
lower = raw_type.lower() lower = raw_type.lower()
if "room" in lower or "shared" in lower: if "room" in lower or "shared" in lower:
return "Other" return "Other"
if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower: if (
"flat" in lower
or "apartment" in lower
or "maisonette" in lower
or "studio" in lower
):
return "Flats/Maisonettes" return "Flats/Maisonettes"
if "detached" in lower and "semi" not in lower: if "detached" in lower and "semi" not in lower:
return "Detached" return "Detached"
@ -647,7 +666,8 @@ def transform_property(
elif search_data.get("outcode"): elif search_data.get("outcode"):
# No spatial index — try outcode lookup as fallback # No spatial index — try outcode lookup as fallback
outcode_pcs = _resolve_outcode_postcodes( outcode_pcs = _resolve_outcode_postcodes(
search_data["outcode"], pc_coords, search_data["outcode"],
pc_coords,
) )
if outcode_pcs: if outcode_pcs:
postcode = outcode_pcs[0] postcode = outcode_pcs[0]
@ -708,7 +728,8 @@ def transform_property(
prop_id = search_data.get("id", "") prop_id = search_data.get("id", "")
listing_url = search_data.get( listing_url = search_data.get(
"url", f"{OPENRENT_BASE}/{prop_id}" if prop_id else "", "url",
f"{OPENRENT_BASE}/{prop_id}" if prop_id else "",
) )
description = detail.get("description") or search_data.get("description", "") description = detail.get("description") or search_data.get("description", "")
@ -775,7 +796,10 @@ def search_outcode(
time.sleep(DELAY_BETWEEN_PAGES * 0.5) time.sleep(DELAY_BETWEEN_PAGES * 0.5)
transformed = transform_property( transformed = transform_property(
search_data, detail_data, pc_index, pc_coords, search_data,
detail_data,
pc_index,
pc_coords,
) )
if transformed: if transformed:
properties.append(transformed) properties.append(transformed)

View file

@ -24,7 +24,9 @@ def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
if outcode in outcode_cache: if outcode in outcode_cache:
return outcode_cache[outcode] return outcode_cache[outcode]
data = fetch_with_retry(client, TYPEAHEAD_URL, {"query": outcode, "limit": "10", "exclude": "STREET"}) data = fetch_with_retry(
client, TYPEAHEAD_URL, {"query": outcode, "limit": "10", "exclude": "STREET"}
)
if not data: if not data:
return None return None
@ -61,7 +63,12 @@ def search_outcode(
data = fetch_with_retry(client, SEARCH_URL, params) data = fetch_with_retry(client, SEARCH_URL, params)
if not data: if not data:
log.warning("Failed to fetch index %d for %s/%s", index, outcode, channel_cfg["channel"]) log.warning(
"Failed to fetch index %d for %s/%s",
index,
outcode,
channel_cfg["channel"],
)
break break
raw_props = data.get("properties", []) raw_props = data.get("properties", [])

View file

@ -6,7 +6,16 @@ from dataclasses import dataclass, field
import polars as pl import polars as pl
from constants import ARCGIS_PATH, CHANNELS, DATA_DIR, DELAY_BETWEEN_OUTCODES, SCRAPE_HOMECOUK, SCRAPE_OPENRENT, SCRAPE_RIGHTMOVE, SEED from constants import (
ARCGIS_PATH,
CHANNELS,
DATA_DIR,
DELAY_BETWEEN_OUTCODES,
SCRAPE_HOMECOUK,
SCRAPE_OPENRENT,
SCRAPE_RIGHTMOVE,
SEED,
)
from homecouk import CookiesExpiredError from homecouk import CookiesExpiredError
from homecouk import load_cookies as load_homecouk_cookies from homecouk import load_cookies as load_homecouk_cookies
from homecouk import make_client as make_homecouk_client from homecouk import make_client as make_homecouk_client
@ -64,13 +73,23 @@ def _sync_gauges() -> None:
scrape_outcodes_done.set(status.outcodes_done) scrape_outcodes_done.set(status.outcodes_done)
scrape_outcodes_total.set(status.outcodes_total) scrape_outcodes_total.set(status.outcodes_total)
# Total properties (both sources combined) # Total properties (both sources combined)
scrape_properties_total.labels(channel="buy", source="total").set(status.properties_buy) scrape_properties_total.labels(channel="buy", source="total").set(
scrape_properties_total.labels(channel="rent", source="total").set(status.properties_rent) status.properties_buy
)
scrape_properties_total.labels(channel="rent", source="total").set(
status.properties_rent
)
# Per-source breakdown for current channel # Per-source breakdown for current channel
ch = "buy" if status.channel == "BUY" else "rent" ch = "buy" if status.channel == "BUY" else "rent"
scrape_properties_total.labels(channel=ch, source="rightmove").set(status.rm_properties) scrape_properties_total.labels(channel=ch, source="rightmove").set(
scrape_properties_total.labels(channel=ch, source="homecouk").set(status.hk_properties) status.rm_properties
scrape_properties_total.labels(channel=ch, source="openrent").set(status.or_properties) )
scrape_properties_total.labels(channel=ch, source="homecouk").set(
status.hk_properties
)
scrape_properties_total.labels(channel=ch, source="openrent").set(
status.or_properties
)
if status.started_at: if status.started_at:
end = status.finished_at if status.finished_at else time.time() end = status.finished_at if status.finished_at else time.time()
scrape_elapsed_seconds.set(end - status.started_at) scrape_elapsed_seconds.set(end - status.started_at)
@ -86,7 +105,9 @@ def load_outcodes() -> list[str]:
log.info("England postcodes: %d", len(england)) log.info("England postcodes: %d", len(england))
outcodes = ( outcodes = (
england.select(pl.col("pcd").str.extract(r"^([A-Z]{1,2}\d[A-Z0-9]?)", 1).alias("outcode")) england.select(
pl.col("pcd").str.extract(r"^([A-Z]{1,2}\d[A-Z0-9]?)", 1).alias("outcode")
)
.drop_nulls() .drop_nulls()
.get_column("outcode") .get_column("outcode")
.unique() .unique()
@ -101,7 +122,9 @@ def build_postcode_index() -> PostcodeSpatialIndex:
"""Build spatial index from arcgis England postcodes.""" """Build spatial index from arcgis England postcodes."""
log.info("Building postcode spatial index from %s", ARCGIS_PATH) log.info("Building postcode spatial index from %s", ARCGIS_PATH)
df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"]) df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])
england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(subset=["lat", "long"]) england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(
subset=["lat", "long"]
)
return PostcodeSpatialIndex( return PostcodeSpatialIndex(
england.get_column("lat").to_list(), england.get_column("lat").to_list(),
england.get_column("long").to_list(), england.get_column("long").to_list(),
@ -114,7 +137,9 @@ def build_postcode_coords() -> dict[str, tuple[float, float]]:
Used by OpenRent scraper to resolve coordinates from postcodes.""" Used by OpenRent scraper to resolve coordinates from postcodes."""
log.info("Building postcode coords lookup from %s", ARCGIS_PATH) log.info("Building postcode coords lookup from %s", ARCGIS_PATH)
df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"]) df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])
england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(subset=["lat", "long"]) england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(
subset=["lat", "long"]
)
coords: dict[str, tuple[float, float]] = {} coords: dict[str, tuple[float, float]] = {}
for pcd, lat, lng in zip( for pcd, lat, lng in zip(
england.get_column("pcd").to_list(), england.get_column("pcd").to_list(),
@ -179,7 +204,9 @@ def run_scrape(
log.info("home.co.uk scraping ENABLED") log.info("home.co.uk scraping ENABLED")
homecouk_enabled.set(1) homecouk_enabled.set(1)
else: else:
log.info("home.co.uk scraping DISABLED (need FlareSolverr or HOMECOUK_CF_CLEARANCE + HOMECOUK_SESSION)") log.info(
"home.co.uk scraping DISABLED (need FlareSolverr or HOMECOUK_CF_CLEARANCE + HOMECOUK_SESSION)"
)
homecouk_enabled.set(0) homecouk_enabled.set(0)
# OpenRent: must be enabled via SCRAPE_OPENRENT + cookies available # OpenRent: must be enabled via SCRAPE_OPENRENT + cookies available
@ -195,7 +222,9 @@ def run_scrape(
log.info("OpenRent scraping ENABLED") log.info("OpenRent scraping ENABLED")
openrent_enabled.set(1) openrent_enabled.set(1)
else: else:
log.info("OpenRent scraping DISABLED (need FlareSolverr or OPENRENT_WAF_TOKEN)") log.info(
"OpenRent scraping DISABLED (need FlareSolverr or OPENRENT_WAF_TOKEN)"
)
openrent_enabled.set(0) openrent_enabled.set(0)
# Build postcode coords if OpenRent is active and caller didn't provide them # Build postcode coords if OpenRent is active and caller didn't provide them
@ -207,7 +236,9 @@ def run_scrape(
channel_name = channel_cfg["channel"] channel_name = channel_cfg["channel"]
file_suffix = "buy" if channel_name == "BUY" else "rent" file_suffix = "buy" if channel_name == "BUY" else "rent"
all_properties: dict[str, dict] = {} # dedup by id all_properties: dict[str, dict] = {} # dedup by id
seen_dedup_keys: set[tuple] = set() # cross-source dedup by (postcode, beds, price) seen_dedup_keys: set[tuple] = (
set()
) # cross-source dedup by (postcode, beds, price)
rm_count = 0 # Rightmove properties this channel rm_count = 0 # Rightmove properties this channel
hk_count = 0 # home.co.uk properties this channel hk_count = 0 # home.co.uk properties this channel
hk_dedup_count = 0 # home.co.uk skipped as cross-source duplicates hk_dedup_count = 0 # home.co.uk skipped as cross-source duplicates
@ -222,15 +253,22 @@ def run_scrape(
status.hk_properties = 0 status.hk_properties = 0
status.or_properties = 0 status.or_properties = 0
log.info("=== Starting %s channel (%d outcodes) ===", channel_name, len(shuffled)) log.info(
"=== Starting %s channel (%d outcodes) ===", channel_name, len(shuffled)
)
for i, outcode in enumerate(shuffled): for i, outcode in enumerate(shuffled):
with status_lock: with status_lock:
status.outcode = outcode status.outcode = outcode
status.outcodes_done = i status.outcodes_done = i
log.debug("Outcode %s (%d/%d) — %d properties so far", log.debug(
outcode, i + 1, len(shuffled), len(all_properties)) "Outcode %s (%d/%d) — %d properties so far",
outcode,
i + 1,
len(shuffled),
len(all_properties),
)
made_requests = False made_requests = False
@ -240,9 +278,13 @@ def run_scrape(
try: try:
outcode_id = resolve_outcode_id(client, outcode) outcode_id = resolve_outcode_id(client, outcode)
if not outcode_id: if not outcode_id:
log.debug("No Rightmove ID for outcode %s, skipping", outcode) log.debug(
"No Rightmove ID for outcode %s, skipping", outcode
)
else: else:
props = search_outcode(client, outcode_id, outcode, channel_cfg, pc_index) props = search_outcode(
client, outcode_id, outcode, channel_cfg, pc_index
)
for p in props: for p in props:
pid = p["id"] pid = p["id"]
if pid not in all_properties: if pid not in all_properties:
@ -261,7 +303,10 @@ def run_scrape(
made_requests = True made_requests = True
try: try:
hk_props = homecouk_search_outcode( hk_props = homecouk_search_outcode(
hk_client, outcode, channel_name, pc_index, hk_client,
outcode,
channel_name,
pc_index,
) )
for p in hk_props: for p in hk_props:
pid = p["id"] pid = p["id"]
@ -276,9 +321,13 @@ def run_scrape(
seen_dedup_keys.add(key) seen_dedup_keys.add(key)
hk_count += 1 hk_count += 1
if hk_props: if hk_props:
log.info("home.co.uk %s: +%d properties", outcode, len(hk_props)) log.info(
"home.co.uk %s: +%d properties", outcode, len(hk_props)
)
except CookiesExpiredError: except CookiesExpiredError:
log.warning("home.co.uk cookies expired — attempting refresh via FlareSolverr") log.warning(
"home.co.uk cookies expired — attempting refresh via FlareSolverr"
)
hk_client.close() hk_client.close()
hk_result = load_homecouk_cookies() hk_result = load_homecouk_cookies()
if hk_result: if hk_result:
@ -286,13 +335,17 @@ def run_scrape(
log.info("home.co.uk cookies refreshed, continuing") log.info("home.co.uk cookies refreshed, continuing")
cookie_refreshes_total.labels(result="success").inc() cookie_refreshes_total.labels(result="success").inc()
else: else:
log.warning("Cookie refresh failed, disabling home.co.uk for rest of scrape") log.warning(
"Cookie refresh failed, disabling home.co.uk for rest of scrape"
)
hk_client = None hk_client = None
hk_failed = True hk_failed = True
homecouk_enabled.set(0) homecouk_enabled.set(0)
cookie_refreshes_total.labels(result="failure").inc() cookie_refreshes_total.labels(result="failure").inc()
with status_lock: with status_lock:
status.errors.append("home.co.uk cookies expired and refresh failed") status.errors.append(
"home.co.uk cookies expired and refresh failed"
)
except Exception as e: except Exception as e:
msg = f"Error scraping home.co.uk {outcode}/{channel_name}: {e}" msg = f"Error scraping home.co.uk {outcode}/{channel_name}: {e}"
log.error(msg) log.error(msg)
@ -305,7 +358,10 @@ def run_scrape(
made_requests = True made_requests = True
try: try:
or_props = openrent_search_outcode( or_props = openrent_search_outcode(
or_client, outcode, pc_index, pc_coords, or_client,
outcode,
pc_index,
pc_coords,
) )
for p in or_props: for p in or_props:
pid = p["id"] pid = p["id"]
@ -318,9 +374,13 @@ def run_scrape(
seen_dedup_keys.add(key) seen_dedup_keys.add(key)
or_count += 1 or_count += 1
if or_props: if or_props:
log.info("OpenRent %s: +%d properties", outcode, len(or_props)) log.info(
"OpenRent %s: +%d properties", outcode, len(or_props)
)
except WafChallengeError: except WafChallengeError:
log.warning("OpenRent WAF cookies expired — attempting refresh via FlareSolverr") log.warning(
"OpenRent WAF cookies expired — attempting refresh via FlareSolverr"
)
or_client.close() or_client.close()
or_result = load_openrent_cookies() or_result = load_openrent_cookies()
if or_result: if or_result:
@ -328,13 +388,17 @@ def run_scrape(
log.info("OpenRent cookies refreshed, continuing") log.info("OpenRent cookies refreshed, continuing")
cookie_refreshes_total.labels(result="success").inc() cookie_refreshes_total.labels(result="success").inc()
else: else:
log.warning("Cookie refresh failed, disabling OpenRent for rest of scrape") log.warning(
"Cookie refresh failed, disabling OpenRent for rest of scrape"
)
or_client = None or_client = None
or_failed = True or_failed = True
openrent_enabled.set(0) openrent_enabled.set(0)
cookie_refreshes_total.labels(result="failure").inc() cookie_refreshes_total.labels(result="failure").inc()
with status_lock: with status_lock:
status.errors.append("OpenRent WAF cookies expired and refresh failed") status.errors.append(
"OpenRent WAF cookies expired and refresh failed"
)
except Exception as e: except Exception as e:
msg = f"Error scraping OpenRent {outcode}/{channel_name}: {e}" msg = f"Error scraping OpenRent {outcode}/{channel_name}: {e}"
log.error(msg) log.error(msg)
@ -352,8 +416,14 @@ def run_scrape(
status.or_properties = or_count status.or_properties = or_count
_sync_gauges() _sync_gauges()
log.info("Outcode %s: total %d (rm: %d, hk: %d, or: %d)", log.info(
outcode, len(all_properties), rm_count, hk_count, or_count) "Outcode %s: total %d (rm: %d, hk: %d, or: %d)",
outcode,
len(all_properties),
rm_count,
hk_count,
or_count,
)
if made_requests and i < len(shuffled) - 1: if made_requests and i < len(shuffled) - 1:
time.sleep(DELAY_BETWEEN_OUTCODES) time.sleep(DELAY_BETWEEN_OUTCODES)
@ -373,7 +443,11 @@ def run_scrape(
log.info( log.info(
"=== %s channel complete: %d unique (rm: %d, hk: %d, or: %d, cross-dedup: %d) ===", "=== %s channel complete: %d unique (rm: %d, hk: %d, or: %d, cross-dedup: %d) ===",
channel_name, len(deduped), rm_count, hk_count, or_count, channel_name,
len(deduped),
rm_count,
hk_count,
or_count,
hk_dedup_count + or_dedup_count, hk_dedup_count + or_dedup_count,
) )
@ -382,8 +456,12 @@ def run_scrape(
status.finished_at = time.time() status.finished_at = time.time()
_sync_gauges() _sync_gauges()
elapsed = status.finished_at - status.started_at elapsed = status.finished_at - status.started_at
log.info("Scrape complete in %.0fs — buy: %d, rent: %d", log.info(
elapsed, status.properties_buy, status.properties_rent) "Scrape complete in %.0fs — buy: %d, rent: %d",
elapsed,
status.properties_buy,
status.properties_rent,
)
except Exception as e: except Exception as e:
log.exception("Fatal scrape error") log.exception("Fatal scrape error")

View file

@ -11,12 +11,16 @@ class PostcodeSpatialIndex:
"""Grid-based spatial index over arcgis postcodes for nearest-lookup.""" """Grid-based spatial index over arcgis postcodes for nearest-lookup."""
def __init__(self, lats: list[float], lngs: list[float], postcodes: list[str]): def __init__(self, lats: list[float], lngs: list[float], postcodes: list[str]):
self.grid: dict[tuple[int, int], list[tuple[float, float, str]]] = defaultdict(list) self.grid: dict[tuple[int, int], list[tuple[float, float, str]]] = defaultdict(
list
)
for lat, lng, pcd in zip(lats, lngs, postcodes): for lat, lng, pcd in zip(lats, lngs, postcodes):
gx = int(math.floor(lng / GRID_CELL_SIZE)) gx = int(math.floor(lng / GRID_CELL_SIZE))
gy = int(math.floor(lat / GRID_CELL_SIZE)) gy = int(math.floor(lat / GRID_CELL_SIZE))
self.grid[(gx, gy)].append((lat, lng, pcd)) self.grid[(gx, gy)].append((lat, lng, pcd))
log.info("Postcode spatial index: %d cells, %d postcodes", len(self.grid), len(lats)) log.info(
"Postcode spatial index: %d cells, %d postcodes", len(self.grid), len(lats)
)
def nearest(self, lat: float, lng: float) -> str | None: def nearest(self, lat: float, lng: float) -> str | None:
gx = int(math.floor(lng / GRID_CELL_SIZE)) gx = int(math.floor(lng / GRID_CELL_SIZE))

View file

@ -60,9 +60,7 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
"Property type": [p["Property type"] for p in properties], "Property type": [p["Property type"] for p in properties],
"Property sub-type": [p["Property sub-type"] for p in properties], "Property sub-type": [p["Property sub-type"] for p in properties],
"Price qualifier": [p["Price qualifier"] for p in properties], "Price qualifier": [p["Price qualifier"] for p in properties],
"Total floor area (sqm)": [ "Total floor area (sqm)": [p["Total floor area (sqm)"] for p in properties],
p["Total floor area (sqm)"] for p in properties
],
"Listing URL": [p["Listing URL"] for p in properties], "Listing URL": [p["Listing URL"] for p in properties],
"Listing features": [p["Listing features"] for p in properties], "Listing features": [p["Listing features"] for p in properties],
"Listing date": listing_dates, "Listing date": listing_dates,

View file

@ -51,9 +51,19 @@ def fix_coords(lat: float, lng: float) -> tuple[float, float]:
if 49 <= lat <= 56 and -7 <= lng <= 2: if 49 <= lat <= 56 and -7 <= lng <= 2:
return lat, lng return lat, lng
if 49 <= lng <= 56 and -7 <= lat <= 2: if 49 <= lng <= 56 and -7 <= lat <= 2:
log.debug("Swapping reversed coords: lat=%.4f lng=%.4f → lat=%.4f lng=%.4f", lat, lng, lng, lat) log.debug(
"Swapping reversed coords: lat=%.4f lng=%.4f → lat=%.4f lng=%.4f",
lat,
lng,
lng,
lat,
)
return lng, lat return lng, lat
log.warning("Coords outside England bounds even after swap attempt: lat=%.4f lng=%.4f", lat, lng) log.warning(
"Coords outside England bounds even after swap attempt: lat=%.4f lng=%.4f",
lat,
lng,
)
return lat, lng return lat, lng
@ -66,7 +76,9 @@ def normalize_price(amount: int, frequency: str) -> int:
return amount return amount
def transform_property(prop: dict, outcode: str, pc_index: PostcodeSpatialIndex) -> dict | None: def transform_property(
prop: dict, outcode: str, pc_index: PostcodeSpatialIndex
) -> dict | None:
"""Transform a raw Rightmove property dict into our output schema.""" """Transform a raw Rightmove property dict into our output schema."""
loc = prop.get("location") loc = prop.get("location")
if not loc: if not loc:
@ -86,13 +98,19 @@ def transform_property(prop: dict, outcode: str, pc_index: PostcodeSpatialIndex)
price = normalize_price(int(amount), frequency) price = normalize_price(int(amount), frequency)
display_prices = price_obj.get("displayPrices", []) display_prices = price_obj.get("displayPrices", [])
price_qualifier = display_prices[0].get("displayPriceQualifier", "") if display_prices else "" price_qualifier = (
display_prices[0].get("displayPriceQualifier", "") if display_prices else ""
)
sub_type = prop.get("propertySubType", "") sub_type = prop.get("propertySubType", "")
bedrooms = prop.get("bedrooms", 0) or 0 bedrooms = prop.get("bedrooms", 0) or 0
bathrooms = prop.get("bathrooms", 0) or 0 bathrooms = prop.get("bathrooms", 0) or 0
key_features = [kf.get("description", "") for kf in prop.get("keyFeatures", []) if kf.get("description")] key_features = [
kf.get("description", "")
for kf in prop.get("keyFeatures", [])
if kf.get("description")
]
postcode = pc_index.nearest(lat, lng) postcode = pc_index.nearest(lat, lng)
if not postcode: if not postcode:

View file

@ -40,7 +40,9 @@ def download_and_convert(output_path: Path) -> None:
df = pl.concat(frames) df = pl.concat(frames)
print(f"Total rows: {df.height}") print(f"Total rows: {df.height}")
result = df.rename({"GEOGRAPHY_CODE": "lsoa21", "OBS_VALUE": "population"}).with_columns( result = df.rename(
{"GEOGRAPHY_CODE": "lsoa21", "OBS_VALUE": "population"}
).with_columns(
pl.col("population").cast(pl.UInt32), pl.col("population").cast(pl.UInt32),
) )
@ -48,7 +50,9 @@ def download_and_convert(output_path: Path) -> None:
result = result.filter(pl.col("lsoa21").str.starts_with("E")) result = result.filter(pl.col("lsoa21").str.starts_with("E"))
print(f"England LSOAs: {result.height}") print(f"England LSOAs: {result.height}")
print(f"Population range: {result['population'].min()} - {result['population'].max()}") print(
f"Population range: {result['population'].min()} - {result['population'].max()}"
)
print(f"Mean population: {result['population'].mean():.0f}") print(f"Mean population: {result['population'].mean():.0f}")
output_path.parent.mkdir(parents=True, exist_ok=True) output_path.parent.mkdir(parents=True, exist_ok=True)

View file

@ -119,7 +119,11 @@ class PlaceHandler(osmium.SimpleHandler):
station_tag = tags.get("station", "") station_tag = tags.get("station", "")
network = tags.get("network", "").lower() network = tags.get("network", "").lower()
# Skip tram stops # Skip tram stops
if station_tag == "light_rail" or "tramlink" in network or "tram" in network: if (
station_tag == "light_rail"
or "tramlink" in network
or "tram" in network
):
return return
display_name = _station_display_name(name, tags) display_name = _station_display_name(name, tags)
self._add(display_name, "station", lat, lon, population) self._add(display_name, "station", lat, lon, population)
@ -131,9 +135,7 @@ def main() -> None:
parser.add_argument( parser.add_argument(
"--output", type=Path, required=True, help="Output parquet file path" "--output", type=Path, required=True, help="Output parquet file path"
) )
parser.add_argument( parser.add_argument("--pbf", type=Path, required=True, help="Path to OSM PBF file")
"--pbf", type=Path, required=True, help="Path to OSM PBF file"
)
parser.add_argument( parser.add_argument(
"--boundary", "--boundary",
type=Path, type=Path,

View file

@ -111,9 +111,7 @@ def main() -> None:
parser.add_argument( parser.add_argument(
"--output", type=Path, required=True, help="Output parquet file path" "--output", type=Path, required=True, help="Output parquet file path"
) )
parser.add_argument( parser.add_argument("--pbf", type=Path, required=True, help="Path to OSM PBF file")
"--pbf", type=Path, required=True, help="Path to OSM PBF file"
)
parser.add_argument( parser.add_argument(
"--boundary", "--boundary",
type=Path, type=Path,

View file

@ -99,10 +99,14 @@ def convert_to_parquet(xls_path: Path, parquet_path: Path) -> None:
combined = pl.concat(frames) combined = pl.concat(frames)
# Remap old LA codes to new unitary authority codes and average medians # Remap old LA codes to new unitary authority codes and average medians
combined = combined.with_columns( combined = (
pl.col("area_code").replace(LA_CONSOLIDATION), combined.with_columns(
).group_by("area_code", "bedrooms").agg( pl.col("area_code").replace(LA_CONSOLIDATION),
pl.col("median_monthly_rent").mean(), )
.group_by("area_code", "bedrooms")
.agg(
pl.col("median_monthly_rent").mean(),
)
) )
print(f"Combined: {combined.shape}") print(f"Combined: {combined.shape}")

View file

@ -13,9 +13,7 @@ TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None: def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None:
df = pl.read_parquet(postcodes_path, columns=["Postcode"]) df = pl.read_parquet(postcodes_path, columns=["Postcode"])
outcodes = sorted( outcodes = sorted(set(df["Postcode"].str.split(" ").list.first().to_list()) - {""})
set(df["Postcode"].str.split(" ").list.first().to_list()) - {""}
)
print(f"Querying Rightmove typeahead for {len(outcodes)} outcodes...") print(f"Querying Rightmove typeahead for {len(outcodes)} outcodes...")
mapping: dict[str, str] = {} mapping: dict[str, str] = {}
@ -28,11 +26,9 @@ def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None:
data = resp.json() data = resp.json()
found = False found = False
for m in data.get("matches", []): for m in data.get("matches", []):
if ( if m["type"] == "OUTCODE" and m["displayName"].upper().replace(
m["type"] == "OUTCODE" " ", ""
and m["displayName"].upper().replace(" ", "") ) == oc.upper().replace(" ", ""):
== oc.upper().replace(" ", "")
):
mapping[oc] = str(m["id"]) mapping[oc] = str(m["id"])
found = True found = True
break break
@ -57,9 +53,7 @@ def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None:
def main() -> None: def main() -> None:
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(description="Fetch Rightmove outcode ID mapping")
description="Fetch Rightmove outcode ID mapping"
)
parser.add_argument( parser.add_argument(
"--postcodes", type=Path, required=True, help="postcode.parquet path" "--postcodes", type=Path, required=True, help="postcode.parquet path"
) )

View file

@ -64,7 +64,9 @@ def ensure_pmtiles_cli(bin_path: Path, version: str) -> None:
def main(): def main():
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--output", type=Path, required=True, help="Output .pmtiles path") parser.add_argument(
"--output", type=Path, required=True, help="Output .pmtiles path"
)
parser.add_argument( parser.add_argument(
"--pmtiles-version", default="1.22.3", help="go-pmtiles release version" "--pmtiles-version", default="1.22.3", help="go-pmtiles release version"
) )

View file

@ -56,7 +56,9 @@ NR_TIMETABLE_URL = "https://opendata.nationalrail.co.uk/api/staticfeeds/3.0/time
USER_AGENT = "property-map-pipeline/1.0 (https://github.com)" USER_AGENT = "property-map-pipeline/1.0 (https://github.com)"
def _download_http(url: str, dest: Path, *, desc: str, headers: dict | None = None) -> None: def _download_http(
url: str, dest: Path, *, desc: str, headers: dict | None = None
) -> None:
"""Stream-download a URL to a file with progress bar.""" """Stream-download a URL to a file with progress bar."""
dest.parent.mkdir(parents=True, exist_ok=True) dest.parent.mkdir(parents=True, exist_ok=True)
tmp = dest.with_suffix(dest.suffix + ".tmp") tmp = dest.with_suffix(dest.suffix + ".tmp")
@ -117,9 +119,10 @@ def clean_gtfs(src: Path, dst: Path) -> None:
return return
print("Cleaning GTFS for R5 compatibility...") print("Cleaning GTFS for R5 compatibility...")
with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile( with (
dst, "w", zipfile.ZIP_DEFLATED zipfile.ZipFile(src, "r") as zin,
) as zout: zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout,
):
for info in zin.infolist(): for info in zin.infolist():
if info.filename == "stop_times.txt": if info.filename == "stop_times.txt":
dropped = 0 dropped = 0
@ -127,7 +130,9 @@ def clean_gtfs(src: Path, dst: Path) -> None:
header = f.readline() header = f.readline()
header_str = header.decode("utf-8").strip() header_str = header.decode("utf-8").strip()
cols = header_str.split(",") cols = header_str.split(",")
arr_idx = cols.index("arrival_time") if "arrival_time" in cols else -1 arr_idx = (
cols.index("arrival_time") if "arrival_time" in cols else -1
)
dep_idx = ( dep_idx = (
cols.index("departure_time") if "departure_time" in cols else -1 cols.index("departure_time") if "departure_time" in cols else -1
) )
@ -179,7 +184,9 @@ def clean_gtfs(src: Path, dst: Path) -> None:
year = int(date_val[:4]) year = int(date_val[:4])
if year > 2100: if year > 2100:
parts[i] = "20991231" parts[i] = "20991231"
print(f" feed_info: capped end_date {date_val} → 20991231") print(
f" feed_info: capped end_date {date_val} → 20991231"
)
fixed_lines.append(",".join(parts)) fixed_lines.append(",".join(parts))
zout.writestr("feed_info.txt", "\n".join(fixed_lines) + "\n") zout.writestr("feed_info.txt", "\n".join(fixed_lines) + "\n")
else: else:
@ -334,7 +341,9 @@ def convert_high_freq_to_frequency_based(
end_secs = trips[-1][1] + int(median_hw) end_secs = trips[-1][1] + int(median_hw)
headway_rounded = max(60, round(median_hw / 60) * 60) headway_rounded = max(60, round(median_hw / 60) * 60)
frequency_entries.append((template_trip_id, start_secs, end_secs, headway_rounded)) frequency_entries.append(
(template_trip_id, start_secs, end_secs, headway_rounded)
)
for trip_id, _ in trips[1:]: for trip_id, _ in trips[1:]:
trips_to_remove.add(trip_id) trips_to_remove.add(trip_id)
groups_converted += 1 groups_converted += 1
@ -344,9 +353,10 @@ def convert_high_freq_to_frequency_based(
print(f" Created {len(frequency_entries)} frequency entries") print(f" Created {len(frequency_entries)} frequency entries")
# Step 5: Write modified GTFS # Step 5: Write modified GTFS
with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile( with (
dst, "w", zipfile.ZIP_DEFLATED zipfile.ZipFile(src, "r") as zin,
) as zout: zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout,
):
for info in zin.infolist(): for info in zin.infolist():
if info.filename == "trips.txt": if info.filename == "trips.txt":
with zin.open(info) as f: with zin.open(info) as f:
@ -466,15 +476,22 @@ def download_national_rail_cif(raw_dir: Path) -> Path | None:
email = os.environ.get("NATIONAL_RAIL_EMAIL") email = os.environ.get("NATIONAL_RAIL_EMAIL")
password = os.environ.get("NATIONAL_RAIL_PASSWORD") password = os.environ.get("NATIONAL_RAIL_PASSWORD")
if not email or not password: if not email or not password:
print("Warning: NATIONAL_RAIL_EMAIL/NATIONAL_RAIL_PASSWORD not set, skipping national rail") print(
"Warning: NATIONAL_RAIL_EMAIL/NATIONAL_RAIL_PASSWORD not set, skipping national rail"
)
return None return None
print("Authenticating with National Rail Open Data...") print("Authenticating with National Rail Open Data...")
auth_data = urllib.parse.urlencode({"username": email, "password": password}).encode() auth_data = urllib.parse.urlencode(
{"username": email, "password": password}
).encode()
auth_req = urllib.request.Request( auth_req = urllib.request.Request(
NR_AUTH_URL, NR_AUTH_URL,
data=auth_data, data=auth_data,
headers={"User-Agent": USER_AGENT, "Content-Type": "application/x-www-form-urlencoded"}, headers={
"User-Agent": USER_AGENT,
"Content-Type": "application/x-www-form-urlencoded",
},
) )
with urllib.request.urlopen(auth_req) as resp: with urllib.request.urlopen(auth_req) as resp:
token_data = json.loads(resp.read()) token_data = json.loads(resp.read())
@ -565,9 +582,10 @@ def clean_national_rail_gtfs(src: Path, dst: Path) -> None:
coords_fixed = 0 coords_fixed = 0
route_types_fixed = 0 route_types_fixed = 0
with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile( with (
dst, "w", zipfile.ZIP_DEFLATED zipfile.ZipFile(src, "r") as zin,
) as zout: zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout,
):
for info in zin.infolist(): for info in zin.infolist():
# Skip non-standard links.txt # Skip non-standard links.txt
if info.filename == "links.txt": if info.filename == "links.txt":
@ -581,8 +599,12 @@ def clean_national_rail_gtfs(src: Path, dst: Path) -> None:
trip_id_idx = cols.index("trip_id") trip_id_idx = cols.index("trip_id")
stop_id_idx = cols.index("stop_id") stop_id_idx = cols.index("stop_id")
seq_idx = cols.index("stop_sequence") seq_idx = cols.index("stop_sequence")
pickup_idx = cols.index("pickup_type") if "pickup_type" in cols else -1 pickup_idx = (
dropoff_idx = cols.index("drop_off_type") if "drop_off_type" in cols else -1 cols.index("pickup_type") if "pickup_type" in cols else -1
)
dropoff_idx = (
cols.index("drop_off_type") if "drop_off_type" in cols else -1
)
tmp = tempfile.NamedTemporaryFile( tmp = tempfile.NamedTemporaryFile(
mode="wb", delete=False, suffix=".txt" mode="wb", delete=False, suffix=".txt"
@ -769,16 +791,27 @@ def _docker_run_dtd2mysql(
) -> None: ) -> None:
"""Run dtd2mysql in a Node.js container on the same Docker network as MariaDB.""" """Run dtd2mysql in a Node.js container on the same Docker network as MariaDB."""
cmd = [ cmd = [
"docker", "run", "--rm", "--network", network, "docker",
"-e", f"DATABASE_HOSTNAME={db_container}", "run",
"-e", "DATABASE_USERNAME=root", "--rm",
"-e", "DATABASE_PASSWORD=root", "--network",
"-e", "DATABASE_NAME=dtd", network,
"-e",
f"DATABASE_HOSTNAME={db_container}",
"-e",
"DATABASE_USERNAME=root",
"-e",
"DATABASE_PASSWORD=root",
"-e",
"DATABASE_NAME=dtd",
] ]
for v in volumes: for v in volumes:
cmd.extend(["-v", v]) cmd.extend(["-v", v])
# Install zip (needed for --gtfs-zip) then run dtd2mysql # Install zip (needed for --gtfs-zip) then run dtd2mysql
inner = "apt-get update -qq && apt-get install -y -qq zip > /dev/null 2>&1 && npx --yes dtd2mysql " + " ".join(args) inner = (
"apt-get update -qq && apt-get install -y -qq zip > /dev/null 2>&1 && npx --yes dtd2mysql "
+ " ".join(args)
)
cmd.extend(["node:20", "bash", "-c", inner]) cmd.extend(["node:20", "bash", "-c", inner])
subprocess.run(cmd, check=True) subprocess.run(cmd, check=True)
@ -805,11 +838,17 @@ def convert_national_rail_to_gtfs(raw_dir: Path, output_dir: Path) -> Path:
subprocess.run(["docker", "network", "create", network], capture_output=True) subprocess.run(["docker", "network", "create", network], capture_output=True)
subprocess.run( subprocess.run(
[ [
"docker", "run", "-d", "docker",
"--name", db_container, "run",
"--network", network, "-d",
"-e", "MARIADB_ROOT_PASSWORD=root", "--name",
"-e", "MARIADB_DATABASE=dtd", db_container,
"--network",
network,
"-e",
"MARIADB_ROOT_PASSWORD=root",
"-e",
"MARIADB_DATABASE=dtd",
"mariadb:latest", "mariadb:latest",
], ],
check=True, check=True,
@ -820,7 +859,16 @@ def convert_national_rail_to_gtfs(raw_dir: Path, output_dir: Path) -> Path:
print(" Waiting for MariaDB to be ready...") print(" Waiting for MariaDB to be ready...")
for attempt in range(30): for attempt in range(30):
result = subprocess.run( result = subprocess.run(
["docker", "exec", db_container, "mariadb", "-uroot", "-proot", "-e", "SELECT 1"], [
"docker",
"exec",
db_container,
"mariadb",
"-uroot",
"-proot",
"-e",
"SELECT 1",
],
capture_output=True, capture_output=True,
) )
if result.returncode == 0: if result.returncode == 0:
@ -833,14 +881,16 @@ def convert_national_rail_to_gtfs(raw_dir: Path, output_dir: Path) -> Path:
print("Importing CIF timetable into MariaDB...") print("Importing CIF timetable into MariaDB...")
_docker_run_dtd2mysql( _docker_run_dtd2mysql(
network, db_container, network,
db_container,
volumes=[f"{raw_abs}:/data:ro"], volumes=[f"{raw_abs}:/data:ro"],
args=["--timetable", "/data/national_rail_cif.zip"], args=["--timetable", "/data/national_rail_cif.zip"],
) )
print("Exporting GTFS from MariaDB...") print("Exporting GTFS from MariaDB...")
_docker_run_dtd2mysql( _docker_run_dtd2mysql(
network, db_container, network,
db_container,
volumes=[f"{raw_abs}:/output"], volumes=[f"{raw_abs}:/output"],
args=["--gtfs-zip", "/output/national_rail_gtfs_raw.zip"], args=["--gtfs-zip", "/output/national_rail_gtfs_raw.zip"],
) )

View file

@ -94,11 +94,18 @@ def _build(
# Remap terminated postcodes to nearest active successor # Remap terminated postcodes to nearest active successor
postcode_mapping = build_postcode_mapping(arcgis_path) postcode_mapping = build_postcode_mapping(arcgis_path)
wide = wide.join( wide = (
postcode_mapping.lazy(), left_on="postcode", right_on="old_postcode", how="left" wide.join(
).with_columns( postcode_mapping.lazy(),
pl.coalesce("new_postcode", "postcode").alias("postcode"), left_on="postcode",
).drop("new_postcode") right_on="old_postcode",
how="left",
)
.with_columns(
pl.coalesce("new_postcode", "postcode").alias("postcode"),
)
.drop("new_postcode")
)
arcgis = ( arcgis = (
pl.scan_parquet(arcgis_path) pl.scan_parquet(arcgis_path)
@ -252,16 +259,18 @@ def _build(
.otherwise(pl.col("pp_property_type")) .otherwise(pl.col("pp_property_type"))
# Unify EPC's "Flat"/"Maisonette" with price-paid's "Flats/Maisonettes", # Unify EPC's "Flat"/"Maisonette" with price-paid's "Flats/Maisonettes",
# collapse terrace sub-types, and fold rare types into "Other" # collapse terrace sub-types, and fold rare types into "Other"
.replace({ .replace(
"Flat": "Flats/Maisonettes", {
"Maisonette": "Flats/Maisonettes", "Flat": "Flats/Maisonettes",
"End-Terrace": "Terraced", "Maisonette": "Flats/Maisonettes",
"Mid-Terrace": "Terraced", "End-Terrace": "Terraced",
"Enclosed End-Terrace": "Terraced", "Mid-Terrace": "Terraced",
"Enclosed Mid-Terrace": "Terraced", "Enclosed End-Terrace": "Terraced",
"Bungalow": "Other", "Enclosed Mid-Terrace": "Terraced",
"Park home": "Other", "Bungalow": "Other",
}) "Park home": "Other",
}
)
.alias("property_type") .alias("property_type")
) )
@ -426,10 +435,16 @@ def main():
help="Census 2021 population by LSOA parquet file", help="Census 2021 population by LSOA parquet file",
) )
parser.add_argument( parser.add_argument(
"--output-postcodes", type=Path, required=True, help="Output postcode parquet file path" "--output-postcodes",
type=Path,
required=True,
help="Output postcode parquet file path",
) )
parser.add_argument( parser.add_argument(
"--output-properties", type=Path, required=True, help="Output properties parquet file path" "--output-properties",
type=Path,
required=True,
help="Output properties parquet file path",
) )
args = parser.parse_args() args = parser.parse_args()

View file

@ -454,9 +454,7 @@ class TestFillHoles:
hole1 = [(10, 10), (20, 10), (20, 20), (10, 20), (10, 10)] hole1 = [(10, 10), (20, 10), (20, 20), (10, 20), (10, 10)]
outer2 = [(60, 60), (110, 60), (110, 110), (60, 110), (60, 60)] outer2 = [(60, 60), (110, 60), (110, 110), (60, 110), (60, 60)]
hole2 = [(70, 70), (80, 70), (80, 80), (70, 80), (70, 70)] hole2 = [(70, 70), (80, 70), (80, 80), (70, 80), (70, 70)]
mp = MultiPolygon( mp = MultiPolygon([Polygon(outer1, [hole1]), Polygon(outer2, [hole2])])
[Polygon(outer1, [hole1]), Polygon(outer2, [hole2])]
)
result = _fill_holes(mp) result = _fill_holes(mp)
assert result.geom_type == "MultiPolygon" assert result.geom_type == "MultiPolygon"
for p in result.geoms: for p in result.geoms:

View file

@ -112,7 +112,9 @@ def predict(test: pl.DataFrame, index: pl.DataFrame) -> pl.DataFrame:
def compute_metrics(actual: np.ndarray, predicted: np.ndarray) -> dict: def compute_metrics(actual: np.ndarray, predicted: np.ndarray) -> dict:
valid = np.isfinite(predicted) & np.isfinite(actual) & (actual > 0) & (predicted > 0) valid = (
np.isfinite(predicted) & np.isfinite(actual) & (actual > 0) & (predicted > 0)
)
actual = actual[valid] actual = actual[valid]
predicted = predicted[valid] predicted = predicted[valid]
@ -176,7 +178,10 @@ def main():
"--input", type=Path, required=True, help="Path to properties.parquet" "--input", type=Path, required=True, help="Path to properties.parquet"
) )
parser.add_argument( parser.add_argument(
"--postcodes", type=Path, required=True, help="Path to postcode.parquet (for lat/lon)" "--postcodes",
type=Path,
required=True,
help="Path to postcode.parquet (for lat/lon)",
) )
parser.add_argument( parser.add_argument(
"--output", type=Path, required=True, help="Output backtest_results.parquet" "--output", type=Path, required=True, help="Output backtest_results.parquet"
@ -185,7 +190,9 @@ def main():
# Build index from pre-test data only (temporal holdout) # Build index from pre-test data only (temporal holdout)
print(f"Building price index (pairs with year2 < {TEST_YEAR_MIN})...") print(f"Building price index (pairs with year2 < {TEST_YEAR_MIN})...")
index = build_index(args.input, max_pair_year=TEST_YEAR_MIN, postcodes_path=args.postcodes) index = build_index(
args.input, max_pair_year=TEST_YEAR_MIN, postcodes_path=args.postcodes
)
print( print(
f"\nHoldout index: {len(index):,} rows, {index['sector'].n_unique():,} sectors, " f"\nHoldout index: {len(index):,} rows, {index['sector'].n_unique():,} sectors, "
f"{index['type_group'].n_unique()} type groups" f"{index['type_group'].n_unique()} type groups"
@ -233,7 +240,9 @@ def main():
knn_est = knn_psm * fa * temporal_adj knn_est = knn_psm * fa * temporal_adj
n_knn = int((np.isfinite(knn_est) & (knn_est > 0)).sum()) n_knn = int((np.isfinite(knn_est) & (knn_est > 0)).sum())
print(f" kNN estimates: {n_knn:,} of {len(test):,} ({n_knn / len(test) * 100:.1f}%)") print(
f" kNN estimates: {n_knn:,} of {len(test):,} ({n_knn / len(test) * 100:.1f}%)"
)
# Blend: (1-w)*index + w*kNN where both available # Blend: (1-w)*index + w*kNN where both available
index_est = test["predicted"].to_numpy().astype(np.float64) index_est = test["predicted"].to_numpy().astype(np.float64)

View file

@ -107,9 +107,7 @@ def main():
pl.when(has_price) pl.when(has_price)
.then( .then(
pl.col("Last known price").cast(pl.Float64) pl.col("Last known price").cast(pl.Float64)
* ( * (pl.col("_log_index_current_interp") - pl.col("_log_index_sale_interp"))
pl.col("_log_index_current_interp") - pl.col("_log_index_sale_interp")
)
.clip(-MAX_LOG_ADJUSTMENT, MAX_LOG_ADJUSTMENT) .clip(-MAX_LOG_ADJUSTMENT, MAX_LOG_ADJUSTMENT)
.exp() .exp()
) )

View file

@ -105,9 +105,7 @@ def extract_pairs(input_path: Path, max_year2: int | None = None) -> pl.DataFram
.alias("log_ratio"), .alias("log_ratio"),
( (
1.0 1.0
/ (pl.col("frac_year2") - pl.col("frac_year1")) / (pl.col("frac_year2") - pl.col("frac_year1")).cast(pl.Float64).sqrt()
.cast(pl.Float64)
.sqrt()
).alias("weight"), ).alias("weight"),
) )
.filter(pl.col("log_ratio").abs() <= OUTLIER_THRESHOLD) .filter(pl.col("log_ratio").abs() <= OUTLIER_THRESHOLD)
@ -453,8 +451,12 @@ def main():
description="Build improved repeat-sales price index" description="Build improved repeat-sales price index"
) )
parser.add_argument("--input", type=Path, required=True) parser.add_argument("--input", type=Path, required=True)
parser.add_argument("--postcodes", type=Path, required=True, parser.add_argument(
help="Path to postcode.parquet (for lat/lon centroids)") "--postcodes",
type=Path,
required=True,
help="Path to postcode.parquet (for lat/lon centroids)",
)
parser.add_argument("--output", type=Path, required=True) parser.add_argument("--output", type=Path, required=True)
args = parser.parse_args() args = parser.parse_args()

View file

@ -43,48 +43,39 @@ def build_knn_pool(
""" """
print("Building kNN pool...") print("Building kNN pool...")
lf = pl.scan_parquet(source) if isinstance(source, Path) else source lf = pl.scan_parquet(source) if isinstance(source, Path) else source
query = ( query = lf.select(
lf "Postcode",
.select( "Property type",
"Postcode", "lat",
"Property type", "lon",
"lat", "Total floor area (sqm)",
"lon", "Last known price",
"Total floor area (sqm)", "Date of last transaction",
"Last known price", ).filter(
"Date of last transaction", pl.col("lat").is_not_null(),
) pl.col("lon").is_not_null(),
.filter( pl.col("Total floor area (sqm)").is_not_null(),
pl.col("lat").is_not_null(), pl.col("Total floor area (sqm)") > 0,
pl.col("lon").is_not_null(), pl.col("Last known price").is_not_null(),
pl.col("Total floor area (sqm)").is_not_null(), pl.col("Last known price") > 0,
pl.col("Total floor area (sqm)") > 0, pl.col("Postcode").is_not_null(),
pl.col("Last known price").is_not_null(), pl.col("Date of last transaction").is_not_null(),
pl.col("Last known price") > 0,
pl.col("Postcode").is_not_null(),
pl.col("Date of last transaction").is_not_null(),
)
) )
if max_sale_year is not None: if max_sale_year is not None:
query = query.filter( query = query.filter(
pl.col("Date of last transaction").dt.year() < max_sale_year pl.col("Date of last transaction").dt.year() < max_sale_year
) )
pool = ( pool = query.with_columns(
query.with_columns( sector_expr(),
sector_expr(), type_group_expr(),
type_group_expr(), (
( pl.col("Date of last transaction").dt.year().cast(pl.Float64)
pl.col("Date of last transaction").dt.year().cast(pl.Float64) + (pl.col("Date of last transaction").dt.month().cast(pl.Float64) - 1.0)
+ ( / 12.0
pl.col("Date of last transaction").dt.month().cast(pl.Float64) ).alias("_sale_fy"),
- 1.0 pl.lit(ref_frac_year).alias("_ref_fy"),
) ).collect()
/ 12.0
).alias("_sale_fy"),
pl.lit(ref_frac_year).alias("_ref_fy"),
).collect()
)
pool = pool.filter(pl.col("type_group").is_not_null()) pool = pool.filter(pl.col("type_group").is_not_null())
print(f" {len(pool):,} pool properties with lat/lon, floor area, price") print(f" {len(pool):,} pool properties with lat/lon, floor area, price")

View file

@ -1085,7 +1085,9 @@ def transform(
if cat not in all_set: if cat not in all_set:
mapped_but_absent.append(cat) mapped_but_absent.append(cat)
if mapped_but_absent: if mapped_but_absent:
print(f"CATEGORY_MAP categories not in data (skipped): {sorted(mapped_but_absent)}") print(
f"CATEGORY_MAP categories not in data (skipped): {sorted(mapped_but_absent)}"
)
# Drop unwanted categories # Drop unwanted categories
lf = lf.filter(~pl.col("category").is_in(list(DROP_CATEGORIES))) lf = lf.filter(~pl.col("category").is_in(list(DROP_CATEGORIES)))

View file

@ -27,7 +27,9 @@ def load_england_polygon(geojson_path: Path) -> PreparedGeometry:
return prep(geometry) return prep(geometry)
def in_england_mask(geojson_path: Path, lats: np.ndarray, lngs: np.ndarray) -> np.ndarray: def in_england_mask(
geojson_path: Path, lats: np.ndarray, lngs: np.ndarray
) -> np.ndarray:
"""Vectorized check: which (lat, lng) points are within England. """Vectorized check: which (lat, lng) points are within England.
Returns a boolean numpy array. Returns a boolean numpy array.

View file

@ -106,7 +106,9 @@ def count_pois_per_postcode(
if nearby is None: if nearby is None:
continue continue
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i]) distances = haversine_km(
poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i]
)
within_mask = distances <= radius_km within_mask = distances <= radius_km
within_indices = nearby[within_mask] within_indices = nearby[within_mask]
@ -179,7 +181,9 @@ def min_distance_per_postcode(
if nearby is None: if nearby is None:
continue continue
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i]) distances = haversine_km(
poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i]
)
for group, cat_mask in category_masks.items(): for group, cat_mask in category_masks.items():
group_mask = cat_mask[nearby] group_mask = cat_mask[nearby]

View file

@ -15,26 +15,49 @@ def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
""" """
arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry") == "E92000001") arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry") == "E92000001")
active = arcgis.filter(pl.col("doterm").is_null()).select("pcds", "oseast1m", "osnrth1m").collect() active = (
terminated = arcgis.filter(pl.col("doterm").is_not_null()).select("pcds", "oseast1m", "osnrth1m").collect() arcgis.filter(pl.col("doterm").is_null())
.select("pcds", "oseast1m", "osnrth1m")
.collect()
)
terminated = (
arcgis.filter(pl.col("doterm").is_not_null())
.select("pcds", "oseast1m", "osnrth1m")
.collect()
)
print(f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}") print(
f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}"
)
if terminated.height == 0: if terminated.height == 0:
return pl.DataFrame({"old_postcode": pl.Series([], dtype=pl.Utf8), "new_postcode": pl.Series([], dtype=pl.Utf8)}) return pl.DataFrame(
{
"old_postcode": pl.Series([], dtype=pl.Utf8),
"new_postcode": pl.Series([], dtype=pl.Utf8),
}
)
active_coords = np.column_stack([active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()]) active_coords = np.column_stack(
terminated_coords = np.column_stack([terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()]) [active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()]
)
terminated_coords = np.column_stack(
[terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()]
)
tree = cKDTree(active_coords) tree = cKDTree(active_coords)
distances, indices = tree.query(terminated_coords) distances, indices = tree.query(terminated_coords)
active_postcodes = active["pcds"] active_postcodes = active["pcds"]
mapping = pl.DataFrame({ mapping = pl.DataFrame(
"old_postcode": terminated["pcds"], {
"new_postcode": active_postcodes.gather(indices), "old_postcode": terminated["pcds"],
}) "new_postcode": active_postcodes.gather(indices),
}
)
print(f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m") print(
f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m"
)
return mapping return mapping

View file

@ -72,7 +72,9 @@ def test_no_pois_returns_zeros(postcodes):
"category": pl.Series([], dtype=pl.String), "category": pl.Series([], dtype=pl.String),
} }
) )
result = count_pois_per_postcode(postcodes, empty_pois, groups=POI_GROUPS, radius_km=2.0) result = count_pois_per_postcode(
postcodes, empty_pois, groups=POI_GROUPS, radius_km=2.0
)
for group in POI_GROUPS: for group in POI_GROUPS:
col = f"{group}_2km" col = f"{group}_2km"
@ -125,7 +127,9 @@ def test_min_distance_no_pois_returns_nan(postcodes):
"category": pl.Series([], dtype=pl.String), "category": pl.Series([], dtype=pl.String),
} }
) )
result = min_distance_per_postcode(postcodes, empty_pois, groups={"train_tube": ["Rail station"]}) result = min_distance_per_postcode(
postcodes, empty_pois, groups={"train_tube": ["Rail station"]}
)
assert "train_tube_nearest_km" in result.columns assert "train_tube_nearest_km" in result.columns
assert all(np.isnan(v) for v in result["train_tube_nearest_km"].to_list()) assert all(np.isnan(v) for v in result["train_tube_nearest_km"].to_list())

View file

@ -28,8 +28,8 @@
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"\n", "\n",
"\n", "\n",
"pd.set_option('display.max_columns', None)\n", "pd.set_option(\"display.max_columns\", None)\n",
"pd.set_option('display.max_colwidth', 60)" "pd.set_option(\"display.max_colwidth\", 60)"
] ]
}, },
{ {
@ -47,7 +47,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"param_import_path = '/bulk/wide-2.parquet'\n", "param_import_path = \"/bulk/wide-2.parquet\"\n",
"\n", "\n",
"param_lookback = 3" "param_lookback = 3"
] ]
@ -128,7 +128,7 @@
], ],
"source": [ "source": [
"data = pl.scan_parquet(param_import_path).unique(subset=[\"Postcode\", \"Address per EPC\"])\n", "data = pl.scan_parquet(param_import_path).unique(subset=[\"Postcode\", \"Address per EPC\"])\n",
"data = data.filter(pl.col('Total floor area (sqm)') > 10)\n", "data = data.filter(pl.col(\"Total floor area (sqm)\") > 10)\n",
"\n", "\n",
"# print(data.collect_schema()) # column names and types\n", "# print(data.collect_schema()) # column names and types\n",
"print(data.select(pl.len()).collect()) # row count\n", "print(data.select(pl.len()).collect()) # row count\n",
@ -145,22 +145,20 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"columns_required = [\n", "columns_required = [\n",
" # absolute neccesity \n", " # absolute neccesity\n",
" 'Postcode',\n", " \"Postcode\",\n",
" 'Address per EPC',\n", " \"Address per EPC\",\n",
" 'historical_prices',\n", " \"historical_prices\",\n",
" 'Price per sqm',\n", " \"Price per sqm\",\n",
"\n",
" # faily fixed attributes\n", " # faily fixed attributes\n",
" 'Property type', # or 'epc_property_type' or 'built_form'\n", " \"Property type\", # or 'epc_property_type' or 'built_form'\n",
" 'Leashold/Freehold',\n", " \"Leashold/Freehold\",\n",
" 'Total floor area (sqm)',\n", " \"Total floor area (sqm)\",\n",
" 'Rooms (including bedrooms & bathrooms)',\n", " \"Rooms (including bedrooms & bathrooms)\",\n",
" 'Approximate construction age',\n", " \"Approximate construction age\",\n",
"\n",
" # latest\n", " # latest\n",
" # 'date_of_transfer'\n", " # 'date_of_transfer'\n",
" 'Last known price'\n", " \"Last known price\",\n",
"]" "]"
] ]
}, },
@ -440,8 +438,13 @@
], ],
"source": [ "source": [
"# temp_Postcodes = [\"LE5 4ED\", \"E14 9GU\", \"YO8 9PW\", \"SW1P 3AN\", \"BH3 7DX\", \"E14 2DG\"]\n", "# temp_Postcodes = [\"LE5 4ED\", \"E14 9GU\", \"YO8 9PW\", \"SW1P 3AN\", \"BH3 7DX\", \"E14 2DG\"]\n",
"temp_Postcodes = data.select('Postcode').collect().sample(10000)['Postcode'].to_list()\n", "temp_Postcodes = data.select(\"Postcode\").collect().sample(10000)[\"Postcode\"].to_list()\n",
"data_small = data.filter(pl.col(\"Postcode\").is_in(temp_Postcodes)).select(columns_required).collect().to_pandas()\n", "data_small = (\n",
" data.filter(pl.col(\"Postcode\").is_in(temp_Postcodes))\n",
" .select(columns_required)\n",
" .collect()\n",
" .to_pandas()\n",
")\n",
"data_small = data_small.explode(\"historical_prices\")\n", "data_small = data_small.explode(\"historical_prices\")\n",
"data_small[\"year\"] = data_small[\"historical_prices\"].apply(lambda x: x[\"year\"])\n", "data_small[\"year\"] = data_small[\"historical_prices\"].apply(lambda x: x[\"year\"])\n",
"data_small[\"price\"] = data_small[\"historical_prices\"].apply(lambda x: x[\"price\"])\n", "data_small[\"price\"] = data_small[\"historical_prices\"].apply(lambda x: x[\"price\"])\n",
@ -458,7 +461,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# data_small[\n", "# data_small[\n",
"# (data_small['Postcode'] == 'E14 2DG') \n", "# (data_small['Postcode'] == 'E14 2DG')\n",
"# & data_small['epc_address'].str.contains('76')\n", "# & data_small['epc_address'].str.contains('76')\n",
"# ]" "# ]"
] ]
@ -908,35 +911,45 @@
"from typing import Any\n", "from typing import Any\n",
"from pandas.core.frame import DataFrame\n", "from pandas.core.frame import DataFrame\n",
"\n", "\n",
"print(f'rolling periods (relative): {[i for i in range(-param_lookback, 1)]}')\n", "print(f\"rolling periods (relative): {[i for i in range(-param_lookback, 1)]}\")\n",
"\n", "\n",
"# Rolling average (±2 year), weighted by number of sales per year\n", "# Rolling average (±2 year), weighted by number of sales per year\n",
"pc_avg_raw = data_small.groupby(['Postcode', 'year']).agg(\n", "pc_avg_raw = (\n",
" ppsqm_sum=('Price per sqm', 'sum'),\n", " data_small.groupby([\"Postcode\", \"year\"])\n",
" ppsqm_count=('Price per sqm', 'count')\n", " .agg(ppsqm_sum=(\"Price per sqm\", \"sum\"), ppsqm_count=(\"Price per sqm\", \"count\"))\n",
").reset_index().sort_values(by=['Postcode', 'year'], ascending=False)\n", " .reset_index()\n",
" .sort_values(by=[\"Postcode\", \"year\"], ascending=False)\n",
")\n",
"\n", "\n",
"display(pc_avg_raw)\n", "display(pc_avg_raw)\n",
"\n", "\n",
"# Each year's totals contribute to year-1, year, and year+1\n", "# Each year's totals contribute to year-1, year, and year+1\n",
"pc_avg_expanded = pd.concat([\n", "pc_avg_expanded = pd.concat(\n",
" pc_avg_raw.assign(year=pc_avg_raw['year'] + offset) for offset in range(-param_lookback, 1) # \n", " [\n",
"])\n", " pc_avg_raw.assign(year=pc_avg_raw[\"year\"] + offset)\n",
" for offset in range(-param_lookback, 1) #\n",
" ]\n",
")\n",
"\n", "\n",
"display(pc_avg_expanded)\n", "display(pc_avg_expanded)\n",
"\n", "\n",
"# Sum counts and sums, then divide to get weighted mean\n", "# Sum counts and sums, then divide to get weighted mean\n",
"pc_avg_complex = pc_avg_expanded.groupby(['Postcode', 'year']).agg(\n", "pc_avg_complex = (\n",
" ppsqm_sum=('ppsqm_sum', 'sum'),\n", " pc_avg_expanded.groupby([\"Postcode\", \"year\"])\n",
" ppsqm_count=('ppsqm_count', 'sum')\n", " .agg(ppsqm_sum=(\"ppsqm_sum\", \"sum\"), ppsqm_count=(\"ppsqm_count\", \"sum\"))\n",
").reset_index()\n", " .reset_index()\n",
"pc_avg_complex['Price per sqm PC AVG'] = pc_avg_complex['ppsqm_sum'] / pc_avg_complex['ppsqm_count']\n", ")\n",
"pc_avg_complex: Any | DataFrame = pc_avg_complex[['Postcode', 'year', 'Price per sqm PC AVG']].sort_values(by=['Postcode', 'year'], ascending=False)\n", "pc_avg_complex[\"Price per sqm PC AVG\"] = (\n",
" pc_avg_complex[\"ppsqm_sum\"] / pc_avg_complex[\"ppsqm_count\"]\n",
")\n",
"pc_avg_complex: Any | DataFrame = pc_avg_complex[\n",
" [\"Postcode\", \"year\", \"Price per sqm PC AVG\"]\n",
"].sort_values(by=[\"Postcode\", \"year\"], ascending=False)\n",
"display(pc_avg_complex)\n", "display(pc_avg_complex)\n",
"\n", "\n",
"temp_df = pc_avg_complex[pc_avg_complex['Postcode'] == data_small['Postcode'].iloc[0]]\n", "temp_df = pc_avg_complex[pc_avg_complex[\"Postcode\"] == data_small[\"Postcode\"].iloc[0]]\n",
"print(data_small['Postcode'].iloc[0])\n", "print(data_small[\"Postcode\"].iloc[0])\n",
"temp_df.plot.line(x='year', y='Price per sqm PC AVG')" "temp_df.plot.line(x=\"year\", y=\"Price per sqm PC AVG\")"
] ]
}, },
{ {
@ -1111,9 +1124,13 @@
} }
], ],
"source": [ "source": [
"data_small = data_small.merge(pc_avg_complex, on=['Postcode', 'year'], suffixes=('', ' pc_avg_complex'))\n", "data_small = data_small.merge(\n",
"data_small['c'] = data_small['Price per sqm'] / data_small['Price per sqm PC AVG']\n", " pc_avg_complex, on=[\"Postcode\", \"year\"], suffixes=(\"\", \" pc_avg_complex\")\n",
"data_small[['Postcode', 'Address per EPC', 'Price per sqm', 'Price per sqm PC AVG', 'c']]" ")\n",
"data_small[\"c\"] = data_small[\"Price per sqm\"] / data_small[\"Price per sqm PC AVG\"]\n",
"data_small[\n",
" [\"Postcode\", \"Address per EPC\", \"Price per sqm\", \"Price per sqm PC AVG\", \"c\"]\n",
"]"
] ]
}, },
{ {
@ -1445,17 +1462,21 @@
], ],
"source": [ "source": [
"# 1. Coefficient of Variation (std/mean) per property, filtered to 3+ sales\n", "# 1. Coefficient of Variation (std/mean) per property, filtered to 3+ sales\n",
"c_stats = data_small.groupby(['Postcode', 'Address per EPC']).agg(\n", "c_stats = (\n",
" n_sales=('c', 'count'),\n", " data_small.groupby([\"Postcode\", \"Address per EPC\"])\n",
" year_min=('year', 'min'),\n", " .agg(\n",
" year_max=('year', 'max'),\n", " n_sales=(\"c\", \"count\"),\n",
" c_mean=('c', 'mean'),\n", " year_min=(\"year\", \"min\"),\n",
" c_std=('c', 'std'),\n", " year_max=(\"year\", \"max\"),\n",
").dropna()\n", " c_mean=(\"c\", \"mean\"),\n",
"c_stats['c_cv'] = c_stats['c_std'] / c_stats['c_mean']\n", " c_std=(\"c\", \"std\"),\n",
" )\n",
" .dropna()\n",
")\n",
"c_stats[\"c_cv\"] = c_stats[\"c_std\"] / c_stats[\"c_mean\"]\n",
"# c_stats_3plus = c_stats[c_stats['n_sales'] >= 3]\n", "# c_stats_3plus = c_stats[c_stats['n_sales'] >= 3]\n",
"# print(f\"Properties with 3+ sales: {len(c_stats_3plus)} / {len(c_stats)}\")\n", "# print(f\"Properties with 3+ sales: {len(c_stats_3plus)} / {len(c_stats)}\")\n",
"c_stats.sort_values('c_cv', ascending=False).head(20)" "c_stats.sort_values(\"c_cv\", ascending=False).head(20)"
] ]
}, },
{ {
@ -2265,42 +2286,44 @@
"display(random_c)\n", "display(random_c)\n",
"\n", "\n",
"# pc avg trend\n", "# pc avg trend\n",
"temp_pc_avg = pc_avg_complex[pc_avg_complex['Postcode'] == random_c.index[0][0]].sort_values(by='year')\n", "temp_pc_avg = pc_avg_complex[\n",
" pc_avg_complex[\"Postcode\"] == random_c.index[0][0]\n",
"].sort_values(by=\"year\")\n",
"display(temp_pc_avg)\n", "display(temp_pc_avg)\n",
"\n", "\n",
"# c for specific address\n", "# c for specific address\n",
"temp_postcode = data_small[\n", "temp_postcode = data_small[\n",
" (data_small['Postcode'] == random_c.index[0][0]) \n", " (data_small[\"Postcode\"] == random_c.index[0][0])\n",
" # & (data_small['Address per EPC'] == random_c.index[0][1]) \n", " # & (data_small['Address per EPC'] == random_c.index[0][1])\n",
"].sort_values(by='year')\n", "].sort_values(by=\"year\")\n",
"display(temp_postcode)\n", "display(temp_postcode)\n",
"\n", "\n",
"temp_address = data_small[\n", "temp_address = data_small[\n",
" (data_small['Postcode'] == random_c.index[0][0]) \n", " (data_small[\"Postcode\"] == random_c.index[0][0])\n",
" & (data_small['Address per EPC'] == random_c.index[0][1]) \n", " & (data_small[\"Address per EPC\"] == random_c.index[0][1])\n",
"].sort_values(by='year')\n", "].sort_values(by=\"year\")\n",
"display(temp_address)\n", "display(temp_address)\n",
"\n", "\n",
"# plot\n", "# plot\n",
"\n", "\n",
"fig, ax1 = plt.subplots()\n", "fig, ax1 = plt.subplots()\n",
"\n", "\n",
"temp_pc_avg.plot.line(x='year', y='Price per sqm PC AVG', ax=ax1, color='black')\n", "temp_pc_avg.plot.line(x=\"year\", y=\"Price per sqm PC AVG\", ax=ax1, color=\"black\")\n",
"temp_address.plot.line(x='year', y='Price per sqm', ax=ax1, color='green') \n", "temp_address.plot.line(x=\"year\", y=\"Price per sqm\", ax=ax1, color=\"green\")\n",
"\n", "\n",
"ax2 = ax1.twinx()\n", "ax2 = ax1.twinx()\n",
"ax2.set_ylim(0, 3)\n", "ax2.set_ylim(0, 3)\n",
"\n", "\n",
"for property in temp_postcode['Address per EPC'].unique():\n", "for property in temp_postcode[\"Address per EPC\"].unique():\n",
" property_data = temp_postcode[temp_postcode['Address per EPC'] == property]\n", " property_data = temp_postcode[temp_postcode[\"Address per EPC\"] == property]\n",
" property_data.plot.line(x='year', y='c', ax=ax2, color='orange', style=':')\n", " property_data.plot.line(x=\"year\", y=\"c\", ax=ax2, color=\"orange\", style=\":\")\n",
"\n", "\n",
"temp_address.plot.line(x='year', y='c', ax=ax2, color='red', style=':')\n", "temp_address.plot.line(x=\"year\", y=\"c\", ax=ax2, color=\"red\", style=\":\")\n",
"\n", "\n",
"ax1.set_ylabel('Price per sqm')\n", "ax1.set_ylabel(\"Price per sqm\")\n",
"ax2.set_ylabel('c')\n", "ax2.set_ylabel(\"c\")\n",
"\n", "\n",
"plt.show()\n" "plt.show()"
] ]
}, },
{ {
@ -2640,17 +2663,21 @@
], ],
"source": [ "source": [
"# 1. Coefficient of Variation (std/mean) per property, filtered to 3+ sales\n", "# 1. Coefficient of Variation (std/mean) per property, filtered to 3+ sales\n",
"c_stats = data_small.groupby(['Postcode', 'Address per EPC']).agg(\n", "c_stats = (\n",
" n_sales=('c', 'count'),\n", " data_small.groupby([\"Postcode\", \"Address per EPC\"])\n",
" year_min=('year', 'min'),\n", " .agg(\n",
" year_max=('year', 'max'),\n", " n_sales=(\"c\", \"count\"),\n",
" c_mean=('c', 'mean'),\n", " year_min=(\"year\", \"min\"),\n",
" c_std=('c', 'std'),\n", " year_max=(\"year\", \"max\"),\n",
").dropna()\n", " c_mean=(\"c\", \"mean\"),\n",
"c_stats['c_cv'] = c_stats['c_std'] / c_stats['c_mean']\n", " c_std=(\"c\", \"std\"),\n",
" )\n",
" .dropna()\n",
")\n",
"c_stats[\"c_cv\"] = c_stats[\"c_std\"] / c_stats[\"c_mean\"]\n",
"# c_stats_3plus = c_stats[c_stats['n_sales'] >= 3]\n", "# c_stats_3plus = c_stats[c_stats['n_sales'] >= 3]\n",
"# print(f\"Properties with 3+ sales: {len(c_stats_3plus)} / {len(c_stats)}\")\n", "# print(f\"Properties with 3+ sales: {len(c_stats_3plus)} / {len(c_stats)}\")\n",
"c_stats.sort_values('c_cv', ascending=False).head(20)" "c_stats.sort_values(\"c_cv\", ascending=False).head(20)"
] ]
}, },
{ {
@ -2685,31 +2712,41 @@
"\n", "\n",
"fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n", "fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n",
"\n", "\n",
"axes[0].hist(c_stats['c_std'], bins=100, edgecolor='black')\n", "axes[0].hist(c_stats[\"c_std\"], bins=100, edgecolor=\"black\")\n",
"axes[0].set_xlabel('Std of c')\n", "axes[0].set_xlabel(\"Std of c\")\n",
"axes[0].set_ylabel('Number of properties')\n", "axes[0].set_ylabel(\"Number of properties\")\n",
"axes[0].set_title('Distribution of c stability (std)')\n", "axes[0].set_title(\"Distribution of c stability (std)\")\n",
"axes[0].axvline(x=c_stats['c_std'].median(), color='red', linestyle='--', label=f'Median ({c_stats['c_std'].median()}) threshold')\n", "axes[0].axvline(\n",
" x=c_stats[\"c_std\"].median(),\n",
" color=\"red\",\n",
" linestyle=\"--\",\n",
" label=f\"Median ({c_stats['c_std'].median()}) threshold\",\n",
")\n",
"axes[0].legend()\n", "axes[0].legend()\n",
"\n", "\n",
"axes[1].hist(c_stats['c_cv'], bins=100, edgecolor='black')\n", "axes[1].hist(c_stats[\"c_cv\"], bins=100, edgecolor=\"black\")\n",
"axes[1].set_xlabel('CV of c (std/mean)')\n", "axes[1].set_xlabel(\"CV of c (std/mean)\")\n",
"axes[1].set_ylabel('Number of properties')\n", "axes[1].set_ylabel(\"Number of properties\")\n",
"axes[1].set_title('Distribution of c stability (CV)')\n", "axes[1].set_title(\"Distribution of c stability (CV)\")\n",
"axes[1].axvline(x=c_stats['c_cv'].median(), color='red', linestyle='--', label=f'Median ({c_stats['c_cv'].median()}) threshold')\n", "axes[1].axvline(\n",
" x=c_stats[\"c_cv\"].median(),\n",
" color=\"red\",\n",
" linestyle=\"--\",\n",
" label=f\"Median ({c_stats['c_cv'].median()}) threshold\",\n",
")\n",
"axes[1].legend()\n", "axes[1].legend()\n",
"\n", "\n",
"plt.tight_layout()\n", "plt.tight_layout()\n",
"plt.show()\n", "plt.show()\n",
"\n", "\n",
"# output text\n", "# output text\n",
"pct_stable = (c_stats['c_cv'] < 0.1).mean() * 100\n", "pct_stable = (c_stats[\"c_cv\"] < 0.1).mean() * 100\n",
"print(f\"{pct_stable:.1f}% of properties have CV < 0.1\")\n", "print(f\"{pct_stable:.1f}% of properties have CV < 0.1\")\n",
"\n", "\n",
"pct_stable = (c_stats['c_cv'] < 0.2).mean() * 100\n", "pct_stable = (c_stats[\"c_cv\"] < 0.2).mean() * 100\n",
"print(f\"{pct_stable:.1f}% of properties have CV < 0.2\")\n", "print(f\"{pct_stable:.1f}% of properties have CV < 0.2\")\n",
"\n", "\n",
"pct_stable = (c_stats['c_cv'] < 0.3).mean() * 100\n", "pct_stable = (c_stats[\"c_cv\"] < 0.3).mean() * 100\n",
"print(f\"{pct_stable:.1f}% of properties have CV < 0.3\")" "print(f\"{pct_stable:.1f}% of properties have CV < 0.3\")"
] ]
}, },
@ -3299,7 +3336,7 @@
} }
], ],
"source": [ "source": [
"unstable_c = c_stats.sort_values('c_cv', ascending=False)['c_cv'][:20]\n", "unstable_c = c_stats.sort_values(\"c_cv\", ascending=False)[\"c_cv\"][:20]\n",
"display(unstable_c)\n", "display(unstable_c)\n",
"\n", "\n",
"unstable_c_specific = random.randint(0, 20)\n", "unstable_c_specific = random.randint(0, 20)\n",
@ -3308,41 +3345,43 @@
"print(unstable_c.index[unstable_c_specific][1])\n", "print(unstable_c.index[unstable_c_specific][1])\n",
"\n", "\n",
"# pc avg trend\n", "# pc avg trend\n",
"temp_pc_avg = pc_avg_complex[pc_avg_complex['Postcode'] == unstable_c.index[unstable_c_specific][0]].sort_values(by='year')\n", "temp_pc_avg = pc_avg_complex[\n",
" pc_avg_complex[\"Postcode\"] == unstable_c.index[unstable_c_specific][0]\n",
"].sort_values(by=\"year\")\n",
"display(temp_pc_avg)\n", "display(temp_pc_avg)\n",
"\n", "\n",
"# c for specific postcode\n", "# c for specific postcode\n",
"temp_postcode = data_small[\n", "temp_postcode = data_small[\n",
" (data_small['Postcode'] == unstable_c.index[unstable_c_specific][0]) \n", " (data_small[\"Postcode\"] == unstable_c.index[unstable_c_specific][0])\n",
" # & (data_small['Address per EPC'] == unstable_c.index[unstable_c_specific][1]) \n", " # & (data_small['Address per EPC'] == unstable_c.index[unstable_c_specific][1])\n",
"].sort_values(by='year')\n", "].sort_values(by=\"year\")\n",
"display(temp_address)\n", "display(temp_address)\n",
"\n", "\n",
"# c for specific address\n", "# c for specific address\n",
"temp_address = data_small[\n", "temp_address = data_small[\n",
" (data_small['Postcode'] == unstable_c.index[unstable_c_specific][0]) \n", " (data_small[\"Postcode\"] == unstable_c.index[unstable_c_specific][0])\n",
" & (data_small['Address per EPC'] == unstable_c.index[unstable_c_specific][1]) \n", " & (data_small[\"Address per EPC\"] == unstable_c.index[unstable_c_specific][1])\n",
"].sort_values(by='year')\n", "].sort_values(by=\"year\")\n",
"display(temp_address)\n", "display(temp_address)\n",
"\n", "\n",
"# plot\n", "# plot\n",
"\n", "\n",
"fig, ax1 = plt.subplots()\n", "fig, ax1 = plt.subplots()\n",
"\n", "\n",
"temp_pc_avg.plot.line(x='year', y='Price per sqm PC AVG', ax=ax1, color='black')\n", "temp_pc_avg.plot.line(x=\"year\", y=\"Price per sqm PC AVG\", ax=ax1, color=\"black\")\n",
"temp_address.plot.line(x='year', y='Price per sqm', ax=ax1, color='green') \n", "temp_address.plot.line(x=\"year\", y=\"Price per sqm\", ax=ax1, color=\"green\")\n",
"\n", "\n",
"ax2 = ax1.twinx()\n", "ax2 = ax1.twinx()\n",
"\n", "\n",
"for property in temp_postcode['Address per EPC'].unique():\n", "for property in temp_postcode[\"Address per EPC\"].unique():\n",
" property_data = temp_postcode[temp_postcode['Address per EPC'] == property]\n", " property_data = temp_postcode[temp_postcode[\"Address per EPC\"] == property]\n",
" property_data.plot.line(x='year', y='c', ax=ax2, color='orange', style=':')\n", " property_data.plot.line(x=\"year\", y=\"c\", ax=ax2, color=\"orange\", style=\":\")\n",
"temp_address.plot.line(x='year', y='c', ax=ax2, color='red', style=':')\n", "temp_address.plot.line(x=\"year\", y=\"c\", ax=ax2, color=\"red\", style=\":\")\n",
"\n", "\n",
"ax1.set_ylabel('Price per sqm')\n", "ax1.set_ylabel(\"Price per sqm\")\n",
"ax2.set_ylabel('c')\n", "ax2.set_ylabel(\"c\")\n",
"\n", "\n",
"plt.show()\n" "plt.show()"
] ]
}, },
{ {
@ -3370,11 +3409,11 @@
], ],
"source": [ "source": [
"# select random address\n", "# select random address\n",
"one_property = data_small.sample(1)[['Postcode', 'Address per EPC']].iloc[0]\n", "one_property = data_small.sample(1)[[\"Postcode\", \"Address per EPC\"]].iloc[0]\n",
"postcode = one_property['Postcode']\n", "postcode = one_property[\"Postcode\"]\n",
"address = one_property['Address per EPC']\n", "address = one_property[\"Address per EPC\"]\n",
"print(f'Postcode: {postcode}')\n", "print(f\"Postcode: {postcode}\")\n",
"print(f'Address: {address}')" "print(f\"Address: {address}\")"
] ]
}, },
{ {
@ -3481,22 +3520,21 @@
], ],
"source": [ "source": [
"property_data = data_small[\n", "property_data = data_small[\n",
" (data_small['Postcode'] == postcode) \n", " (data_small[\"Postcode\"] == postcode) & (data_small[\"Address per EPC\"] == address)\n",
" & (data_small['Address per EPC'] == address) \n",
"]\n", "]\n",
"latest_year = property_data['year'].max()\n", "latest_year = property_data[\"year\"].max()\n",
"print(f'Latest year of data: {latest_year}')\n", "print(f\"Latest year of data: {latest_year}\")\n",
"\n", "\n",
"# Get only the latest year's data for this property (this is what we want to predict)\n", "# Get only the latest year's data for this property (this is what we want to predict)\n",
"data_small_test = property_data[property_data['year'] == latest_year]\n", "data_small_test = property_data[property_data[\"year\"] == latest_year]\n",
"\n", "\n",
"# Remove only the latest year's data from training (keep historical data for this property)\n", "# Remove only the latest year's data from training (keep historical data for this property)\n",
"data_small_train = data_small.drop(data_small_test.index)\n", "data_small_train = data_small.drop(data_small_test.index)\n",
"\n", "\n",
"print()\n", "print()\n",
"print(f'data_small.shape = {data_small.shape}')\n", "print(f\"data_small.shape = {data_small.shape}\")\n",
"print(f'data_small_train.shape = {data_small_train.shape}')\n", "print(f\"data_small_train.shape = {data_small_train.shape}\")\n",
"print(f'data_small_test.shape = {data_small_test.shape}')\n", "print(f\"data_small_test.shape = {data_small_test.shape}\")\n",
"display(data_small_test)\n", "display(data_small_test)\n",
"data_small.shape[0] == data_small_test.shape[0] + data_small_train.shape[0]" "data_small.shape[0] == data_small_test.shape[0] + data_small_train.shape[0]"
] ]
@ -3607,9 +3645,9 @@
"# get latest c in data_small_train\n", "# get latest c in data_small_train\n",
"\n", "\n",
"latest_train_address = data_small_train[\n", "latest_train_address = data_small_train[\n",
" (data_small_train['Postcode'] == postcode) \n", " (data_small_train[\"Postcode\"] == postcode)\n",
" & (data_small_train['Address per EPC'] == address) \n", " & (data_small_train[\"Address per EPC\"] == address)\n",
"].sort_values(by='year')\n", "].sort_values(by=\"year\")\n",
"\n", "\n",
"latest_train_address" "latest_train_address"
] ]
@ -3630,10 +3668,10 @@
} }
], ],
"source": [ "source": [
"latest_train_c = latest_train_address['c'].iloc[-1]\n", "latest_train_c = latest_train_address[\"c\"].iloc[-1]\n",
"latest_train_pc_avg = latest_train_address['Price per sqm PC AVG'].iloc[-1]\n", "latest_train_pc_avg = latest_train_address[\"Price per sqm PC AVG\"].iloc[-1]\n",
"print(f'Latest c in training data: {latest_train_c:.3f}')\n", "print(f\"Latest c in training data: {latest_train_c:.3f}\")\n",
"print(f'Latest price per sqm in training data: {latest_train_pc_avg:.2f}') " "print(f\"Latest price per sqm in training data: {latest_train_pc_avg:.2f}\")"
] ]
}, },
{ {
@ -3654,7 +3692,7 @@
} }
], ],
"source": [ "source": [
"latest_train_c * latest_train_pc_avg * data_small_test['Total floor area (sqm)'].iloc[0]" "latest_train_c * latest_train_pc_avg * data_small_test[\"Total floor area (sqm)\"].iloc[0]"
] ]
}, },
{ {

View file

@ -4,6 +4,7 @@ import sys
from collections import deque from collections import deque
from PIL import Image from PIL import Image
def remove_white_bg(path: str, tolerance: int = 20, out: str | None = None): def remove_white_bg(path: str, tolerance: int = 20, out: str | None = None):
img = Image.open(path).convert("RGBA") img = Image.open(path).convert("RGBA")
pixels = img.load() pixels = img.load()
@ -43,6 +44,7 @@ def remove_white_bg(path: str, tolerance: int = 20, out: str | None = None):
img.save(dest) img.save(dest)
print(f"Saved to {dest} ({img.size[0]}x{img.size[1]})") print(f"Saved to {dest} ({img.size[0]}x{img.size[1]})")
if __name__ == "__main__": if __name__ == "__main__":
if len(sys.argv) < 2: if len(sys.argv) < 2:
print("Usage: python remove_bg.py <image> [tolerance] [output]") print("Usage: python remove_bg.py <image> [tolerance] [output]")

View file

@ -9,7 +9,7 @@ clap = { version = "4", features = ["derive", "env"] }
axum = "0.8" axum = "0.8"
tower-http = { version = "0.6", features = ["cors", "fs", "compression-gzip", "compression-zstd", "trace"] } tower-http = { version = "0.6", features = ["cors", "fs", "compression-gzip", "compression-zstd", "trace"] }
tokio = { version = "1", features = ["full"] } tokio = { version = "1", features = ["full"] }
polars = { version = "0.46", features = ["parquet", "lazy", "dtype-struct", "dtype-u8", "dtype-u16", "dtype-i8", "dtype-i16"] } polars = { version = "0.46", features = ["parquet", "lazy", "dtype-struct", "dtype-u8", "dtype-u16", "dtype-i8", "dtype-i16", "round_series"] }
h3o = "0.7" h3o = "0.7"
serde = { version = "1", features = ["derive"] } serde = { version = "1", features = ["derive"] }
serde_json = "1" serde_json = "1"

View file

@ -0,0 +1,981 @@
2026-03-15T19:07:11.371851Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T19:07:11.372040Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T19:07:11.372050Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T19:07:11.473120Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T19:07:11.473130Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T19:07:22.441464Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T19:07:22.441476Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T19:07:22.848304Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T19:07:22.848315Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T19:07:22.913269Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T19:07:22.913279Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T19:07:33.981737Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T19:07:33.981832Z INFO property_map_server::data::property: Feature columns from config numeric=54 enums=13 total=67
2026-03-15T19:07:35.443457Z INFO property_map_server::data::property: Combined data selected rows=15702691
2026-03-15T19:07:35.594896Z INFO property_map_server::data::property: Extracting numeric feature columns
2026-03-15T19:07:36.004267Z INFO property_map_server::data::property: Computing histograms for numeric features
2026-03-15T19:07:37.616212Z INFO property_map_server::data::property: Extracting string columns
2026-03-15T19:07:39.772016Z INFO property_map_server::data::property: Building enum features
2026-03-15T19:07:41.167109Z INFO property_map_server::data::property: Extracting renovation history
2026-03-15T19:07:49.922117Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
2026-03-15T19:07:49.922128Z INFO property_map_server::data::property: Extracting listing features
2026-03-15T19:07:50.511333Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
2026-03-15T19:07:50.511342Z INFO property_map_server::data::property: Sorting rows by spatial locality
2026-03-15T19:07:51.475798Z INFO property_map_server::data::property: Building interned strings
2026-03-15T19:07:57.644399Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
2026-03-15T19:08:00.205219Z INFO property_map_server::data::property: Data loading complete
2026-03-15T19:08:01.951719Z INFO property_map_server: Property data loaded rows=15702691 features=67 enums=13
2026-03-15T19:08:01.951728Z INFO property_map_server: Building spatial grid index (0.01° cells)
2026-03-15T19:08:02.049262Z INFO property_map_server: Precomputing H3 cells at resolution 12
2026-03-15T19:08:02.049271Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
2026-03-15T19:08:02.477049Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
2026-03-15T19:08:02.477606Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
2026-03-15T19:08:02.477614Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
2026-03-15T19:08:02.501985Z INFO property_map_server::data::poi: Loaded 678242 POIs
2026-03-15T19:08:02.612770Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
2026-03-15T19:08:02.613426Z INFO property_map_server::data::poi: POI data loading complete.
2026-03-15T19:08:02.652324Z INFO property_map_server: POI data loaded pois=678242
2026-03-15T19:08:02.652334Z INFO property_map_server: Building POI spatial grid index
2026-03-15T19:08:02.658355Z INFO property_map_server: Loading place data from /app/data/places.parquet
2026-03-15T19:08:02.658366Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
2026-03-15T19:08:02.660009Z INFO property_map_server::data::places: Loaded 3474 places
2026-03-15T19:08:02.660839Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
2026-03-15T19:08:02.660901Z INFO property_map_server: Place data loaded places=3474
2026-03-15T19:08:02.660910Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
2026-03-15T19:08:02.660914Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
2026-03-15T19:08:02.664216Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
2026-03-15T19:08:04.225988Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
2026-03-15T19:08:04.225999Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
2026-03-15T19:08:04.226018Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
2026-03-15T19:08:04.265284Z INFO property_map_server: PMTiles loaded successfully
2026-03-15T19:08:04.310151Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
2026-03-15T19:08:04.343659Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
2026-03-15T19:08:04.343821Z INFO property_map_server: Precomputed features response groups=9
2026-03-15T19:08:04.343836Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
2026-03-15T19:08:04.432857Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
2026-03-15T19:08:04.438796Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
2026-03-15T19:08:04.443153Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
2026-03-15T19:08:04.727071Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfectpostcodes.schmelczer.dev/pb
2026-03-15T19:08:04.742347Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
2026-03-15T19:08:04.742398Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
2026-03-15T19:08:04.742414Z INFO property_map_server: Loading travel time data from /app/data/travel-times
2026-03-15T19:08:04.783484Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
2026-03-15T19:08:04.784506Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
2026-03-15T19:08:04.785961Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
2026-03-15T19:08:04.794234Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
2026-03-15T19:08:04.794259Z INFO property_map_server: Travel time store loaded modes=4
2026-03-15T19:08:04.794312Z INFO property_map_server: Precomputed AI filters system prompt
2026-03-15T19:08:07.250052Z INFO property_map_server: All memory pages locked (mlockall)
2026-03-15T19:08:07.250147Z INFO property_map_server: Server listening on 0.0.0.0:8001
2026-03-15T19:08:43.332953Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=905 cells_after_filter=904 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=70.2 total_ms=82.4
2026-03-15T19:08:44.121660Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1337 cells_after_filter=1335 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=68.3 total_ms=84.2
2026-03-15T19:08:45.206450Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1337 cells_after_filter=1335 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=71.1 total_ms=94.8
2026-03-15T19:08:45.422613Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1356 cells_after_filter=1351 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1077.7 total_ms=1109.0
2026-03-15T19:08:47.208854Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1337 cells_after_filter=1335 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=68.0 total_ms=79.9
2026-03-15T19:08:47.294458Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1356 cells_after_filter=1351 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1074.4 total_ms=1104.8
2026-03-15T19:08:48.178659Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=905 cells_after_filter=904 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=79.6 total_ms=86.6
2026-03-15T19:08:48.443812Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1337 cells_after_filter=1335 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=71.5 total_ms=83.5
2026-03-15T19:08:51.505423Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=602 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=1.0
2026-03-15T19:08:52.293946Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1004 cells_after_filter=1004 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=55.5 total_ms=64.3
2026-03-15T19:09:12.606611Z INFO property_map_server::routes::properties: GET /api/hexagon-properties h3=86195dac7ffffff resolution=6 total=602 returned=100 offset=0 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=1.4
2026-03-15T19:09:17.589934Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=25877 filters=1 filters_raw="Listing status:Historical sale" ms=12.0
2026-03-15T19:09:19.152498Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1021 cells_after_filter=1017 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=944.3 total_ms=960.2
2026-03-15T19:09:19.587280Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=602 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=1.0
2026-03-15T19:09:20.120819Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1004 cells_after_filter=1004 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=59.6 total_ms=68.9
2026-03-15T19:09:21.757704Z INFO property_map_server::routes::properties: GET /api/hexagon-properties h3=86195dac7ffffff resolution=6 total=602 returned=100 offset=0 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=1.3
2026-03-15T19:09:26.458314Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=25877 filters=1 filters_raw="Listing status:Historical sale" ms=12.2
2026-03-15T19:09:26.734420Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=602 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=1.1
2026-03-15T19:09:27.270779Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1004 cells_after_filter=1004 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=59.2 total_ms=68.7
2026-03-15T19:09:27.465769Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=29 filters=1 filters_raw="Listing status:For rent" ms=1.5
2026-03-15T19:09:27.551401Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1021 cells_after_filter=1017 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=938.9 total_ms=955.1
2026-03-15T19:09:27.636702Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=688 cells_after_filter=688 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=64.2 total_ms=69.4
2026-03-15T19:09:28.107949Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=602 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=1.0
2026-03-15T19:09:28.647682Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1004 cells_after_filter=1004 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=57.9 total_ms=66.8
2026-03-15T19:10:43.696300Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=29 filters=1 filters_raw="Listing status:For rent" ms=1.5
2026-03-15T19:10:43.917385Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=688 cells_after_filter=688 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=66.4 total_ms=72.3
2026-03-15T19:10:44.773099Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=602 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=0.9
2026-03-15T19:10:45.309165Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1004 cells_after_filter=1004 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=55.8 total_ms=65.3
2026-03-15T19:11:44.827084Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T19:11:45.019108Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T19:11:45.539093Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=67.3 total_ms=79.7
2026-03-15T19:12:22.900048Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=1266.0 total_ms=1293.0
2026-03-15T19:12:43.487506Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=97.7 total_ms=119.2
2026-03-15T19:12:56.981906Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=92.3 total_ms=105.6
2026-03-15T19:15:16.007945Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T19:15:16.008103Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T19:15:16.008110Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T19:15:16.129677Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T19:15:16.129690Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T19:15:25.994671Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T19:15:25.994690Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T19:15:26.528002Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T19:15:26.528016Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T19:15:26.610377Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T19:15:26.610387Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T19:16:11.721573Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T19:16:11.721688Z INFO property_map_server::data::property: Feature columns from config numeric=54 enums=13 total=67
2026-03-15T19:16:13.315773Z INFO property_map_server::data::property: Combined data selected rows=15702691
2026-03-15T19:16:13.525913Z INFO property_map_server::data::property: Extracting numeric feature columns
2026-03-15T19:16:14.026360Z INFO property_map_server::data::property: Computing histograms for numeric features
2026-03-15T19:16:15.880642Z INFO property_map_server::data::property: Extracting string columns
2026-03-15T19:16:18.439507Z INFO property_map_server::data::property: Building enum features
2026-03-15T19:16:20.159326Z INFO property_map_server::data::property: Extracting renovation history
2026-03-15T19:16:22.764657Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
2026-03-15T19:16:22.764667Z INFO property_map_server::data::property: Extracting listing features
2026-03-15T19:16:23.416811Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
2026-03-15T19:16:23.416820Z INFO property_map_server::data::property: Sorting rows by spatial locality
2026-03-15T19:16:24.612917Z INFO property_map_server::data::property: Building interned strings
2026-03-15T19:16:31.709728Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
2026-03-15T19:16:34.550669Z INFO property_map_server::data::property: Data loading complete
2026-03-15T19:16:36.027023Z INFO property_map_server: Property data loaded rows=15702691 features=67 enums=13
2026-03-15T19:16:36.027032Z INFO property_map_server: Building spatial grid index (0.01° cells)
2026-03-15T19:16:36.427705Z INFO property_map_server: Precomputing H3 cells at resolution 12
2026-03-15T19:16:36.427713Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
2026-03-15T19:16:36.848803Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
2026-03-15T19:16:36.848848Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
2026-03-15T19:16:36.848862Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
2026-03-15T19:16:36.922825Z INFO property_map_server::data::poi: Loaded 678242 POIs
2026-03-15T19:16:37.040700Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
2026-03-15T19:16:37.041316Z INFO property_map_server::data::poi: POI data loading complete.
2026-03-15T19:16:37.080709Z INFO property_map_server: POI data loaded pois=678242
2026-03-15T19:16:37.080719Z INFO property_map_server: Building POI spatial grid index
2026-03-15T19:16:37.087492Z INFO property_map_server: Loading place data from /app/data/places.parquet
2026-03-15T19:16:37.087505Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
2026-03-15T19:16:37.092797Z INFO property_map_server::data::places: Loaded 3474 places
2026-03-15T19:16:37.093630Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
2026-03-15T19:16:37.093696Z INFO property_map_server: Place data loaded places=3474
2026-03-15T19:16:37.093709Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
2026-03-15T19:16:37.093714Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
2026-03-15T19:16:37.097696Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
2026-03-15T19:16:40.166666Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
2026-03-15T19:16:40.166676Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
2026-03-15T19:16:40.166689Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
2026-03-15T19:16:40.223398Z INFO property_map_server: PMTiles loaded successfully
2026-03-15T19:16:40.267971Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
2026-03-15T19:16:40.329251Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
2026-03-15T19:16:40.329417Z INFO property_map_server: Precomputed features response groups=9
2026-03-15T19:16:40.329432Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
2026-03-15T19:16:40.466894Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
2026-03-15T19:16:40.474329Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
2026-03-15T19:16:40.488014Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
2026-03-15T19:16:40.724141Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfectpostcodes.schmelczer.dev/pb
2026-03-15T19:16:40.728811Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
2026-03-15T19:16:40.728841Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
2026-03-15T19:16:40.728857Z INFO property_map_server: Loading travel time data from /app/data/travel-times
2026-03-15T19:16:40.781239Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
2026-03-15T19:16:40.786720Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
2026-03-15T19:16:40.789828Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
2026-03-15T19:16:40.812650Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
2026-03-15T19:16:40.812688Z INFO property_map_server: Travel time store loaded modes=4
2026-03-15T19:16:40.812745Z INFO property_map_server: Precomputed AI filters system prompt
2026-03-15T19:16:44.806149Z INFO property_map_server: All memory pages locked (mlockall)
2026-03-15T19:16:44.806208Z INFO property_map_server: Server listening on 0.0.0.0:8001
2026-03-15T19:18:41.554399Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1102.3 total_ms=1142.0
2026-03-15T19:18:51.943051Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=1089.3 total_ms=1111.0
2026-03-15T19:18:52.235271Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=88.8 total_ms=101.0
2026-03-15T19:18:53.034978Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=71.2 total_ms=79.2
2026-03-15T19:18:54.485285Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=86.6 total_ms=98.8
2026-03-15T19:18:56.331564Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=1100.7 total_ms=1122.4
2026-03-15T19:18:57.013412Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=86.3 total_ms=98.3
2026-03-15T19:18:57.497646Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=80.4 total_ms=87.3
2026-03-15T19:18:58.307229Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=86.4 total_ms=98.6
2026-03-15T19:18:59.209272Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=92.3 total_ms=115.9
2026-03-15T19:18:59.605162Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=1104.1 total_ms=1125.8
2026-03-15T19:19:00.641552Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=101.5 total_ms=114.0
2026-03-15T19:19:01.055691Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=1102.8 total_ms=1133.7
2026-03-15T19:19:01.767817Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=70.5 total_ms=77.5
2026-03-15T19:19:02.094672Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=95.1 total_ms=107.1
2026-03-15T19:19:04.696909Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=1094.7 total_ms=1116.4
2026-03-15T19:20:18.135252Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=87.8 total_ms=99.9
2026-03-15T19:21:06.589853Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=70.6 total_ms=77.5
2026-03-15T19:22:21.723624Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T19:22:21.723777Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T19:22:21.723788Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T19:22:21.792919Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T19:22:21.792931Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T19:24:04.122070Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T19:24:04.122238Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T19:24:04.122243Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T19:24:04.183691Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T19:24:04.183700Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T19:24:10.703405Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T19:24:10.703567Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T19:24:10.703573Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T19:24:10.764393Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T19:24:10.764404Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T19:24:13.175087Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T19:24:13.175096Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T19:24:14.743132Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T19:24:14.743145Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T19:24:15.276707Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T19:24:15.276719Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T19:32:31.326505Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T19:32:31.326667Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T19:32:31.326674Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T19:32:31.468299Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T19:32:31.468311Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T19:32:36.199335Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T19:32:36.199348Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T19:32:36.594288Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T19:32:36.594299Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T19:32:36.659669Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T19:32:36.659679Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T19:33:03.760178Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T19:33:03.760265Z INFO property_map_server::data::property: Feature columns from config numeric=54 enums=13 total=67
2026-03-15T19:33:05.203275Z INFO property_map_server::data::property: Combined data selected rows=15702691
2026-03-15T19:33:05.385487Z INFO property_map_server::data::property: Extracting numeric feature columns
2026-03-15T19:33:05.825530Z INFO property_map_server::data::property: Computing histograms for numeric features
2026-03-15T19:33:07.452637Z INFO property_map_server::data::property: Extracting string columns
2026-03-15T19:33:09.737556Z INFO property_map_server::data::property: Building enum features
2026-03-15T19:33:11.190065Z INFO property_map_server::data::property: Extracting renovation history
2026-03-15T19:33:13.640495Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
2026-03-15T19:33:13.640506Z INFO property_map_server::data::property: Extracting listing features
2026-03-15T19:33:14.224090Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
2026-03-15T19:33:14.224101Z INFO property_map_server::data::property: Sorting rows by spatial locality
2026-03-15T19:33:15.218314Z INFO property_map_server::data::property: Building interned strings
2026-03-15T19:33:21.691381Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
2026-03-15T19:33:24.347401Z INFO property_map_server::data::property: Data loading complete
2026-03-15T19:33:26.411071Z INFO property_map_server: Property data loaded rows=15702691 features=67 enums=13
2026-03-15T19:33:26.411081Z INFO property_map_server: Building spatial grid index (0.01° cells)
2026-03-15T19:33:26.535487Z INFO property_map_server: Precomputing H3 cells at resolution 12
2026-03-15T19:33:26.535498Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
2026-03-15T19:33:27.113000Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
2026-03-15T19:33:27.113028Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
2026-03-15T19:33:27.113036Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
2026-03-15T19:33:27.165121Z INFO property_map_server::data::poi: Loaded 678242 POIs
2026-03-15T19:33:27.304878Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
2026-03-15T19:33:27.305504Z INFO property_map_server::data::poi: POI data loading complete.
2026-03-15T19:33:27.346347Z INFO property_map_server: POI data loaded pois=678242
2026-03-15T19:33:27.346357Z INFO property_map_server: Building POI spatial grid index
2026-03-15T19:33:27.352391Z INFO property_map_server: Loading place data from /app/data/places.parquet
2026-03-15T19:33:27.352399Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
2026-03-15T19:33:27.365758Z INFO property_map_server::data::places: Loaded 3474 places
2026-03-15T19:33:27.366584Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
2026-03-15T19:33:27.366644Z INFO property_map_server: Place data loaded places=3474
2026-03-15T19:33:27.366655Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
2026-03-15T19:33:27.366659Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
2026-03-15T19:33:27.392761Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
2026-03-15T19:33:36.895174Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
2026-03-15T19:33:36.895188Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
2026-03-15T19:33:36.895208Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
2026-03-15T19:33:36.998297Z INFO property_map_server: PMTiles loaded successfully
2026-03-15T19:33:37.053356Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
2026-03-15T19:33:37.109062Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
2026-03-15T19:33:37.109355Z INFO property_map_server: Precomputed features response groups=9
2026-03-15T19:33:37.109374Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
2026-03-15T19:33:37.762412Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
2026-03-15T19:33:37.767896Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
2026-03-15T19:33:37.775340Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
2026-03-15T19:33:38.060153Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
2026-03-15T19:33:38.063925Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
2026-03-15T19:33:38.063950Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
2026-03-15T19:33:38.063967Z INFO property_map_server: Loading travel time data from /app/data/travel-times
2026-03-15T19:33:38.278834Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
2026-03-15T19:33:38.287416Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
2026-03-15T19:33:38.292977Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
2026-03-15T19:33:38.359034Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
2026-03-15T19:33:38.359070Z INFO property_map_server: Travel time store loaded modes=4
2026-03-15T19:33:38.359129Z INFO property_map_server: Precomputed AI filters system prompt
2026-03-15T19:33:48.937764Z INFO property_map_server: All memory pages locked (mlockall)
2026-03-15T19:33:48.937811Z INFO property_map_server: Server listening on 0.0.0.0:8001
2026-03-15T19:33:49.510480Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T19:33:49.711250Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T19:33:49.717966Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T19:33:49.758705Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T19:33:50.515563Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T19:33:50.515611Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T19:33:50.526782Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T19:33:50.526799Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T19:33:50.624015Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T19:33:50.954642Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T19:33:51.089161Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T19:33:51.089169Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T19:33:51.344202Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=148.7 total_ms=177.0
2026-03-15T19:34:49.486571Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T19:34:49.486761Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T19:34:50.105387Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=147.2 total_ms=181.6
2026-03-15T19:35:47.333901Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T19:35:47.633561Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T19:35:47.964989Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=127.2 total_ms=151.0
2026-03-15T19:36:10.914163Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T19:36:10.935314Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T19:36:11.591360Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=148.2 total_ms=179.1
2026-03-15T19:37:10.487304Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=30.4 total_ms=42.4
2026-03-15T19:37:11.520281Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=34.6 total_ms=41.1
2026-03-15T19:37:12.429971Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=92.6 total_ms=114.8
2026-03-15T19:39:01.324173Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1374 cells_after_filter=1370 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated monthly rent:300:7750" travel_entries=0 agg_ms=78.6 total_ms=100.6
2026-03-15T19:39:11.452418Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=34.6 total_ms=46.5
2026-03-15T19:39:12.258031Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=104.1 total_ms=126.5
2026-03-15T19:39:13.209101Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=30.2 total_ms=41.9
2026-03-15T19:39:13.542817Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=86.2 total_ms=108.5
2026-03-15T19:39:17.811115Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1374 cells_after_filter=1370 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated monthly rent:300:7750" travel_entries=0 agg_ms=78.0 total_ms=100.1
2026-03-15T19:39:18.543391Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=31.1 total_ms=42.7
2026-03-15T19:39:21.422553Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=99.7 total_ms=122.0
2026-03-15T19:39:24.089893Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1374 cells_after_filter=1370 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated monthly rent:300:7750" travel_entries=0 agg_ms=65.8 total_ms=87.6
2026-03-15T19:39:34.096698Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=878 cells_after_filter=877 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:For rent;;Asking rent (monthly):300:7750" travel_entries=0 agg_ms=34.2 total_ms=40.6
2026-03-15T19:39:35.136941Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1374 cells_after_filter=1370 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated monthly rent:300:7750" travel_entries=0 agg_ms=77.3 total_ms=99.4
2026-03-15T19:39:35.997965Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=878 cells_after_filter=877 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:For rent;;Asking rent (monthly):300:7750" travel_entries=0 agg_ms=27.5 total_ms=34.4
2026-03-15T19:39:36.896448Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=30.0 total_ms=43.4
2026-03-15T19:39:37.822906Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=97.3 total_ms=119.7
2026-03-15T19:39:38.005882Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=31.8 total_ms=43.6
2026-03-15T19:39:38.307634Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=32.0 total_ms=38.7
2026-03-15T19:39:39.034035Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=32.7 total_ms=44.5
2026-03-15T19:39:39.374475Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=100.4 total_ms=122.4
2026-03-15T19:39:39.415811Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=39.0 total_ms=50.9
2026-03-15T19:39:40.267881Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=28.4 total_ms=35.2
2026-03-15T19:39:40.542606Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=35.2 total_ms=47.1
2026-03-15T19:39:40.845586Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=88.6 total_ms=110.3
2026-03-15T19:39:42.206069Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=32.7 total_ms=44.7
2026-03-15T19:39:43.318640Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=27.1 total_ms=33.6
2026-03-15T19:39:44.688592Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=96.4 total_ms=117.7
2026-03-15T19:39:45.172008Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=29.2 total_ms=36.4
2026-03-15T19:39:45.850790Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=40.1 total_ms=51.9
2026-03-15T19:39:46.189922Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=29.0 total_ms=35.7
2026-03-15T19:39:46.998212Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=86.0 total_ms=108.9
2026-03-15T19:39:47.177336Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=29.1 total_ms=35.8
2026-03-15T19:39:47.696691Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=96.6 total_ms=118.0
2026-03-15T19:39:48.556326Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=28.2 total_ms=35.0
2026-03-15T19:39:48.906118Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=107.8 total_ms=129.7
2026-03-15T19:39:51.395259Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1374 cells_after_filter=1370 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated monthly rent:300:7750" travel_entries=0 agg_ms=66.3 total_ms=88.4
2026-03-15T19:39:58.878189Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=878 cells_after_filter=877 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:For rent;;Asking rent (monthly):300:7750" travel_entries=0 agg_ms=31.0 total_ms=37.7
2026-03-15T19:40:04.517960Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=31.5 total_ms=44.3
2026-03-15T19:40:17.346685Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=96.9 total_ms=119.8
2026-03-15T19:47:52.591641Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T19:47:52.591925Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T19:47:53.240629Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=95.9 total_ms=117.5
2026-03-15T19:48:12.279153Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=4253126 parallel=true cells_before_filter=3093 cells_after_filter=3064 truncated=false bounds=50.9497,-0.9772,51.9159,0.9233 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=54.4 total_ms=137.0
2026-03-15T19:48:13.512341Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=2883111 parallel=true cells_before_filter=1105 cells_after_filter=1088 truncated=false bounds=51.1146,-0.5552,51.6729,0.5420 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=36.5 total_ms=54.4
2026-03-15T19:48:14.110557Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=8 rows=1687410 parallel=true cells_before_filter=2479 cells_after_filter=2434 truncated=false bounds=51.2171,-0.3474,51.5584,0.3232 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=22.6 total_ms=61.3
2026-03-15T19:48:16.632555Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=1007854 parallel=true cells_before_filter=5324 cells_after_filter=5035 truncated=false bounds=51.3626,-0.1740,51.5564,0.2074 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=16.9 total_ms=117.8
2026-03-15T19:51:26.541773Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=190938 parallel=true cells_before_filter=1076 cells_after_filter=1033 truncated=false bounds=51.4310,-0.0323,51.5159,0.1349 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=3.8 total_ms=20.0
2026-03-15T19:51:30.533448Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=384437 parallel=true cells_before_filter=2704 cells_after_filter=2497 truncated=false bounds=51.4379,0.0032,51.5871,0.2970 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=7.4 total_ms=46.9
2026-03-15T19:51:31.069688Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=632131 parallel=true cells_before_filter=3388 cells_after_filter=3150 truncated=false bounds=51.4091,-0.1211,51.5583,0.1727 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=10.9 total_ms=60.6
2026-03-15T19:51:35.046811Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=65919 parallel=true cells_before_filter=341 cells_after_filter=288 truncated=false bounds=51.4524,-0.0211,51.4936,0.0599 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.6 total_ms=6.2
2026-03-15T19:51:36.299041Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=63016 parallel=true cells_before_filter=366 cells_after_filter=298 truncated=false bounds=51.4463,-0.0284,51.4875,0.0526 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.9 total_ms=6.5
2026-03-15T19:54:34.842844Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ec7ffff resolution=9 total_count=102 filters=1 filters_raw="Listing status:Historical sale" ms=0.1
2026-03-15T19:54:35.089458Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=295 cells_after_filter=209 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.6 total_ms=5.0
2026-03-15T19:54:36.130976Z INFO property_map_server::routes::properties: GET /api/hexagon-properties h3=89194ad2ec7ffff resolution=9 total=102 returned=100 offset=0 filters=1 filters_raw="Listing status:Historical sale" ms=0.4
2026-03-15T19:54:38.679953Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ec7ffff resolution=9 total_count=5 filters=1 filters_raw="Listing status:For sale" ms=0.1
2026-03-15T19:54:38.832683Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.6 total_ms=2.4
2026-03-15T19:55:05.573712Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ec7ffff resolution=9 total_count=102 filters=1 filters_raw="Listing status:Historical sale" ms=0.1
2026-03-15T19:55:06.053141Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=295 cells_after_filter=209 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.4 total_ms=4.8
2026-03-15T19:55:06.289059Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ec7ffff resolution=9 total_count=5 filters=1 filters_raw="Listing status:For sale" ms=0.1
2026-03-15T19:55:06.464008Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.7 total_ms=2.4
2026-03-15T19:55:07.257113Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=63016 parallel=true cells_before_filter=329 cells_after_filter=275 truncated=false bounds=51.4463,-0.0284,51.4875,0.0526 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.8 total_ms=3.6
2026-03-15T19:55:10.176114Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ecfffff resolution=9 total_count=20 filters=1 filters_raw="Listing status:For sale" ms=0.1
2026-03-15T19:55:10.449590Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.8 total_ms=2.6
2026-03-15T19:55:11.581365Z INFO property_map_server::routes::properties: GET /api/hexagon-properties h3=89194ad2ecfffff resolution=9 total=20 returned=20 offset=0 filters=1 filters_raw="Listing status:For sale" ms=0.3
2026-03-15T19:55:12.826310Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ecfffff resolution=9 total_count=1 filters=1 filters_raw="Listing status:For rent" ms=0.1
2026-03-15T19:55:13.296370Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=69 cells_after_filter=54 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=0.4 total_ms=0.8
2026-03-15T19:55:14.274431Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ecfffff resolution=9 total_count=20 filters=1 filters_raw="Listing status:For sale" ms=0.2
2026-03-15T19:55:14.745705Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.8 total_ms=2.6
2026-03-15T19:55:15.410862Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.7 total_ms=2.6
2026-03-15T19:55:15.973540Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ecfffff resolution=9 total_count=285 filters=1 filters_raw="Listing status:Historical sale" ms=0.2
2026-03-15T19:55:16.149036Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=295 cells_after_filter=209 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.6 total_ms=5.2
2026-03-15T19:55:50.335806Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=63016 parallel=true cells_before_filter=366 cells_after_filter=298 truncated=false bounds=51.4463,-0.0284,51.4875,0.0526 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.8 total_ms=6.5
2026-03-15T19:55:51.579153Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2e53ffff resolution=9 total_count=119 filters=1 filters_raw="Listing status:Historical sale" ms=0.1
2026-03-15T19:55:51.847817Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=295 cells_after_filter=209 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.6 total_ms=4.8
2026-03-15T19:57:08.597832Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=5 rows=14962301 parallel=true cells_before_filter=687 cells_after_filter=687 truncated=false bounds=46.0000,-12.0000,56.5000,12.0000 filters=0 filters_raw="-" travel_entries=0 agg_ms=206.2 total_ms=218.4
2026-03-15T19:58:56.459660Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2e53ffff resolution=9 total_count=5 filters=1 filters_raw="Listing status:For sale" ms=0.1
2026-03-15T19:58:56.611313Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.8 total_ms=2.8
2026-03-15T19:58:58.208823Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=63016 parallel=true cells_before_filter=329 cells_after_filter=275 truncated=false bounds=51.4463,-0.0284,51.4875,0.0526 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=1.0 total_ms=4.1
2026-03-15T19:58:59.418179Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2e5bffff resolution=9 total_count=5 filters=1 filters_raw="Listing status:For sale" ms=0.1
2026-03-15T19:58:59.646741Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.7 total_ms=2.5
2026-03-15T19:59:13.261566Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ecfffff resolution=9 total_count=20 filters=1 filters_raw="Listing status:For sale" ms=0.1
2026-03-15T19:59:16.381213Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad216fffff resolution=9 total_count=2 filters=1 filters_raw="Listing status:For sale" ms=0.1
2026-03-15T19:59:19.468079Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=65337 parallel=true cells_before_filter=263 cells_after_filter=187 truncated=false bounds=51.4575,-0.0360,51.4986,0.0187 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.8 total_ms=2.8
2026-03-15T19:59:20.912780Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=57987 parallel=true cells_before_filter=230 cells_after_filter=192 truncated=false bounds=51.4560,-0.0422,51.4972,0.0126 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.6 total_ms=2.4
2026-03-15T19:59:22.439657Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad04a7ffff resolution=9 total_count=45 filters=1 filters_raw="Listing status:For sale" ms=0.2
2026-03-15T19:59:25.975325Z INFO property_map_server::routes::properties: GET /api/hexagon-properties h3=89194ad04a7ffff resolution=9 total=45 returned=45 offset=0 filters=1 filters_raw="Listing status:For sale" ms=0.4
2026-03-15T19:59:44.130233Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=27673 parallel=false cells_before_filter=124 cells_after_filter=79 truncated=false bounds=51.4630,-0.0335,51.4874,-0.0010 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.6 total_ms=1.4
2026-03-15T19:59:44.863513Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=23785 parallel=false cells_before_filter=105 cells_after_filter=82 truncated=false bounds=51.4612,-0.0378,51.4857,-0.0053 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.6 total_ms=1.4
2026-03-15T19:59:45.429953Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=10363 parallel=false cells_before_filter=39 cells_after_filter=27 truncated=false bounds=51.4668,-0.0300,51.4794,-0.0133 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.2 total_ms=0.6
2026-03-15T19:59:47.052444Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=69 postcodes_after_filter=22 filtered_out=47 truncated=false bounds=51.469424,-0.026465,51.476579,-0.016948 filters=1 filters_raw="Listing status:For sale" travel_entries=0 total_ms=0.8
2026-03-15T19:59:48.970011Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=34 postcodes_after_filter=8 filtered_out=26 truncated=false bounds=51.470330,-0.025601,51.476006,-0.018052 filters=1 filters_raw="Listing status:For sale" travel_entries=0 total_ms=0.5
2026-03-15T19:59:49.463765Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=34 postcodes_after_filter=4 filtered_out=30 truncated=false bounds=51.471351,-0.024627,51.475359,-0.019295 filters=1 filters_raw="Listing status:For sale" travel_entries=0 total_ms=0.3
2026-03-15T19:59:51.056899Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=34 postcodes_after_filter=9 filtered_out=25 truncated=false bounds=51.471780,-0.026331,51.475789,-0.020998 filters=1 filters_raw="Listing status:For sale" travel_entries=0 total_ms=0.4
2026-03-15T19:59:51.581180Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=34 postcodes_after_filter=6 filtered_out=28 truncated=false bounds=51.471674,-0.025659,51.475683,-0.020327 filters=1 filters_raw="Listing status:For sale" travel_entries=0 total_ms=0.4
2026-03-15T19:59:58.001176Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=34 postcodes_after_filter=8 filtered_out=26 truncated=false bounds=51.471124,-0.025948,51.476103,-0.019326 filters=1 filters_raw="Listing status:For sale" travel_entries=0 total_ms=0.6
2026-03-15T20:00:01.147529Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad04a7ffff resolution=9 total_count=44 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold|Leasehold" ms=0.2
2026-03-15T20:00:01.649333Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=33 postcodes_after_filter=7 filtered_out=26 truncated=false bounds=51.471124,-0.025948,51.476103,-0.019326 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold|Leasehold" travel_entries=0 total_ms=0.3
2026-03-15T20:00:03.524035Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad04a7ffff resolution=9 total_count=1 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" ms=0.2
2026-03-15T20:00:03.982950Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=5 postcodes_after_filter=1 filtered_out=4 truncated=false bounds=51.471124,-0.025948,51.476103,-0.019326 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 total_ms=0.2
2026-03-15T20:00:05.734272Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=16 postcodes_after_filter=10 filtered_out=6 truncated=false bounds=51.468325,-0.029578,51.477865,-0.016889 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 total_ms=0.4
2026-03-15T20:00:07.598747Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=371660 parallel=true cells_before_filter=1032 cells_after_filter=874 truncated=false bounds=51.4033,-0.1135,51.5201,0.0418 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=2.3 total_ms=9.7
2026-03-15T20:00:07.902439Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=277117 parallel=true cells_before_filter=961 cells_after_filter=829 truncated=false bounds=51.4051,-0.0436,51.5218,0.1117 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=2.0 total_ms=9.7
2026-03-15T20:00:08.868329Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=410619 parallel=true cells_before_filter=1446 cells_after_filter=1288 truncated=false bounds=51.3867,-0.0636,51.5340,0.1322 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=2.5 total_ms=14.6
2026-03-15T20:00:10.739310Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad04a7ffff resolution=9 total_count=38 filters=2 filters_raw="Listing status:Historical sale;;Leasehold/Freehold:Freehold" ms=0.2
2026-03-15T20:00:11.236022Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=410619 parallel=true cells_before_filter=2260 cells_after_filter=1984 truncated=false bounds=51.3867,-0.0636,51.5340,0.1322 filters=2 filters_raw="Listing status:Historical sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=5.3 total_ms=38.1
2026-03-15T20:00:12.173963Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad04a7ffff resolution=9 total_count=1 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" ms=0.2
2026-03-15T20:00:12.666440Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=410619 parallel=true cells_before_filter=1446 cells_after_filter=1288 truncated=false bounds=51.3867,-0.0636,51.5340,0.1322 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=2.6 total_ms=15.7
2026-03-15T20:00:19.409064Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad04a7ffff resolution=9 total_count=0 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" ms=0.2
2026-03-15T20:00:19.968653Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=571405 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.3867,-0.1106,51.5340,0.1792 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=2.2 total_ms=2.2
2026-03-15T20:00:20.722189Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=571405 parallel=true cells_before_filter=1971 cells_after_filter=1801 truncated=false bounds=51.3867,-0.1106,51.5340,0.1792 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=3.7 total_ms=6.2
2026-03-15T20:00:21.190329Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=571405 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.3867,-0.1106,51.5340,0.1792 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=2.3 total_ms=2.3
2026-03-15T20:00:27.942711Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=571405 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.3867,-0.1106,51.5340,0.1792 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:9200" travel_entries=0 agg_ms=2.1 total_ms=2.1
2026-03-15T20:00:29.461340Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=525321 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.4145,-0.0678,51.5616,0.2220 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:9200" travel_entries=0 agg_ms=1.9 total_ms=1.9
2026-03-15T20:00:31.709909Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=525321 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.4145,-0.0678,51.5616,0.2220 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:9200" travel_entries=0 agg_ms=2.1 total_ms=2.1
2026-03-15T20:00:32.512895Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=525321 parallel=true cells_before_filter=1836 cells_after_filter=1678 truncated=false bounds=51.4145,-0.0678,51.5616,0.2220 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=3.2 total_ms=5.6
2026-03-15T20:00:33.940425Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=525321 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.4145,-0.0678,51.5616,0.2220 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1700" travel_entries=0 agg_ms=1.6 total_ms=1.6
2026-03-15T20:00:34.849329Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3141817 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.1389,-0.4994,51.7648,0.7321 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1700" travel_entries=0 agg_ms=14.9 total_ms=14.9
2026-03-15T20:00:35.804047Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3560390 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.0590,-0.6228,51.8363,0.9068 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1700" travel_entries=0 agg_ms=9.3 total_ms=9.3
2026-03-15T20:00:36.131775Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3610110 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.1661,-0.6093,51.9416,0.9202 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1700" travel_entries=0 agg_ms=9.3 total_ms=9.3
2026-03-15T20:00:37.313585Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3610110 parallel=true cells_before_filter=1788 cells_after_filter=1780 truncated=false bounds=51.1661,-0.6093,51.9416,0.9202 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=11.8 total_ms=14.2
2026-03-15T20:00:40.426451Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3610110 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.1661,-0.6093,51.9416,0.9202 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=10.8 total_ms=10.8
2026-03-15T20:00:42.361059Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=9.4 total_ms=9.4
2026-03-15T20:00:43.409927Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1193 cells_after_filter=1182 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=10.5 total_ms=12.0
2026-03-15T20:00:46.333117Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=8.8 total_ms=8.8
2026-03-15T20:00:48.082494Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=8.4 total_ms=8.4
2026-03-15T20:00:49.833842Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1193 cells_after_filter=1182 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=10.1 total_ms=23.3
2026-03-15T20:01:02.011997Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1094 cells_after_filter=1088 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=11.0 total_ms=21.9
2026-03-15T20:01:03.418647Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1193 cells_after_filter=1182 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=11.4 total_ms=13.1
2026-03-15T20:01:03.571497Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1094 cells_after_filter=1088 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=11.2 total_ms=12.9
2026-03-15T20:01:08.874337Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1054 cells_after_filter=1048 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204" travel_entries=0 agg_ms=12.2 total_ms=23.6
2026-03-15T20:01:14.467687Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Est. price per sqm:20:918535" travel_entries=0 agg_ms=9.8 total_ms=9.8
2026-03-15T20:01:15.872578Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1054 cells_after_filter=1048 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204" travel_entries=0 agg_ms=12.0 total_ms=13.6
2026-03-15T20:01:16.342009Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Est. price per sqm:20:918535" travel_entries=0 agg_ms=10.0 total_ms=10.0
2026-03-15T20:01:18.114175Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Est. price per sqm:20:918535" travel_entries=0 agg_ms=10.2 total_ms=10.2
2026-03-15T20:01:40.296993Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4517,51.8453,0.7619 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Est. price per sqm:20:918535" travel_entries=0 agg_ms=9.8 total_ms=9.8
2026-03-15T20:02:48.056601Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3111647 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2295,-0.4528,51.8458,0.7630 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Est. price per sqm:20:918535" travel_entries=0 agg_ms=9.6 total_ms=9.6
2026-03-15T20:02:49.785245Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Est. price per sqm:20:918535" travel_entries=0 agg_ms=9.4 total_ms=9.4
2026-03-15T20:03:02.786058Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=1285 cells_after_filter=1278 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204" travel_entries=0 agg_ms=13.1 total_ms=26.2
2026-03-15T20:03:23.742245Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Price per sqm:5:1519231" travel_entries=0 agg_ms=10.0 total_ms=10.0
2026-03-15T20:03:30.904544Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=9.0 total_ms=9.0
2026-03-15T20:03:31.700115Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=2 filters_raw="Listing status:For sale;;Price per sqm:5:1519231" travel_entries=0 agg_ms=9.5 total_ms=9.5
2026-03-15T20:03:33.123759Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=2 filters_raw="Listing status:For sale;;Price per sqm:5:1519231" travel_entries=0 agg_ms=10.7 total_ms=10.7
2026-03-15T20:03:33.264092Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=2 filters_raw="Listing status:For sale;;Price per sqm:5:2500" travel_entries=0 agg_ms=8.3 total_ms=8.3
2026-03-15T20:03:34.235435Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=1479 cells_after_filter=1465 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=10.7 total_ms=12.9
2026-03-15T20:03:34.567324Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=2 filters_raw="Listing status:For sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=9.0 total_ms=9.0
2026-03-15T20:03:36.096362Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=2 filters_raw="Listing status:For sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=7.6 total_ms=7.6
2026-03-15T20:03:37.751074Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=4814137 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=50.9471,-0.9801,52.1136,1.7728 filters=2 filters_raw="Listing status:For sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=11.8 total_ms=11.8
2026-03-15T20:03:38.835277Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=56.1 total_ms=56.1
2026-03-15T20:03:40.887729Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=49.8 total_ms=49.8
2026-03-15T20:03:42.529302Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3274 cells_after_filter=3270 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:Historical sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=84.4 total_ms=146.3
2026-03-15T20:03:44.390098Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=54.6 total_ms=54.6
2026-03-15T20:04:33.371452Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:04:33.371601Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:04:33.371608Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:04:33.457625Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:04:33.457635Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:04:35.974733Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:04:35.974742Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:04:36.398745Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:04:36.398757Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:04:36.476688Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:04:36.476699Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:04:46.252075Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T20:04:46.252184Z INFO property_map_server::data::property: Feature columns from config numeric=54 enums=13 total=67
2026-03-15T20:04:47.610246Z INFO property_map_server::data::property: Combined data selected rows=15702691
2026-03-15T20:04:47.804418Z INFO property_map_server::data::property: Extracting numeric feature columns
2026-03-15T20:04:48.220314Z INFO property_map_server::data::property: Computing histograms for numeric features
2026-03-15T20:04:49.335558Z INFO property_map_server::data::property: Extracting string columns
2026-03-15T20:04:51.577686Z INFO property_map_server::data::property: Building enum features
2026-03-15T20:04:53.025870Z INFO property_map_server::data::property: Extracting renovation history
2026-03-15T20:04:55.151481Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
2026-03-15T20:04:55.151490Z INFO property_map_server::data::property: Extracting listing features
2026-03-15T20:04:55.671708Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
2026-03-15T20:04:55.671716Z INFO property_map_server::data::property: Sorting rows by spatial locality
2026-03-15T20:04:56.573495Z INFO property_map_server::data::property: Building interned strings
2026-03-15T20:05:02.540797Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
2026-03-15T20:05:05.103322Z INFO property_map_server::data::property: Data loading complete
2026-03-15T20:05:06.794645Z INFO property_map_server: Property data loaded rows=15702691 features=67 enums=13
2026-03-15T20:05:06.794655Z INFO property_map_server: Building spatial grid index (0.01° cells)
2026-03-15T20:05:06.892926Z INFO property_map_server: Precomputing H3 cells at resolution 12
2026-03-15T20:05:06.892936Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
2026-03-15T20:05:07.310766Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
2026-03-15T20:05:07.310799Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
2026-03-15T20:05:07.310815Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
2026-03-15T20:05:07.355286Z INFO property_map_server::data::poi: Loaded 678242 POIs
2026-03-15T20:05:07.468648Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
2026-03-15T20:05:07.469230Z INFO property_map_server::data::poi: POI data loading complete.
2026-03-15T20:05:07.506618Z INFO property_map_server: POI data loaded pois=678242
2026-03-15T20:05:07.506627Z INFO property_map_server: Building POI spatial grid index
2026-03-15T20:05:07.512084Z INFO property_map_server: Loading place data from /app/data/places.parquet
2026-03-15T20:05:07.512091Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
2026-03-15T20:05:07.525343Z INFO property_map_server::data::places: Loaded 3474 places
2026-03-15T20:05:07.526115Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
2026-03-15T20:05:07.526170Z INFO property_map_server: Place data loaded places=3474
2026-03-15T20:05:07.526181Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
2026-03-15T20:05:07.526186Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
2026-03-15T20:05:07.536262Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
2026-03-15T20:05:15.228511Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
2026-03-15T20:05:15.228521Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
2026-03-15T20:05:15.228532Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
2026-03-15T20:05:15.231530Z INFO property_map_server: PMTiles loaded successfully
2026-03-15T20:05:15.273794Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
2026-03-15T20:05:15.309107Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
2026-03-15T20:05:15.309278Z INFO property_map_server: Precomputed features response groups=9
2026-03-15T20:05:15.309292Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
2026-03-15T20:05:15.446941Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
2026-03-15T20:05:15.454051Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
2026-03-15T20:05:15.459669Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
2026-03-15T20:05:15.591403Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
2026-03-15T20:05:15.597637Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
2026-03-15T20:05:15.597658Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
2026-03-15T20:05:15.597670Z INFO property_map_server: Loading travel time data from /app/data/travel-times
2026-03-15T20:05:15.643613Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
2026-03-15T20:05:15.644670Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
2026-03-15T20:05:15.646043Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
2026-03-15T20:05:15.653830Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
2026-03-15T20:05:15.653847Z INFO property_map_server: Travel time store loaded modes=4
2026-03-15T20:05:15.653891Z INFO property_map_server: Precomputed AI filters system prompt
2026-03-15T20:05:19.156377Z INFO property_map_server: All memory pages locked (mlockall)
2026-03-15T20:05:19.156422Z INFO property_map_server: Server listening on 0.0.0.0:8001
2026-03-15T20:05:27.513039Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:05:27.517384Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:05:27.524994Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:05:27.531774Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:05:27.537007Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:05:27.542722Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:05:27.549237Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:05:27.563523Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:05:28.096739Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:05:28.096753Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:06:11.902556Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3248 cells_after_filter=3246 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=55.7 total_ms=84.8
2026-03-15T20:08:11.734568Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3113 cells_after_filter=3112 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=56.9 total_ms=83.5
2026-03-15T20:08:12.991872Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3248 cells_after_filter=3246 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=70.2 total_ms=76.3
2026-03-15T20:08:13.507957Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3113 cells_after_filter=3112 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=59.5 total_ms=63.9
2026-03-15T20:08:19.338486Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3113 cells_after_filter=3112 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=74.5 total_ms=102.6
2026-03-15T20:08:20.686317Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3248 cells_after_filter=3246 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=55.0 total_ms=59.5
2026-03-15T20:08:20.922174Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3113 cells_after_filter=3112 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=60.6 total_ms=65.2
2026-03-15T20:08:26.772910Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3082 cells_after_filter=3081 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:788" travel_entries=0 agg_ms=62.6 total_ms=89.4
2026-03-15T20:08:28.805969Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3248 cells_after_filter=3246 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=59.6 total_ms=64.4
2026-03-15T20:08:28.952002Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3082 cells_after_filter=3081 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:788" travel_entries=0 agg_ms=57.7 total_ms=62.3
2026-03-15T20:08:30.558244Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3113 cells_after_filter=3112 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=58.2 total_ms=84.6
2026-03-15T20:08:33.038408Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3248 cells_after_filter=3246 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=56.6 total_ms=61.3
2026-03-15T20:08:33.304924Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3113 cells_after_filter=3112 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=59.1 total_ms=63.7
2026-03-15T20:08:34.406141Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=1106 cells_after_filter=1106 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" travel_entries=0 agg_ms=48.2 total_ms=55.7
2026-03-15T20:08:36.292323Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=9738472 parallel=true cells_before_filter=765 cells_after_filter=765 truncated=false bounds=49.7628,-3.2615,52.9110,4.1398 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" travel_entries=0 agg_ms=36.2 total_ms=41.7
2026-03-15T20:08:39.628619Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=8 rows=2480095 parallel=true cells_before_filter=165 cells_after_filter=164 truncated=false bounds=51.3348,-0.4935,51.7042,0.3778 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" travel_entries=0 agg_ms=5.9 total_ms=7.2
2026-03-15T20:08:41.653852Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=88195da457fffff resolution=8 total_count=18 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" ms=0.1
2026-03-15T20:08:41.915336Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=8 rows=2176649 parallel=true cells_before_filter=139 cells_after_filter=139 truncated=false bounds=51.3348,-0.3793,51.7042,0.2636 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" travel_entries=0 agg_ms=5.4 total_ms=6.5
2026-03-15T20:08:44.824263Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=40 cells_after_filter=37 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" travel_entries=0 agg_ms=1.1 total_ms=1.4
2026-03-15T20:08:45.089912Z INFO property_map_server::routes::properties: GET /api/hexagon-properties h3=88195da457fffff resolution=8 total=18 returned=18 offset=0 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" ms=0.2
2026-03-15T20:09:25.568766Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1197 cells_after_filter=1045 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=2.1 total_ms=3.8
2026-03-15T20:09:26.074762Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=40 cells_after_filter=37 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" travel_entries=0 agg_ms=1.2 total_ms=1.3
2026-03-15T20:09:28.666758Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=88195da457fffff resolution=8 total_count=53 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:8855327" ms=0.1
2026-03-15T20:09:29.155687Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1096 cells_after_filter=963 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:8855327" travel_entries=0 agg_ms=2.6 total_ms=12.1
2026-03-15T20:09:29.978092Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1197 cells_after_filter=1045 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=2.1 total_ms=3.8
2026-03-15T20:09:30.496935Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1096 cells_after_filter=963 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:8855327" travel_entries=0 agg_ms=2.1 total_ms=3.7
2026-03-15T20:09:33.901621Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=88195da457fffff resolution=8 total_count=19 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:204" ms=0.1
2026-03-15T20:09:34.385794Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:204" travel_entries=0 agg_ms=2.4 total_ms=10.3
2026-03-15T20:09:35.102671Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1197 cells_after_filter=1045 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=2.2 total_ms=3.9
2026-03-15T20:09:35.587723Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:204" travel_entries=0 agg_ms=2.2 total_ms=3.7
2026-03-15T20:09:36.148540Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=88195da457fffff resolution=8 total_count=19 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):98:204" ms=0.1
2026-03-15T20:09:36.644673Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):98:204" travel_entries=0 agg_ms=2.5 total_ms=10.4
2026-03-15T20:09:37.160139Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1197 cells_after_filter=1045 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=2.2 total_ms=3.9
2026-03-15T20:09:37.683238Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):98:204" travel_entries=0 agg_ms=2.2 total_ms=3.8
2026-03-15T20:09:37.944776Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=88195da457fffff resolution=8 total_count=19 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):89:204" ms=0.1
2026-03-15T20:09:37.969098Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):89:204" travel_entries=0 agg_ms=2.2 total_ms=10.4
2026-03-15T20:09:42.074237Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1197 cells_after_filter=1045 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=2.1 total_ms=4.2
2026-03-15T20:09:42.532178Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):89:204" travel_entries=0 agg_ms=2.4 total_ms=4.0
2026-03-15T20:09:50.016920Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=88195da457fffff resolution=8 total_count=19 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:204" ms=0.1
2026-03-15T20:09:50.508364Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:204" travel_entries=0 agg_ms=2.2 total_ms=10.2
2026-03-15T20:10:13.683691Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:10:13.683848Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:10:13.683854Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:10:13.750258Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:10:13.750268Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:10:16.179096Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:10:16.179106Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:10:16.456525Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:10:16.456537Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:10:16.514061Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:10:16.514070Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:10:22.681306Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T20:10:22.681402Z INFO property_map_server::data::property: Feature columns from config numeric=54 enums=13 total=67
2026-03-15T20:10:24.012054Z INFO property_map_server::data::property: Combined data selected rows=15702691
2026-03-15T20:10:24.180353Z INFO property_map_server::data::property: Extracting numeric feature columns
2026-03-15T20:10:24.607077Z INFO property_map_server::data::property: Computing histograms for numeric features
2026-03-15T20:10:25.773925Z INFO property_map_server::data::property: Extracting string columns
2026-03-15T20:10:27.857482Z INFO property_map_server::data::property: Building enum features
2026-03-15T20:10:29.227608Z INFO property_map_server::data::property: Extracting renovation history
2026-03-15T20:10:31.336600Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
2026-03-15T20:10:31.336609Z INFO property_map_server::data::property: Extracting listing features
2026-03-15T20:10:31.843715Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
2026-03-15T20:10:31.843723Z INFO property_map_server::data::property: Sorting rows by spatial locality
2026-03-15T20:10:32.766778Z INFO property_map_server::data::property: Building interned strings
2026-03-15T20:10:38.715142Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
2026-03-15T20:10:41.203246Z INFO property_map_server::data::property: Data loading complete
2026-03-15T20:10:42.829684Z INFO property_map_server: Property data loaded rows=15702691 features=67 enums=13
2026-03-15T20:10:42.829695Z INFO property_map_server: Building spatial grid index (0.01° cells)
2026-03-15T20:10:42.925550Z INFO property_map_server: Precomputing H3 cells at resolution 12
2026-03-15T20:10:42.925560Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
2026-03-15T20:10:43.323292Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
2026-03-15T20:10:43.323313Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
2026-03-15T20:10:43.323319Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
2026-03-15T20:10:43.341356Z INFO property_map_server::data::poi: Loaded 678242 POIs
2026-03-15T20:10:43.453718Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
2026-03-15T20:10:43.454297Z INFO property_map_server::data::poi: POI data loading complete.
2026-03-15T20:10:43.490938Z INFO property_map_server: POI data loaded pois=678242
2026-03-15T20:10:43.490947Z INFO property_map_server: Building POI spatial grid index
2026-03-15T20:10:43.496143Z INFO property_map_server: Loading place data from /app/data/places.parquet
2026-03-15T20:10:43.496149Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
2026-03-15T20:10:43.496678Z INFO property_map_server::data::places: Loaded 3474 places
2026-03-15T20:10:43.497419Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
2026-03-15T20:10:43.497476Z INFO property_map_server: Place data loaded places=3474
2026-03-15T20:10:43.497486Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
2026-03-15T20:10:43.497491Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
2026-03-15T20:10:43.498313Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
2026-03-15T20:10:50.089457Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
2026-03-15T20:10:50.089468Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
2026-03-15T20:10:50.089483Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
2026-03-15T20:10:50.089693Z INFO property_map_server: PMTiles loaded successfully
2026-03-15T20:10:50.134239Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
2026-03-15T20:10:50.160720Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
2026-03-15T20:10:50.160908Z INFO property_map_server: Precomputed features response groups=9
2026-03-15T20:10:50.160921Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
2026-03-15T20:10:50.220618Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
2026-03-15T20:10:50.224719Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
2026-03-15T20:10:50.247220Z INFO property_map_server::pocketbase: Added notes text field to PocketBase collection 'saved_searches'
2026-03-15T20:10:50.251061Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
2026-03-15T20:10:50.257749Z INFO property_map_server::pocketbase: Added notes text field to PocketBase collection 'saved_properties'
2026-03-15T20:10:50.304219Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
2026-03-15T20:10:50.308723Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
2026-03-15T20:10:50.308749Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
2026-03-15T20:10:50.308761Z INFO property_map_server: Loading travel time data from /app/data/travel-times
2026-03-15T20:10:50.314963Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
2026-03-15T20:10:50.315108Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
2026-03-15T20:10:50.315266Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
2026-03-15T20:10:50.316776Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
2026-03-15T20:10:50.316796Z INFO property_map_server: Travel time store loaded modes=4
2026-03-15T20:10:50.316843Z INFO property_map_server: Precomputed AI filters system prompt
2026-03-15T20:10:51.706624Z INFO property_map_server: All memory pages locked (mlockall)
2026-03-15T20:10:51.706663Z INFO property_map_server: Server listening on 0.0.0.0:8001
2026-03-15T20:10:52.074357Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:10:52.074443Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:10:52.507461Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:10:52.508607Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:10:52.516615Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:10:52.522899Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:10:52.536710Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:10:52.541257Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:10:52.549244Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:10:52.550031Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:18:43.657651Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:18:43.657816Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:18:43.657822Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:18:43.746197Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:18:43.746208Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:18:46.373581Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:18:46.373592Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:18:46.679114Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:18:46.679124Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:18:46.747208Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:18:46.747221Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:19:00.212275Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:19:00.212447Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:19:00.212456Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:19:00.289849Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:19:00.289859Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:19:02.760385Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:19:02.760396Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:19:03.052345Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:19:03.052355Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:19:03.116051Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:19:03.116060Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:19:09.885025Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T20:19:09.885115Z INFO property_map_server::data::property: Feature columns from config numeric=54 enums=13 total=67
2026-03-15T20:19:11.176403Z INFO property_map_server::data::property: Combined data selected rows=15702691
2026-03-15T20:19:11.363917Z INFO property_map_server::data::property: Extracting numeric feature columns
2026-03-15T20:19:11.771906Z INFO property_map_server::data::property: Computing histograms for numeric features
2026-03-15T20:19:13.025850Z INFO property_map_server::data::property: Extracting string columns
2026-03-15T20:19:15.244059Z INFO property_map_server::data::property: Building enum features
2026-03-15T20:19:16.598869Z INFO property_map_server::data::property: Extracting renovation history
2026-03-15T20:19:18.813936Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
2026-03-15T20:19:18.813945Z INFO property_map_server::data::property: Extracting listing features
2026-03-15T20:19:19.353132Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
2026-03-15T20:19:19.353141Z INFO property_map_server::data::property: Sorting rows by spatial locality
2026-03-15T20:19:20.231462Z INFO property_map_server::data::property: Building interned strings
2026-03-15T20:19:26.257466Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
2026-03-15T20:19:28.766985Z INFO property_map_server::data::property: Data loading complete
2026-03-15T20:19:30.388072Z INFO property_map_server: Property data loaded rows=15702691 features=67 enums=13
2026-03-15T20:19:30.388081Z INFO property_map_server: Building spatial grid index (0.01° cells)
2026-03-15T20:19:30.483869Z INFO property_map_server: Precomputing H3 cells at resolution 12
2026-03-15T20:19:30.483878Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
2026-03-15T20:19:30.866157Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
2026-03-15T20:19:30.866199Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
2026-03-15T20:19:30.866214Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
2026-03-15T20:19:30.902414Z INFO property_map_server::data::poi: Loaded 678242 POIs
2026-03-15T20:19:31.015549Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
2026-03-15T20:19:31.016109Z INFO property_map_server::data::poi: POI data loading complete.
2026-03-15T20:19:31.054466Z INFO property_map_server: POI data loaded pois=678242
2026-03-15T20:19:31.054474Z INFO property_map_server: Building POI spatial grid index
2026-03-15T20:19:31.059717Z INFO property_map_server: Loading place data from /app/data/places.parquet
2026-03-15T20:19:31.059723Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
2026-03-15T20:19:31.069876Z INFO property_map_server::data::places: Loaded 3474 places
2026-03-15T20:19:31.070582Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
2026-03-15T20:19:31.070632Z INFO property_map_server: Place data loaded places=3474
2026-03-15T20:19:31.070640Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
2026-03-15T20:19:31.070644Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
2026-03-15T20:19:31.079677Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
2026-03-15T20:19:39.095251Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
2026-03-15T20:19:39.095261Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
2026-03-15T20:19:39.095277Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
2026-03-15T20:19:39.185235Z INFO property_map_server: PMTiles loaded successfully
2026-03-15T20:19:39.228620Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
2026-03-15T20:19:39.315674Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
2026-03-15T20:19:39.315892Z INFO property_map_server: Precomputed features response groups=9
2026-03-15T20:19:39.315908Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
2026-03-15T20:19:39.370690Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
2026-03-15T20:19:39.373624Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
2026-03-15T20:19:39.378395Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
2026-03-15T20:19:39.427367Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
2026-03-15T20:19:39.430988Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
2026-03-15T20:19:39.431004Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
2026-03-15T20:19:39.431017Z INFO property_map_server: Loading travel time data from /app/data/travel-times
2026-03-15T20:19:39.437636Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
2026-03-15T20:19:39.437807Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
2026-03-15T20:19:39.437966Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
2026-03-15T20:19:39.439692Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
2026-03-15T20:19:39.439715Z INFO property_map_server: Travel time store loaded modes=4
2026-03-15T20:19:39.439777Z INFO property_map_server: Precomputed AI filters system prompt
2026-03-15T20:19:42.954025Z INFO property_map_server: All memory pages locked (mlockall)
2026-03-15T20:19:42.954067Z INFO property_map_server: Server listening on 0.0.0.0:8001
2026-03-15T20:19:43.261880Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:19:43.263169Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:20:43.749947Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:20:43.751657Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:20:44.102549Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:20:44.111787Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:20:44.529068Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:20:44.529991Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:20:44.546765Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:20:44.551558Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:20:44.558857Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:20:44.565720Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:20:44.574121Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:20:44.577816Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:20:50.493074Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:20:50.493085Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:20:50.514758Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:20:50.515163Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:20:50.526484Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:20:50.530275Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:20:50.535884Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:20:50.542453Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:20:50.551188Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:20:50.556666Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:20:51.102224Z INFO property_map_server::routes::features: GET /api/features
2026-03-15T20:20:51.110261Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
2026-03-15T20:21:45.349498Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:21:45.349655Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:21:45.349664Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:21:45.421388Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:21:45.421400Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:21:47.937781Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:21:47.937791Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:21:48.227331Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:21:48.227342Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:21:48.286224Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:21:48.286233Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:22:05.552980Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T20:31:37.445987Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:31:37.446148Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:31:37.446155Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:31:37.534049Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:31:37.534061Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:31:40.177015Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:31:40.177028Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:31:40.482618Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:31:40.482630Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:31:40.546018Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:31:40.546027Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:31:53.986877Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:31:53.987039Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:31:53.987045Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:31:54.063239Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:31:54.063248Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:31:56.648053Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:31:56.648065Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:31:56.965183Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:31:56.965194Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:31:57.027327Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:31:57.027342Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:31:59.916992Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T20:32:04.424692Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:32:04.424881Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:32:04.424890Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:32:04.493173Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:32:04.493184Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:32:07.324815Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:32:07.324827Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:32:07.628171Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:32:07.628182Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:32:07.697976Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:32:07.697987Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:32:09.987127Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T20:32:15.239857Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:32:15.240016Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:32:15.240027Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:32:15.312610Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:32:15.312619Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:32:17.889502Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:32:17.889512Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:32:18.181929Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:32:18.181939Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:32:18.245757Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:32:18.245767Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:32:23.084864Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:32:23.085017Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:32:23.085025Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:32:23.149174Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:32:23.149184Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:32:25.785485Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:32:25.785496Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:32:26.076631Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:32:26.076644Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:32:26.135954Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:32:26.135967Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:32:28.972888Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T20:42:17.088723Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:42:17.088899Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:42:17.088907Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:42:17.150999Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:42:17.151009Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:42:19.827707Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:42:19.827719Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:42:20.135500Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:42:20.135509Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:42:20.197192Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:42:20.197202Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:42:56.103982Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:42:56.104138Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:42:56.104143Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:42:56.204428Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:42:56.204439Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:42:58.832976Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:42:58.832987Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:42:59.132876Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:42:59.132886Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:42:59.192137Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:42:59.192148Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:43:26.892462Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T20:56:11.923543Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:56:11.923747Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:56:11.923760Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:56:12.004141Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:56:12.004153Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:56:18.202087Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:56:18.202098Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:56:18.603337Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:56:18.603351Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:56:18.676290Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:56:18.676299Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:56:51.555700Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T20:56:51.555882Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T20:56:51.555890Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T20:56:51.649380Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T20:56:51.649390Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T20:56:54.479400Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T20:56:54.479413Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T20:56:54.787050Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T20:56:54.787063Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T20:56:54.853848Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T20:56:54.853857Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T20:57:26.731834Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T21:00:33.407080Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T21:00:33.407232Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T21:00:33.407238Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T21:00:33.499072Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T21:00:33.499081Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T21:00:36.274914Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T21:00:36.274924Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T21:00:36.575562Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T21:00:36.575572Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T21:00:36.638808Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T21:00:36.638817Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T21:00:50.949722Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T21:03:11.209421Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T21:03:11.209606Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T21:03:11.209612Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T21:03:11.289498Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T21:03:11.289508Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T21:03:13.875043Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T21:03:13.875053Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T21:03:14.208971Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T21:03:14.208982Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T21:03:14.272084Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T21:03:14.272095Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T21:03:28.932912Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T21:03:28.933073Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T21:03:28.933082Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T21:03:29.004139Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T21:03:29.004150Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T21:03:31.538874Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T21:03:31.538884Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T21:03:31.819659Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T21:03:31.819669Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T21:03:31.879197Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T21:03:31.879206Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T21:03:46.003768Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T21:19:16.107508Z INFO property_map_server: Prometheus metrics initialized
2026-03-15T21:19:16.107694Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
2026-03-15T21:19:16.107702Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
2026-03-15T21:19:16.272588Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
2026-03-15T21:19:16.272599Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-03-15T21:19:22.944816Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
2026-03-15T21:19:22.944828Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
2026-03-15T21:19:23.587534Z INFO property_map_server::data::property: buy listings joined rows=474965
2026-03-15T21:19:23.589329Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
2026-03-15T21:19:23.673638Z INFO property_map_server::data::property: rent listings joined rows=24345
2026-03-15T21:19:23.673648Z INFO property_map_server::data::property: Concatenating all data sources
2026-03-15T21:20:10.134367Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
2026-03-15T21:20:10.173078Z INFO property_map_server::data::property: Feature columns from config numeric=55 enums=13 total=68
2026-03-15T21:20:11.640174Z INFO property_map_server::data::property: Combined data selected rows=15702691
2026-03-15T21:20:11.825306Z INFO property_map_server::data::property: Extracting numeric feature columns
2026-03-15T21:20:12.283833Z INFO property_map_server::data::property: Computing histograms for numeric features
2026-03-15T21:20:13.735551Z INFO property_map_server::data::property: Extracting string columns
2026-03-15T21:20:16.091555Z INFO property_map_server::data::property: Building enum features
2026-03-15T21:20:17.505895Z INFO property_map_server::data::property: Extracting renovation history
2026-03-15T21:20:19.730770Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
2026-03-15T21:20:19.730780Z INFO property_map_server::data::property: Extracting listing features
2026-03-15T21:20:20.299294Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
2026-03-15T21:20:20.299302Z INFO property_map_server::data::property: Sorting rows by spatial locality
2026-03-15T21:20:21.284951Z INFO property_map_server::data::property: Building interned strings
2026-03-15T21:20:27.822185Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
2026-03-15T21:20:30.681373Z INFO property_map_server::data::property: Data loading complete
2026-03-15T21:20:32.249224Z INFO property_map_server: Property data loaded rows=15702691 features=68 enums=13
2026-03-15T21:20:32.249237Z INFO property_map_server: Building spatial grid index (0.01° cells)
2026-03-15T21:20:32.646329Z INFO property_map_server: Precomputing H3 cells at resolution 12
2026-03-15T21:20:32.646339Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
2026-03-15T21:20:33.084366Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
2026-03-15T21:20:33.084392Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
2026-03-15T21:20:33.084431Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
2026-03-15T21:20:33.115170Z INFO property_map_server::data::poi: Loaded 678242 POIs
2026-03-15T21:20:33.231940Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
2026-03-15T21:20:33.232512Z INFO property_map_server::data::poi: POI data loading complete.
2026-03-15T21:20:33.270242Z INFO property_map_server: POI data loaded pois=678242
2026-03-15T21:20:33.270252Z INFO property_map_server: Building POI spatial grid index
2026-03-15T21:20:33.275905Z INFO property_map_server: Loading place data from /app/data/places.parquet
2026-03-15T21:20:33.275913Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
2026-03-15T21:20:33.280404Z INFO property_map_server::data::places: Loaded 3474 places
2026-03-15T21:20:33.281434Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
2026-03-15T21:20:33.281491Z INFO property_map_server: Place data loaded places=3474
2026-03-15T21:20:33.281502Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
2026-03-15T21:20:33.281509Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
2026-03-15T21:20:33.282358Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
2026-03-15T21:20:41.788648Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
2026-03-15T21:20:42.014614Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
2026-03-15T21:20:42.014635Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
2026-03-15T21:20:42.107058Z INFO property_map_server: PMTiles loaded successfully
2026-03-15T21:20:42.150975Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
2026-03-15T21:20:42.241257Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
2026-03-15T21:20:42.241801Z INFO property_map_server: Precomputed features response groups=8
2026-03-15T21:20:42.241820Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
2026-03-15T21:20:42.361970Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
2026-03-15T21:20:42.370041Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
2026-03-15T21:20:42.375199Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
2026-03-15T21:20:42.642209Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
2026-03-15T21:20:42.651503Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
2026-03-15T21:20:42.651536Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
2026-03-15T21:20:42.651550Z INFO property_map_server: Loading travel time data from /app/data/travel-times
2026-03-15T21:20:42.658771Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
2026-03-15T21:20:42.658942Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
2026-03-15T21:20:42.659104Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
2026-03-15T21:20:42.661000Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
2026-03-15T21:20:42.661031Z INFO property_map_server: Travel time store loaded modes=4
2026-03-15T21:20:42.661088Z INFO property_map_server: Precomputed AI filters system prompt
2026-03-15T21:20:43.239746Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
2026-03-15T21:20:47.481371Z INFO property_map_server: All memory pages locked (mlockall)
2026-03-15T21:20:47.481419Z INFO property_map_server: Server listening on 0.0.0.0:8001

View file

@ -168,7 +168,12 @@ impl PostcodeData {
local_aabbs.push((aabb_south, aabb_west, aabb_north, aabb_east)); local_aabbs.push((aabb_south, aabb_west, aabb_north, aabb_east));
} }
Ok::<_, anyhow::Error>((local_postcodes, local_polygons, local_centroids, local_aabbs)) Ok::<_, anyhow::Error>((
local_postcodes,
local_polygons,
local_centroids,
local_aabbs,
))
}) })
.collect::<Result<Vec<_>, _>>()?; .collect::<Result<Vec<_>, _>>()?;

View file

@ -543,10 +543,9 @@ impl PropertyData {
listings_buy listings_buy
.lazy() .lazy()
.with_column( .with_column(
(col("Asking price").cast(DataType::Float64) (col("Asking price").cast(DataType::Float64) / col("Total floor area (sqm)"))
/ col("Total floor area (sqm)")) .round(0)
.round(0) .alias("Asking price per sqm"),
.alias("Asking price per sqm"),
) )
.collect() .collect()
.context("Failed to derive Asking price per sqm")? .context("Failed to derive Asking price per sqm")?

View file

@ -820,8 +820,7 @@ async fn poll_pocketbase_counts(state: &AppState) {
("type", "redeemed"), ("type", "redeemed"),
), ),
] { ] {
if let Some(total) = pb_count(&state.http_client, pb_url, &token, "invites", filter).await if let Some(total) = pb_count(&state.http_client, pb_url, &token, "invites", filter).await {
{
gauge!(metric, labels.0 => labels.1.to_string()).set(total as f64); gauge!(metric, labels.0 => labels.1.to_string()).set(total as f64);
} }
} }

View file

@ -3,9 +3,9 @@ use std::sync::Arc;
use axum::http::StatusCode; use axum::http::StatusCode;
use axum::response::Json; use axum::response::Json;
use axum::Extension; use axum::Extension;
use metrics::counter;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::{json, Value}; use serde_json::{json, Value};
use metrics::counter;
use tracing::{info, warn}; use tracing::{info, warn};
use crate::auth::OptionalUser; use crate::auth::OptionalUser;

View file

@ -4,11 +4,11 @@ use axum::extract::Query;
use axum::http::StatusCode; use axum::http::StatusCode;
use axum::response::{IntoResponse, Json}; use axum::response::{IntoResponse, Json};
use axum::Extension; use axum::Extension;
use metrics::histogram;
use rayon::prelude::*; use rayon::prelude::*;
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::{Map, Value}; use serde_json::{Map, Value};
use metrics::histogram;
use tracing::info; use tracing::info;
use crate::aggregation::Aggregator; use crate::aggregation::Aggregator;
@ -230,8 +230,13 @@ pub async fn get_hexagons(
) { ) {
continue; continue;
} }
let cell_id = let cell_id = cell_for_row_cached(
cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache); row,
precomputed,
h3_res,
need_parent,
&mut h3_cache,
);
let agg = local_groups let agg = local_groups
.entry(cell_id) .entry(cell_id)
.or_insert_with(|| Aggregator::new(num_features)); .or_insert_with(|| Aggregator::new(num_features));

View file

@ -4,10 +4,10 @@ use axum::extract::{Path, Query};
use axum::http::StatusCode; use axum::http::StatusCode;
use axum::response::{IntoResponse, Json}; use axum::response::{IntoResponse, Json};
use axum::Extension; use axum::Extension;
use metrics::histogram;
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::{Map, Value}; use serde_json::{Map, Value};
use metrics::histogram;
use tracing::info; use tracing::info;
use crate::aggregation::Aggregator; use crate::aggregation::Aggregator;

View file

@ -67,9 +67,7 @@ enum FeatureAccum {
global_max: f32, global_max: f32,
}, },
/// Enum: count occurrences per variant index. /// Enum: count occurrences per variant index.
Enum { Enum { value_counts: Vec<u64> },
value_counts: Vec<u64>,
},
/// Feature skipped (not in field_set). /// Feature skipped (not in field_set).
Skip, Skip,
} }