This commit is contained in:
Andras Schmelczer 2026-03-15 17:38:26 +00:00
parent 80c093b7ba
commit f72c43a9fa
101 changed files with 2168 additions and 1177 deletions

View file

@ -128,7 +128,7 @@
"CROSS_DEDUP_BUY = 2_220\n",
"hk_buy_total = len(buy.filter(pl.col(\"source\") == \"Home.co.uk\")) + CROSS_DEDUP_BUY\n",
"hk_buy_unique = len(buy.filter(pl.col(\"source\") == \"Home.co.uk\"))\n",
"print(f\"\\n--- BUY overlap analysis ---\")\n",
"print(\"\\n--- BUY overlap analysis ---\")\n",
"print(f\"Home.co.uk scraped (before dedup): {hk_buy_total:,}\")\n",
"print(f\"Home.co.uk unique (after dedup): {hk_buy_unique:,}\")\n",
"print(f\"Cross-source duplicates removed: {CROSS_DEDUP_BUY:,}\")\n",
@ -1114,7 +1114,7 @@
"\n",
"print(f\"Outcodes with home.co.uk listings: {len(oc_comparison)}\")\n",
"print(f\"Total outcodes in dataset: {buy_oc['outcode'].drop_nulls().n_unique()}\")\n",
"print(f\"\\nHome.co.uk coverage by outcode:\")\n",
"print(\"\\nHome.co.uk coverage by outcode:\")\n",
"oc_comparison"
]
},
@ -7315,7 +7315,7 @@
"print(f\"Covered outcodes: {covered_count}\")\n",
"print(f\"Total outcodes: {total_outcodes}\")\n",
"print()\n",
"print(f\"In covered outcodes:\")\n",
"print(\"In covered outcodes:\")\n",
"print(f\" Rightmove: {rm_in_covered:,} listings\")\n",
"print(f\" Home.co.uk: {hk_buy_unique:,} unique listings\")\n",
"print(f\" HK/RM ratio: {ratio_in_covered:.2f}\")\n",
@ -7326,13 +7326,13 @@
"projected_dedup = int(projected_hk * CROSS_DEDUP_BUY / hk_buy_total)\n",
"projected_unique = projected_hk - projected_dedup\n",
"\n",
"print(f\"--- Projected full-coverage estimates ---\")\n",
"print(\"--- Projected full-coverage estimates ---\")\n",
"print(f\" Projected home.co.uk total: ~{projected_hk:,}\")\n",
"print(f\" Projected cross-dedup: ~{projected_dedup:,}\")\n",
"print(f\" Projected unique additions: ~{projected_unique:,}\")\n",
"print(f\" Projected merged dataset: ~{rm_buy + projected_unique:,} ({projected_unique/rm_buy*100:.1f}% increase)\")\n",
"print()\n",
"print(f\"⚠️ These are rough estimates — the covered outcodes may not be representative\")"
"print(\"⚠️ These are rough estimates — the covered outcodes may not be representative\")"
]
},
{