This commit is contained in:
Andras Schmelczer 2026-01-28 22:10:51 +00:00
parent 500b9ef2aa
commit c08970c06c

View file

@ -99,7 +99,7 @@
"row_count = lf.select(pl.len()).collect().item()\n",
"london_count = lf.filter(LONDON_FILTER).select(pl.len()).collect().item()\n",
"print(f\"Total transactions: {row_count:,}\")\n",
"print(f\"London transactions: {london_count:,} ({london_count/row_count*100:.1f}%)\")"
"print(f\"London transactions: {london_count:,} ({london_count / row_count * 100:.1f}%)\")"
]
},
{
@ -208,6 +208,7 @@
" pl.col(\"price\").quantile(0.95).alias(\"p95\"),\n",
" ).collect()\n",
"\n",
"\n",
"print(\"=== National Price Statistics ===\")\n",
"display(get_price_stats(lf))\n",
"print(\"\\n=== London Price Statistics ===\")\n",
@ -1115,16 +1116,36 @@
" lazy_frame.group_by(\"property_type\")\n",
" .agg(pl.len().alias(\"count\"))\n",
" .sort(\"count\", descending=True)\n",
" .with_columns(pl.col(\"property_type\").replace(PROPERTY_TYPE_MAP).alias(\"type_name\"))\n",
" .with_columns(\n",
" pl.col(\"property_type\").replace(PROPERTY_TYPE_MAP).alias(\"type_name\")\n",
" )\n",
" .collect()\n",
" )\n",
"\n",
"\n",
"property_types_national = get_property_types(lf)\n",
"property_types_london = get_property_types(lf.filter(LONDON_FILTER))\n",
"\n",
"fig = make_subplots(rows=1, cols=2, subplot_titles=(\"National\", \"London\"))\n",
"fig.add_trace(go.Bar(x=property_types_national[\"type_name\"], y=property_types_national[\"count\"], name=\"National\"), row=1, col=1)\n",
"fig.add_trace(go.Bar(x=property_types_london[\"type_name\"], y=property_types_london[\"count\"], name=\"London\", marker_color=\"crimson\"), row=1, col=2)\n",
"fig.add_trace(\n",
" go.Bar(\n",
" x=property_types_national[\"type_name\"],\n",
" y=property_types_national[\"count\"],\n",
" name=\"National\",\n",
" ),\n",
" row=1,\n",
" col=1,\n",
")\n",
"fig.add_trace(\n",
" go.Bar(\n",
" x=property_types_london[\"type_name\"],\n",
" y=property_types_london[\"count\"],\n",
" name=\"London\",\n",
" marker_color=\"crimson\",\n",
" ),\n",
" row=1,\n",
" col=2,\n",
")\n",
"fig.update_layout(title_text=\"Property Type Distribution\", showlegend=False, height=400)\n",
"fig.show()"
]
@ -2013,12 +2034,34 @@
" .collect()\n",
" )\n",
"\n",
"\n",
"tenure_national = get_tenure(lf)\n",
"tenure_london = get_tenure(lf.filter(LONDON_FILTER))\n",
"\n",
"fig = make_subplots(rows=1, cols=2, specs=[[{\"type\": \"pie\"}, {\"type\": \"pie\"}]], subplot_titles=(\"National\", \"London\"))\n",
"fig.add_trace(go.Pie(labels=tenure_national[\"tenure_name\"], values=tenure_national[\"count\"], name=\"National\"), row=1, col=1)\n",
"fig.add_trace(go.Pie(labels=tenure_london[\"tenure_name\"], values=tenure_london[\"count\"], name=\"London\"), row=1, col=2)\n",
"fig = make_subplots(\n",
" rows=1,\n",
" cols=2,\n",
" specs=[[{\"type\": \"pie\"}, {\"type\": \"pie\"}]],\n",
" subplot_titles=(\"National\", \"London\"),\n",
")\n",
"fig.add_trace(\n",
" go.Pie(\n",
" labels=tenure_national[\"tenure_name\"],\n",
" values=tenure_national[\"count\"],\n",
" name=\"National\",\n",
" ),\n",
" row=1,\n",
" col=1,\n",
")\n",
"fig.add_trace(\n",
" go.Pie(\n",
" labels=tenure_london[\"tenure_name\"],\n",
" values=tenure_london[\"count\"],\n",
" name=\"London\",\n",
" ),\n",
" row=1,\n",
" col=2,\n",
")\n",
"fig.update_layout(title_text=\"Freehold vs Leasehold\", height=400)\n",
"fig.show()"
]
@ -2958,10 +3001,16 @@
" .collect()\n",
")\n",
"\n",
"fig = px.bar(top_counties.to_pandas(), x=\"count\", y=\"county\", orientation=\"h\",\n",
"fig = px.bar(\n",
" top_counties.to_pandas(),\n",
" x=\"count\",\n",
" y=\"county\",\n",
" orientation=\"h\",\n",
" title=\"Top 20 Counties by Transaction Volume\",\n",
" color=\"avg_price\", color_continuous_scale=\"Blues\",\n",
" labels={\"count\": \"Transactions\", \"county\": \"County\", \"avg_price\": \"Avg Price\"})\n",
" color=\"avg_price\",\n",
" color_continuous_scale=\"Blues\",\n",
" labels={\"count\": \"Transactions\", \"county\": \"County\", \"avg_price\": \"Avg Price\"},\n",
")\n",
"fig.update_layout(yaxis={\"categoryorder\": \"total ascending\"}, height=600)\n",
"fig.show()"
]
@ -4591,15 +4640,29 @@
"london_boroughs = (\n",
" lf.filter(LONDON_FILTER)\n",
" .group_by(\"district\")\n",
" .agg(pl.len().alias(\"count\"), pl.col(\"price\").mean().alias(\"avg_price\"), pl.col(\"price\").median().alias(\"median_price\"))\n",
" .agg(\n",
" pl.len().alias(\"count\"),\n",
" pl.col(\"price\").mean().alias(\"avg_price\"),\n",
" pl.col(\"price\").median().alias(\"median_price\"),\n",
" )\n",
" .sort(\"avg_price\", descending=True)\n",
" .collect()\n",
")\n",
"\n",
"fig = px.bar(london_boroughs.to_pandas(), x=\"avg_price\", y=\"district\", orientation=\"h\",\n",
"fig = px.bar(\n",
" london_boroughs.to_pandas(),\n",
" x=\"avg_price\",\n",
" y=\"district\",\n",
" orientation=\"h\",\n",
" title=\"London Boroughs by Average Price\",\n",
" color=\"count\", color_continuous_scale=\"Reds\",\n",
" labels={\"avg_price\": \"Average Price (£)\", \"district\": \"Borough\", \"count\": \"Transactions\"})\n",
" color=\"count\",\n",
" color_continuous_scale=\"Reds\",\n",
" labels={\n",
" \"avg_price\": \"Average Price (£)\",\n",
" \"district\": \"Borough\",\n",
" \"count\": \"Transactions\",\n",
" },\n",
")\n",
"fig.update_layout(yaxis={\"categoryorder\": \"total ascending\"}, height=800)\n",
"fig.show()"
]
@ -4622,11 +4685,16 @@
" return (\n",
" lazy_frame.with_columns(pl.col(\"date_of_transfer\").dt.year().alias(\"year\"))\n",
" .group_by(\"year\")\n",
" .agg(pl.len().alias(\"count\"), pl.col(\"price\").mean().alias(\"avg_price\"), pl.col(\"price\").median().alias(\"median_price\"))\n",
" .agg(\n",
" pl.len().alias(\"count\"),\n",
" pl.col(\"price\").mean().alias(\"avg_price\"),\n",
" pl.col(\"price\").median().alias(\"median_price\"),\n",
" )\n",
" .sort(\"year\")\n",
" .collect()\n",
" )\n",
"\n",
"\n",
"yearly_national = get_yearly_stats(lf)\n",
"yearly_london = get_yearly_stats(lf.filter(LONDON_FILTER))"
]
@ -5473,9 +5541,29 @@
],
"source": [
"fig = go.Figure()\n",
"fig.add_trace(go.Scatter(x=yearly_national[\"year\"], y=yearly_national[\"avg_price\"], name=\"National\", mode=\"lines+markers\"))\n",
"fig.add_trace(go.Scatter(x=yearly_london[\"year\"], y=yearly_london[\"avg_price\"], name=\"London\", mode=\"lines+markers\", line=dict(color=\"crimson\")))\n",
"fig.update_layout(title=\"Average Price by Year\", xaxis_title=\"Year\", yaxis_title=\"Average Price (£)\", height=500)\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=yearly_national[\"year\"],\n",
" y=yearly_national[\"avg_price\"],\n",
" name=\"National\",\n",
" mode=\"lines+markers\",\n",
" )\n",
")\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=yearly_london[\"year\"],\n",
" y=yearly_london[\"avg_price\"],\n",
" name=\"London\",\n",
" mode=\"lines+markers\",\n",
" line=dict(color=\"crimson\"),\n",
" )\n",
")\n",
"fig.update_layout(\n",
" title=\"Average Price by Year\",\n",
" xaxis_title=\"Year\",\n",
" yaxis_title=\"Average Price (£)\",\n",
" height=500,\n",
")\n",
"fig.show()"
]
},
@ -6333,14 +6421,38 @@
}
],
"source": [
"yearly_national_pct = yearly_national.with_columns((pl.col(\"avg_price\").pct_change() * 100).alias(\"yoy_change\"))\n",
"yearly_london_pct = yearly_london.with_columns((pl.col(\"avg_price\").pct_change() * 100).alias(\"yoy_change\"))\n",
"yearly_national_pct = yearly_national.with_columns(\n",
" (pl.col(\"avg_price\").pct_change() * 100).alias(\"yoy_change\")\n",
")\n",
"yearly_london_pct = yearly_london.with_columns(\n",
" (pl.col(\"avg_price\").pct_change() * 100).alias(\"yoy_change\")\n",
")\n",
"\n",
"fig = go.Figure()\n",
"fig.add_trace(go.Bar(x=yearly_national_pct[\"year\"], y=yearly_national_pct[\"yoy_change\"], name=\"National\", opacity=0.7))\n",
"fig.add_trace(go.Bar(x=yearly_london_pct[\"year\"], y=yearly_london_pct[\"yoy_change\"], name=\"London\", opacity=0.7))\n",
"fig.add_trace(\n",
" go.Bar(\n",
" x=yearly_national_pct[\"year\"],\n",
" y=yearly_national_pct[\"yoy_change\"],\n",
" name=\"National\",\n",
" opacity=0.7,\n",
" )\n",
")\n",
"fig.add_trace(\n",
" go.Bar(\n",
" x=yearly_london_pct[\"year\"],\n",
" y=yearly_london_pct[\"yoy_change\"],\n",
" name=\"London\",\n",
" opacity=0.7,\n",
" )\n",
")\n",
"fig.add_hline(y=0, line_dash=\"dash\", line_color=\"gray\")\n",
"fig.update_layout(title=\"Year-over-Year Price Change (%)\", xaxis_title=\"Year\", yaxis_title=\"Change (%)\", barmode=\"group\", height=500)\n",
"fig.update_layout(\n",
" title=\"Year-over-Year Price Change (%)\",\n",
" xaxis_title=\"Year\",\n",
" yaxis_title=\"Change (%)\",\n",
" barmode=\"group\",\n",
" height=500,\n",
")\n",
"fig.show()"
]
},
@ -7448,18 +7560,43 @@
" lazy_frame.filter(pl.col(\"date_of_transfer\").dt.year() >= start_year)\n",
" .with_columns(pl.col(\"date_of_transfer\").dt.truncate(\"1mo\").alias(\"month\"))\n",
" .group_by(\"month\")\n",
" .agg(pl.len().alias(\"count\"), pl.col(\"price\").mean().alias(\"avg_price\"), pl.col(\"price\").median().alias(\"median_price\"))\n",
" .agg(\n",
" pl.len().alias(\"count\"),\n",
" pl.col(\"price\").mean().alias(\"avg_price\"),\n",
" pl.col(\"price\").median().alias(\"median_price\"),\n",
" )\n",
" .sort(\"month\")\n",
" .collect()\n",
" )\n",
"\n",
"\n",
"monthly_national = get_monthly_stats(lf)\n",
"monthly_london = get_monthly_stats(lf.filter(LONDON_FILTER))\n",
"\n",
"fig = go.Figure()\n",
"fig.add_trace(go.Scatter(x=monthly_national[\"month\"], y=monthly_national[\"avg_price\"], name=\"National\", mode=\"lines\"))\n",
"fig.add_trace(go.Scatter(x=monthly_london[\"month\"], y=monthly_london[\"avg_price\"], name=\"London\", mode=\"lines\", line=dict(color=\"crimson\")))\n",
"fig.update_layout(title=\"Monthly Average Price (2015 onwards)\", xaxis_title=\"Month\", yaxis_title=\"Average Price (£)\", height=500)\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=monthly_national[\"month\"],\n",
" y=monthly_national[\"avg_price\"],\n",
" name=\"National\",\n",
" mode=\"lines\",\n",
" )\n",
")\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=monthly_london[\"month\"],\n",
" y=monthly_london[\"avg_price\"],\n",
" name=\"London\",\n",
" mode=\"lines\",\n",
" line=dict(color=\"crimson\"),\n",
" )\n",
")\n",
"fig.update_layout(\n",
" title=\"Monthly Average Price (2015 onwards)\",\n",
" xaxis_title=\"Month\",\n",
" yaxis_title=\"Average Price (£)\",\n",
" height=500,\n",
")\n",
"fig.show()"
]
},
@ -8431,9 +8568,19 @@
" .collect()\n",
")\n",
"\n",
"fig = px.line(yearly_by_type_london.to_pandas(), x=\"year\", y=\"avg_price\", color=\"type_name\",\n",
"fig = px.line(\n",
" yearly_by_type_london.to_pandas(),\n",
" x=\"year\",\n",
" y=\"avg_price\",\n",
" color=\"type_name\",\n",
" title=\"London: Average Price by Property Type Over Time\",\n",
" labels={\"avg_price\": \"Average Price (£)\", \"year\": \"Year\", \"type_name\": \"Property Type\"}, markers=True)\n",
" labels={\n",
" \"avg_price\": \"Average Price (£)\",\n",
" \"year\": \"Year\",\n",
" \"type_name\": \"Property Type\",\n",
" },\n",
" markers=True,\n",
")\n",
"fig.update_layout(height=500)\n",
"fig.show()"
]
@ -9539,12 +9686,35 @@
}
],
"source": [
"monthly_london_rolling = monthly_london.with_columns(pl.col(\"avg_price\").rolling_mean(window_size=12).alias(\"rolling_12m_avg\"))\n",
"monthly_london_rolling = monthly_london.with_columns(\n",
" pl.col(\"avg_price\").rolling_mean(window_size=12).alias(\"rolling_12m_avg\")\n",
")\n",
"\n",
"fig = go.Figure()\n",
"fig.add_trace(go.Scatter(x=monthly_london_rolling[\"month\"], y=monthly_london_rolling[\"avg_price\"], name=\"Monthly\", mode=\"lines\", opacity=0.5))\n",
"fig.add_trace(go.Scatter(x=monthly_london_rolling[\"month\"], y=monthly_london_rolling[\"rolling_12m_avg\"], name=\"12-Month Rolling Avg\", mode=\"lines\", line=dict(width=3, color=\"crimson\")))\n",
"fig.update_layout(title=\"London: Monthly Price with 12-Month Rolling Average\", xaxis_title=\"Month\", yaxis_title=\"Average Price (£)\", height=500)\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=monthly_london_rolling[\"month\"],\n",
" y=monthly_london_rolling[\"avg_price\"],\n",
" name=\"Monthly\",\n",
" mode=\"lines\",\n",
" opacity=0.5,\n",
" )\n",
")\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=monthly_london_rolling[\"month\"],\n",
" y=monthly_london_rolling[\"rolling_12m_avg\"],\n",
" name=\"12-Month Rolling Avg\",\n",
" mode=\"lines\",\n",
" line=dict(width=3, color=\"crimson\"),\n",
" )\n",
")\n",
"fig.update_layout(\n",
" title=\"London: Monthly Price with 12-Month Rolling Average\",\n",
" xaxis_title=\"Month\",\n",
" yaxis_title=\"Average Price (£)\",\n",
" height=500,\n",
")\n",
"fig.show()"
]
},
@ -10422,15 +10592,39 @@
"yearly_indexed = yearly_national.with_columns(\n",
" (pl.col(\"avg_price\") / base_price_national * 100).alias(\"national_index\")\n",
").join(\n",
" yearly_london.with_columns((pl.col(\"avg_price\") / base_price_london * 100).alias(\"london_index\")).select(\"year\", \"london_index\"),\n",
" on=\"year\"\n",
" yearly_london.with_columns(\n",
" (pl.col(\"avg_price\") / base_price_london * 100).alias(\"london_index\")\n",
" ).select(\"year\", \"london_index\"),\n",
" on=\"year\",\n",
")\n",
"\n",
"fig = go.Figure()\n",
"fig.add_trace(go.Scatter(x=yearly_indexed[\"year\"], y=yearly_indexed[\"national_index\"], name=\"National\", mode=\"lines+markers\"))\n",
"fig.add_trace(go.Scatter(x=yearly_indexed[\"year\"], y=yearly_indexed[\"london_index\"], name=\"London\", mode=\"lines+markers\", line=dict(color=\"crimson\")))\n",
"fig.add_hline(y=100, line_dash=\"dash\", line_color=\"gray\", annotation_text=\"1995 Baseline\")\n",
"fig.update_layout(title=\"Price Index (1995 = 100)\", xaxis_title=\"Year\", yaxis_title=\"Index\", height=500)\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=yearly_indexed[\"year\"],\n",
" y=yearly_indexed[\"national_index\"],\n",
" name=\"National\",\n",
" mode=\"lines+markers\",\n",
" )\n",
")\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=yearly_indexed[\"year\"],\n",
" y=yearly_indexed[\"london_index\"],\n",
" name=\"London\",\n",
" mode=\"lines+markers\",\n",
" line=dict(color=\"crimson\"),\n",
" )\n",
")\n",
"fig.add_hline(\n",
" y=100, line_dash=\"dash\", line_color=\"gray\", annotation_text=\"1995 Baseline\"\n",
")\n",
"fig.update_layout(\n",
" title=\"Price Index (1995 = 100)\",\n",
" xaxis_title=\"Year\",\n",
" yaxis_title=\"Index\",\n",
" height=500,\n",
")\n",
"fig.show()"
]
},
@ -12076,12 +12270,24 @@
" .collect()\n",
")\n",
"\n",
"flats_by_borough = borough_by_type.filter(pl.col(\"property_type\") == \"F\").sort(\"median_price\")\n",
"flats_by_borough = borough_by_type.filter(pl.col(\"property_type\") == \"F\").sort(\n",
" \"median_price\"\n",
")\n",
"\n",
"fig = px.bar(flats_by_borough.to_pandas(), x=\"median_price\", y=\"district\", orientation=\"h\",\n",
"fig = px.bar(\n",
" flats_by_borough.to_pandas(),\n",
" x=\"median_price\",\n",
" y=\"district\",\n",
" orientation=\"h\",\n",
" title=\"Current Median Flat Prices by Borough (2023+)\",\n",
" color=\"count\", color_continuous_scale=\"Viridis\",\n",
" labels={\"median_price\": \"Median Price (£)\", \"district\": \"Borough\", \"count\": \"Sales Volume\"})\n",
" color=\"count\",\n",
" color_continuous_scale=\"Viridis\",\n",
" labels={\n",
" \"median_price\": \"Median Price (£)\",\n",
" \"district\": \"Borough\",\n",
" \"count\": \"Sales Volume\",\n",
" },\n",
")\n",
"fig.update_layout(yaxis={\"categoryorder\": \"total ascending\"}, height=800)\n",
"fig.show()"
]
@ -13011,14 +13217,21 @@
],
"source": [
"# Heatmap: Price by borough and property type\n",
"pivot_data = borough_by_type.pivot(on=\"type_name\", index=\"district\", values=\"median_price\").sort(\"Flat/Maisonette\", nulls_last=True)\n",
"pivot_data = borough_by_type.pivot(\n",
" on=\"type_name\", index=\"district\", values=\"median_price\"\n",
").sort(\"Flat/Maisonette\", nulls_last=True)\n",
"\n",
"fig = px.imshow(\n",
" pivot_data.select([\"Flat/Maisonette\", \"Terraced\", \"Semi-detached\", \"Detached\"]).to_pandas(),\n",
" pivot_data.select(\n",
" [\"Flat/Maisonette\", \"Terraced\", \"Semi-detached\", \"Detached\"]\n",
" ).to_pandas(),\n",
" y=pivot_data[\"district\"].to_list(),\n",
" x=[\"Flat\", \"Terraced\", \"Semi-detached\", \"Detached\"],\n",
" color_continuous_scale=\"RdYlGn_r\", aspect=\"auto\",\n",
" title=\"Median Price by Borough and Property Type (2023+)\", labels={\"color\": \"Price (£)\"})\n",
" color_continuous_scale=\"RdYlGn_r\",\n",
" aspect=\"auto\",\n",
" title=\"Median Price by Borough and Property Type (2023+)\",\n",
" labels={\"color\": \"Price (£)\"},\n",
")\n",
"fig.update_layout(height=900)\n",
"fig.show()"
]
@ -13083,6 +13296,7 @@
" return ((end_price / start_price) ** (1 / years) - 1) * 100\n",
" return None\n",
"\n",
"\n",
"borough_yearly_all = (\n",
" lf.filter(LONDON_FILTER)\n",
" .with_columns(pl.col(\"date_of_transfer\").dt.year().alias(\"year\"))\n",
@ -13101,8 +13315,16 @@
" if len(year_data) > 0:\n",
" prices[year] = year_data[\"median_price\"][0]\n",
" if all(year in prices for year in years_needed):\n",
" growth_data.append({\"district\": borough, \"price_2014\": prices[2014], \"price_2019\": prices[2019], \"price_2024\": prices[2024],\n",
" \"cagr_10yr\": calculate_cagr(prices[2014], prices[2024], 10), \"cagr_5yr\": calculate_cagr(prices[2019], prices[2024], 5)})\n",
" growth_data.append(\n",
" {\n",
" \"district\": borough,\n",
" \"price_2014\": prices[2014],\n",
" \"price_2019\": prices[2019],\n",
" \"price_2024\": prices[2024],\n",
" \"cagr_10yr\": calculate_cagr(prices[2014], prices[2024], 10),\n",
" \"cagr_5yr\": calculate_cagr(prices[2019], prices[2024], 5),\n",
" }\n",
" )\n",
"\n",
"growth_df = pl.DataFrame(growth_data).sort(\"cagr_5yr\", descending=True)\n",
"growth_df"
@ -14077,10 +14299,16 @@
],
"source": [
"# 5-year vs 10-year CAGR scatter\n",
"fig = px.scatter(growth_df.to_pandas(), x=\"cagr_10yr\", y=\"cagr_5yr\", text=\"district\",\n",
"fig = px.scatter(\n",
" growth_df.to_pandas(),\n",
" x=\"cagr_10yr\",\n",
" y=\"cagr_5yr\",\n",
" text=\"district\",\n",
" title=\"Borough Price Growth: 5-Year vs 10-Year CAGR\",\n",
" labels={\"cagr_10yr\": \"10-Year CAGR (%)\", \"cagr_5yr\": \"5-Year CAGR (%)\"},\n",
" color=\"price_2024\", color_continuous_scale=\"Viridis\")\n",
" color=\"price_2024\",\n",
" color_continuous_scale=\"Viridis\",\n",
")\n",
"fig.update_traces(textposition=\"top center\", textfont_size=8)\n",
"fig.add_hline(y=0, line_dash=\"dash\", line_color=\"gray\")\n",
"fig.add_vline(x=0, line_dash=\"dash\", line_color=\"gray\")\n",
@ -15045,11 +15273,20 @@
],
"source": [
"# 5-Year Price Growth Ranking\n",
"fig = px.bar(growth_df.sort(\"cagr_5yr\").to_pandas(), x=\"cagr_5yr\", y=\"district\", orientation=\"h\",\n",
" title=\"5-Year Price Growth by Borough (CAGR %)\", color=\"cagr_5yr\", color_continuous_scale=\"RdYlGn\",\n",
" labels={\"cagr_5yr\": \"5-Year CAGR (%)\", \"district\": \"Borough\"})\n",
"fig = px.bar(\n",
" growth_df.sort(\"cagr_5yr\").to_pandas(),\n",
" x=\"cagr_5yr\",\n",
" y=\"district\",\n",
" orientation=\"h\",\n",
" title=\"5-Year Price Growth by Borough (CAGR %)\",\n",
" color=\"cagr_5yr\",\n",
" color_continuous_scale=\"RdYlGn\",\n",
" labels={\"cagr_5yr\": \"5-Year CAGR (%)\", \"district\": \"Borough\"},\n",
")\n",
"fig.add_vline(x=0, line_dash=\"dash\", line_color=\"black\")\n",
"fig.update_layout(yaxis={\"categoryorder\": \"total ascending\"}, height=800, showlegend=False)\n",
"fig.update_layout(\n",
" yaxis={\"categoryorder\": \"total ascending\"}, height=800, showlegend=False\n",
")\n",
"fig.show()"
]
},
@ -16199,18 +16436,35 @@
"source": [
"# Property Type Mix by Borough\n",
"property_mix = (\n",
" lf.filter(LONDON_FILTER).filter(pl.col(\"date_of_transfer\").dt.year() >= 2020)\n",
" .group_by(\"district\", \"property_type\").agg(pl.len().alias(\"count\"))\n",
" .with_columns(pl.col(\"property_type\").replace(PROPERTY_TYPE_MAP).alias(\"type_name\")).collect()\n",
" lf.filter(LONDON_FILTER)\n",
" .filter(pl.col(\"date_of_transfer\").dt.year() >= 2020)\n",
" .group_by(\"district\", \"property_type\")\n",
" .agg(pl.len().alias(\"count\"))\n",
" .with_columns(pl.col(\"property_type\").replace(PROPERTY_TYPE_MAP).alias(\"type_name\"))\n",
" .collect()\n",
")\n",
"totals = property_mix.group_by(\"district\").agg(pl.col(\"count\").sum().alias(\"total\"))\n",
"property_mix_pct = property_mix.join(totals, on=\"district\").with_columns((pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"percentage\"))\n",
"flat_pct = property_mix_pct.filter(pl.col(\"property_type\") == \"F\").sort(\"percentage\", descending=True)\n",
"property_mix_pct = property_mix.join(totals, on=\"district\").with_columns(\n",
" (pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"percentage\")\n",
")\n",
"flat_pct = property_mix_pct.filter(pl.col(\"property_type\") == \"F\").sort(\n",
" \"percentage\", descending=True\n",
")\n",
"\n",
"fig = px.bar(property_mix_pct.to_pandas(), x=\"percentage\", y=\"district\", color=\"type_name\", orientation=\"h\",\n",
"fig = px.bar(\n",
" property_mix_pct.to_pandas(),\n",
" x=\"percentage\",\n",
" y=\"district\",\n",
" color=\"type_name\",\n",
" orientation=\"h\",\n",
" title=\"Property Type Mix by Borough (2020+)\",\n",
" labels={\"percentage\": \"% of Transactions\", \"district\": \"Borough\", \"type_name\": \"Property Type\"},\n",
" category_orders={\"district\": flat_pct[\"district\"].to_list()})\n",
" labels={\n",
" \"percentage\": \"% of Transactions\",\n",
" \"district\": \"Borough\",\n",
" \"type_name\": \"Property Type\",\n",
" },\n",
" category_orders={\"district\": flat_pct[\"district\"].to_list()},\n",
")\n",
"fig.update_layout(height=900, barmode=\"stack\", legend_title=\"Property Type\")\n",
"fig.show()"
]
@ -17194,19 +17448,38 @@
"source": [
"# Freehold vs Leasehold by Borough\n",
"tenure_mix = (\n",
" lf.filter(LONDON_FILTER).filter(pl.col(\"date_of_transfer\").dt.year() >= 2020)\n",
" .group_by(\"district\", \"duration\").agg(pl.len().alias(\"count\"))\n",
" .with_columns(pl.col(\"duration\").replace(TENURE_MAP).alias(\"tenure_name\")).collect()\n",
" lf.filter(LONDON_FILTER)\n",
" .filter(pl.col(\"date_of_transfer\").dt.year() >= 2020)\n",
" .group_by(\"district\", \"duration\")\n",
" .agg(pl.len().alias(\"count\"))\n",
" .with_columns(pl.col(\"duration\").replace(TENURE_MAP).alias(\"tenure_name\"))\n",
" .collect()\n",
")\n",
"totals_tenure = tenure_mix.group_by(\"district\").agg(\n",
" pl.col(\"count\").sum().alias(\"total\")\n",
")\n",
"tenure_mix_pct = tenure_mix.join(totals_tenure, on=\"district\").with_columns(\n",
" (pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"percentage\")\n",
")\n",
"freehold_pct = tenure_mix_pct.filter(pl.col(\"duration\") == \"F\").sort(\n",
" \"percentage\", descending=True\n",
")\n",
"totals_tenure = tenure_mix.group_by(\"district\").agg(pl.col(\"count\").sum().alias(\"total\"))\n",
"tenure_mix_pct = tenure_mix.join(totals_tenure, on=\"district\").with_columns((pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"percentage\"))\n",
"freehold_pct = tenure_mix_pct.filter(pl.col(\"duration\") == \"F\").sort(\"percentage\", descending=True)\n",
"\n",
"fig = px.bar(tenure_mix_pct.to_pandas(), x=\"percentage\", y=\"district\", color=\"tenure_name\", orientation=\"h\",\n",
"fig = px.bar(\n",
" tenure_mix_pct.to_pandas(),\n",
" x=\"percentage\",\n",
" y=\"district\",\n",
" color=\"tenure_name\",\n",
" orientation=\"h\",\n",
" title=\"Freehold vs Leasehold by Borough (2020+)\",\n",
" labels={\"percentage\": \"% of Transactions\", \"district\": \"Borough\", \"tenure_name\": \"Tenure\"},\n",
" labels={\n",
" \"percentage\": \"% of Transactions\",\n",
" \"district\": \"Borough\",\n",
" \"tenure_name\": \"Tenure\",\n",
" },\n",
" category_orders={\"district\": freehold_pct[\"district\"].to_list()},\n",
" color_discrete_map={\"Freehold\": \"seagreen\", \"Leasehold\": \"coral\"})\n",
" color_discrete_map={\"Freehold\": \"seagreen\", \"Leasehold\": \"coral\"},\n",
")\n",
"fig.update_layout(height=900, barmode=\"stack\")\n",
"fig.show()"
]
@ -18168,19 +18441,39 @@
"source": [
"# Market Activity Change (2024 vs 2019)\n",
"volume_trend = (\n",
" lf.filter(LONDON_FILTER).with_columns(pl.col(\"date_of_transfer\").dt.year().alias(\"year\"))\n",
" .filter(pl.col(\"year\") >= 2019).group_by(\"district\", \"year\").agg(pl.len().alias(\"count\")).collect()\n",
" lf.filter(LONDON_FILTER)\n",
" .with_columns(pl.col(\"date_of_transfer\").dt.year().alias(\"year\"))\n",
" .filter(pl.col(\"year\") >= 2019)\n",
" .group_by(\"district\", \"year\")\n",
" .agg(pl.len().alias(\"count\"))\n",
" .collect()\n",
")\n",
"volume_2019 = volume_trend.filter(pl.col(\"year\") == 2019).select(\n",
" \"district\", pl.col(\"count\").alias(\"count_2019\")\n",
")\n",
"volume_2024 = volume_trend.filter(pl.col(\"year\") == 2024).select(\n",
" \"district\", pl.col(\"count\").alias(\"count_2024\")\n",
")\n",
"volume_change = (\n",
" volume_2019.join(volume_2024, on=\"district\", how=\"inner\")\n",
" .with_columns(\n",
" (\n",
" (pl.col(\"count_2024\") - pl.col(\"count_2019\")) / pl.col(\"count_2019\") * 100\n",
" ).alias(\"pct_change\")\n",
" )\n",
" .sort(\"pct_change\", descending=True)\n",
")\n",
"volume_2019 = volume_trend.filter(pl.col(\"year\") == 2019).select(\"district\", pl.col(\"count\").alias(\"count_2019\"))\n",
"volume_2024 = volume_trend.filter(pl.col(\"year\") == 2024).select(\"district\", pl.col(\"count\").alias(\"count_2024\"))\n",
"volume_change = volume_2019.join(volume_2024, on=\"district\", how=\"inner\").with_columns(\n",
" ((pl.col(\"count_2024\") - pl.col(\"count_2019\")) / pl.col(\"count_2019\") * 100).alias(\"pct_change\")\n",
").sort(\"pct_change\", descending=True)\n",
"\n",
"fig = px.bar(volume_change.to_pandas(), x=\"pct_change\", y=\"district\", orientation=\"h\",\n",
"fig = px.bar(\n",
" volume_change.to_pandas(),\n",
" x=\"pct_change\",\n",
" y=\"district\",\n",
" orientation=\"h\",\n",
" title=\"Market Activity Change: 2024 vs 2019 Transaction Volume (%)\",\n",
" color=\"pct_change\", color_continuous_scale=\"RdYlGn\",\n",
" labels={\"pct_change\": \"Volume Change (%)\", \"district\": \"Borough\"})\n",
" color=\"pct_change\",\n",
" color_continuous_scale=\"RdYlGn\",\n",
" labels={\"pct_change\": \"Volume Change (%)\", \"district\": \"Borough\"},\n",
")\n",
"fig.add_vline(x=0, line_dash=\"dash\", line_color=\"black\")\n",
"fig.update_layout(yaxis={\"categoryorder\": \"total ascending\"}, height=800)\n",
"fig.show()"
@ -19120,17 +19413,32 @@
"source": [
"# New Build Share by Borough\n",
"new_build_rate = (\n",
" lf.filter(LONDON_FILTER).filter(pl.col(\"date_of_transfer\").dt.year() >= 2020)\n",
" .group_by(\"district\", \"old_new\").agg(pl.len().alias(\"count\")).collect()\n",
" lf.filter(LONDON_FILTER)\n",
" .filter(pl.col(\"date_of_transfer\").dt.year() >= 2020)\n",
" .group_by(\"district\", \"old_new\")\n",
" .agg(pl.len().alias(\"count\"))\n",
" .collect()\n",
")\n",
"totals_nb = new_build_rate.group_by(\"district\").agg(\n",
" pl.col(\"count\").sum().alias(\"total\")\n",
")\n",
"new_build_pct = (\n",
" new_build_rate.filter(pl.col(\"old_new\") == \"Y\")\n",
" .join(totals_nb, on=\"district\")\n",
" .with_columns((pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"new_build_pct\"))\n",
" .sort(\"new_build_pct\", descending=True)\n",
")\n",
"totals_nb = new_build_rate.group_by(\"district\").agg(pl.col(\"count\").sum().alias(\"total\"))\n",
"new_build_pct = new_build_rate.filter(pl.col(\"old_new\") == \"Y\").join(totals_nb, on=\"district\").with_columns(\n",
" (pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"new_build_pct\")\n",
").sort(\"new_build_pct\", descending=True)\n",
"\n",
"fig = px.bar(new_build_pct.to_pandas(), x=\"new_build_pct\", y=\"district\", orientation=\"h\",\n",
" title=\"New Build Share by Borough (2020+)\", color=\"new_build_pct\", color_continuous_scale=\"Blues\",\n",
" labels={\"new_build_pct\": \"New Build %\", \"district\": \"Borough\"})\n",
"fig = px.bar(\n",
" new_build_pct.to_pandas(),\n",
" x=\"new_build_pct\",\n",
" y=\"district\",\n",
" orientation=\"h\",\n",
" title=\"New Build Share by Borough (2020+)\",\n",
" color=\"new_build_pct\",\n",
" color_continuous_scale=\"Blues\",\n",
" labels={\"new_build_pct\": \"New Build %\", \"district\": \"Borough\"},\n",
")\n",
"fig.update_layout(yaxis={\"categoryorder\": \"total ascending\"}, height=800)\n",
"fig.show()"
]
@ -20260,21 +20568,66 @@
"source": [
"# Budget Finder: Borough Price Ranges\n",
"recent_prices = (\n",
" lf.filter(LONDON_FILTER).filter(pl.col(\"date_of_transfer\").dt.year() >= 2023)\n",
" .group_by(\"district\").agg(pl.col(\"price\").quantile(0.25).alias(\"p25\"), pl.col(\"price\").median().alias(\"median\"), pl.col(\"price\").quantile(0.75).alias(\"p75\"))\n",
" .sort(\"median\").collect()\n",
" lf.filter(LONDON_FILTER)\n",
" .filter(pl.col(\"date_of_transfer\").dt.year() >= 2023)\n",
" .group_by(\"district\")\n",
" .agg(\n",
" pl.col(\"price\").quantile(0.25).alias(\"p25\"),\n",
" pl.col(\"price\").median().alias(\"median\"),\n",
" pl.col(\"price\").quantile(0.75).alias(\"p75\"),\n",
" )\n",
" .sort(\"median\")\n",
" .collect()\n",
")\n",
"\n",
"fig = go.Figure()\n",
"fig.add_trace(go.Bar(name=\"25th percentile\", y=recent_prices[\"district\"], x=recent_prices[\"p25\"], orientation=\"h\", marker_color=\"lightgreen\"))\n",
"fig.add_trace(go.Bar(name=\"Median\", y=recent_prices[\"district\"], x=recent_prices[\"median\"] - recent_prices[\"p25\"], orientation=\"h\", marker_color=\"steelblue\", base=recent_prices[\"p25\"]))\n",
"fig.add_trace(go.Bar(name=\"75th percentile\", y=recent_prices[\"district\"], x=recent_prices[\"p75\"] - recent_prices[\"median\"], orientation=\"h\", marker_color=\"coral\", base=recent_prices[\"median\"]))\n",
"fig.add_trace(\n",
" go.Bar(\n",
" name=\"25th percentile\",\n",
" y=recent_prices[\"district\"],\n",
" x=recent_prices[\"p25\"],\n",
" orientation=\"h\",\n",
" marker_color=\"lightgreen\",\n",
" )\n",
")\n",
"fig.add_trace(\n",
" go.Bar(\n",
" name=\"Median\",\n",
" y=recent_prices[\"district\"],\n",
" x=recent_prices[\"median\"] - recent_prices[\"p25\"],\n",
" orientation=\"h\",\n",
" marker_color=\"steelblue\",\n",
" base=recent_prices[\"p25\"],\n",
" )\n",
")\n",
"fig.add_trace(\n",
" go.Bar(\n",
" name=\"75th percentile\",\n",
" y=recent_prices[\"district\"],\n",
" x=recent_prices[\"p75\"] - recent_prices[\"median\"],\n",
" orientation=\"h\",\n",
" marker_color=\"coral\",\n",
" base=recent_prices[\"median\"],\n",
" )\n",
")\n",
"\n",
"for budget in [300000, 500000, 750000, 1000000]:\n",
" fig.add_vline(x=budget, line_dash=\"dash\", line_color=\"gray\", annotation_text=f\"£{budget//1000}k\", annotation_position=\"top\")\n",
" fig.add_vline(\n",
" x=budget,\n",
" line_dash=\"dash\",\n",
" line_color=\"gray\",\n",
" annotation_text=f\"£{budget // 1000}k\",\n",
" annotation_position=\"top\",\n",
" )\n",
"\n",
"fig.update_layout(title=\"Borough Price Ranges (2023+) - Find Your Budget\", xaxis_title=\"Price (£)\",\n",
" yaxis={\"categoryorder\": \"total ascending\"}, barmode=\"stack\", height=900, legend_title=\"Price Point\")\n",
"fig.update_layout(\n",
" title=\"Borough Price Ranges (2023+) - Find Your Budget\",\n",
" xaxis_title=\"Price (£)\",\n",
" yaxis={\"categoryorder\": \"total ascending\"},\n",
" barmode=\"stack\",\n",
" height=900,\n",
" legend_title=\"Price Point\",\n",
")\n",
"fig.show()"
]
},
@ -20288,18 +20641,43 @@
"comparison_data = (\n",
" recent_prices.select(\"district\", \"median\")\n",
" .join(growth_df.select(\"district\", \"cagr_5yr\"), on=\"district\", how=\"left\")\n",
" .join(freehold_pct.select(\"district\", \"percentage\").rename({\"percentage\": \"freehold_pct\"}), on=\"district\", how=\"left\")\n",
" .join(\n",
" freehold_pct.select(\"district\", \"percentage\").rename(\n",
" {\"percentage\": \"freehold_pct\"}\n",
" ),\n",
" on=\"district\",\n",
" how=\"left\",\n",
" )\n",
" .join(volume_change.select(\"district\", \"count_2024\"), on=\"district\", how=\"left\")\n",
" .join(new_build_pct.select(\"district\", \"new_build_pct\"), on=\"district\", how=\"left\")\n",
")\n",
"\n",
"comparison_normalized = comparison_data.with_columns([\n",
" (100 - (pl.col(\"median\") - pl.col(\"median\").min()) / (pl.col(\"median\").max() - pl.col(\"median\").min()) * 100).alias(\"affordability_score\"),\n",
" ((pl.col(\"cagr_5yr\") - pl.col(\"cagr_5yr\").min()) / (pl.col(\"cagr_5yr\").max() - pl.col(\"cagr_5yr\").min()) * 100).alias(\"growth_score\"),\n",
" pl.col(\"freehold_pct\").alias(\"freehold_score\"),\n",
" ((pl.col(\"count_2024\") - pl.col(\"count_2024\").min()) / (pl.col(\"count_2024\").max() - pl.col(\"count_2024\").min()) * 100).alias(\"liquidity_score\"),\n",
" ((pl.col(\"new_build_pct\") - pl.col(\"new_build_pct\").min()) / (pl.col(\"new_build_pct\").max() - pl.col(\"new_build_pct\").min()) * 100).alias(\"development_score\"),\n",
"])"
"comparison_normalized = comparison_data.with_columns(\n",
" [\n",
" (\n",
" 100\n",
" - (pl.col(\"median\") - pl.col(\"median\").min())\n",
" / (pl.col(\"median\").max() - pl.col(\"median\").min())\n",
" * 100\n",
" ).alias(\"affordability_score\"),\n",
" (\n",
" (pl.col(\"cagr_5yr\") - pl.col(\"cagr_5yr\").min())\n",
" / (pl.col(\"cagr_5yr\").max() - pl.col(\"cagr_5yr\").min())\n",
" * 100\n",
" ).alias(\"growth_score\"),\n",
" pl.col(\"freehold_pct\").alias(\"freehold_score\"),\n",
" (\n",
" (pl.col(\"count_2024\") - pl.col(\"count_2024\").min())\n",
" / (pl.col(\"count_2024\").max() - pl.col(\"count_2024\").min())\n",
" * 100\n",
" ).alias(\"liquidity_score\"),\n",
" (\n",
" (pl.col(\"new_build_pct\") - pl.col(\"new_build_pct\").min())\n",
" / (pl.col(\"new_build_pct\").max() - pl.col(\"new_build_pct\").min())\n",
" * 100\n",
" ).alias(\"development_score\"),\n",
" ]\n",
")"
]
},
{
@ -21228,14 +21606,38 @@
],
"source": [
"# Borough Scores Heatmap\n",
"scores_only = comparison_normalized.select([\"district\", \"affordability_score\", \"growth_score\", \"freehold_score\", \"liquidity_score\", \"development_score\"]).drop_nulls().sort(\"affordability_score\", descending=True)\n",
"scores_only = (\n",
" comparison_normalized.select(\n",
" [\n",
" \"district\",\n",
" \"affordability_score\",\n",
" \"growth_score\",\n",
" \"freehold_score\",\n",
" \"liquidity_score\",\n",
" \"development_score\",\n",
" ]\n",
" )\n",
" .drop_nulls()\n",
" .sort(\"affordability_score\", descending=True)\n",
")\n",
"\n",
"fig = px.imshow(\n",
" scores_only.select([\"affordability_score\", \"growth_score\", \"freehold_score\", \"liquidity_score\", \"development_score\"]).to_pandas(),\n",
" scores_only.select(\n",
" [\n",
" \"affordability_score\",\n",
" \"growth_score\",\n",
" \"freehold_score\",\n",
" \"liquidity_score\",\n",
" \"development_score\",\n",
" ]\n",
" ).to_pandas(),\n",
" y=scores_only[\"district\"].to_list(),\n",
" x=[\"Affordability\", \"5yr Growth\", \"Freehold %\", \"Liquidity\", \"New Builds\"],\n",
" color_continuous_scale=\"RdYlGn\", aspect=\"auto\",\n",
" title=\"Borough Scores Heatmap (Higher = Better)\", labels={\"color\": \"Score\"})\n",
" color_continuous_scale=\"RdYlGn\",\n",
" aspect=\"auto\",\n",
" title=\"Borough Scores Heatmap (Higher = Better)\",\n",
" labels={\"color\": \"Score\"},\n",
")\n",
"fig.update_layout(height=900)\n",
"fig.show()"
]
@ -22258,17 +22660,49 @@
"median_afford = value_growth[\"affordability_score\"].median()\n",
"median_growth = value_growth[\"growth_score\"].median()\n",
"\n",
"fig = px.scatter(value_growth.to_pandas(), x=\"affordability_score\", y=\"growth_score\", text=\"district\",\n",
" size=\"liquidity_score\", color=\"freehold_score\", color_continuous_scale=\"Viridis\",\n",
"fig = px.scatter(\n",
" value_growth.to_pandas(),\n",
" x=\"affordability_score\",\n",
" y=\"growth_score\",\n",
" text=\"district\",\n",
" size=\"liquidity_score\",\n",
" color=\"freehold_score\",\n",
" color_continuous_scale=\"Viridis\",\n",
" title=\"Value vs Growth Quadrant Analysis\",\n",
" labels={\"affordability_score\": \"Affordability (higher=cheaper)\", \"growth_score\": \"5yr Growth (higher=faster)\", \"liquidity_score\": \"Liquidity\", \"freehold_score\": \"Freehold %\"})\n",
" labels={\n",
" \"affordability_score\": \"Affordability (higher=cheaper)\",\n",
" \"growth_score\": \"5yr Growth (higher=faster)\",\n",
" \"liquidity_score\": \"Liquidity\",\n",
" \"freehold_score\": \"Freehold %\",\n",
" },\n",
")\n",
"\n",
"fig.add_hline(y=median_growth, line_dash=\"dash\", line_color=\"gray\")\n",
"fig.add_vline(x=median_afford, line_dash=\"dash\", line_color=\"gray\")\n",
"fig.add_annotation(x=75, y=85, text=\"Best Value<br>(Affordable+Growing)\", showarrow=False, font=dict(size=11))\n",
"fig.add_annotation(x=25, y=85, text=\"Premium Growth<br>(Expensive+Growing)\", showarrow=False, font=dict(size=11))\n",
"fig.add_annotation(x=75, y=15, text=\"Stable & Affordable<br>(Cheap+Steady)\", showarrow=False, font=dict(size=11))\n",
"fig.add_annotation(x=25, y=15, text=\"Caution<br>(Expensive+Slow)\", showarrow=False, font=dict(size=11))\n",
"fig.add_annotation(\n",
" x=75,\n",
" y=85,\n",
" text=\"Best Value<br>(Affordable+Growing)\",\n",
" showarrow=False,\n",
" font=dict(size=11),\n",
")\n",
"fig.add_annotation(\n",
" x=25,\n",
" y=85,\n",
" text=\"Premium Growth<br>(Expensive+Growing)\",\n",
" showarrow=False,\n",
" font=dict(size=11),\n",
")\n",
"fig.add_annotation(\n",
" x=75,\n",
" y=15,\n",
" text=\"Stable & Affordable<br>(Cheap+Steady)\",\n",
" showarrow=False,\n",
" font=dict(size=11),\n",
")\n",
"fig.add_annotation(\n",
" x=25, y=15, text=\"Caution<br>(Expensive+Slow)\", showarrow=False, font=dict(size=11)\n",
")\n",
"fig.update_traces(textposition=\"top center\", textfont_size=8)\n",
"fig.update_layout(height=700)\n",
"fig.show()"
@ -22332,25 +22766,43 @@
],
"source": [
"# Borough Rankings with Composite Score\n",
"WEIGHTS = {\"affordability\": 0.30, \"growth\": 0.25, \"freehold\": 0.15, \"liquidity\": 0.15, \"development\": 0.15}\n",
"WEIGHTS = {\n",
" \"affordability\": 0.30,\n",
" \"growth\": 0.25,\n",
" \"freehold\": 0.15,\n",
" \"liquidity\": 0.15,\n",
" \"development\": 0.15,\n",
"}\n",
"\n",
"ranking = (\n",
" comparison_normalized.drop_nulls()\n",
" .with_columns(\n",
" (pl.col(\"affordability_score\") * WEIGHTS[\"affordability\"] +\n",
" pl.col(\"growth_score\") * WEIGHTS[\"growth\"] +\n",
" pl.col(\"freehold_score\") * WEIGHTS[\"freehold\"] +\n",
" pl.col(\"liquidity_score\") * WEIGHTS[\"liquidity\"] +\n",
" pl.col(\"development_score\") * WEIGHTS[\"development\"]).alias(\"composite_score\")\n",
" (\n",
" pl.col(\"affordability_score\") * WEIGHTS[\"affordability\"]\n",
" + pl.col(\"growth_score\") * WEIGHTS[\"growth\"]\n",
" + pl.col(\"freehold_score\") * WEIGHTS[\"freehold\"]\n",
" + pl.col(\"liquidity_score\") * WEIGHTS[\"liquidity\"]\n",
" + pl.col(\"development_score\") * WEIGHTS[\"development\"]\n",
" ).alias(\"composite_score\")\n",
" )\n",
" .select(\n",
" [\n",
" \"district\",\n",
" pl.col(\"median\").alias(\"median_price_2023\"),\n",
" \"cagr_5yr\",\n",
" \"freehold_pct\",\n",
" \"composite_score\",\n",
" ]\n",
" )\n",
" .select([\"district\", pl.col(\"median\").alias(\"median_price_2023\"), \"cagr_5yr\", \"freehold_pct\", \"composite_score\"])\n",
" .sort(\"composite_score\", descending=True)\n",
" .with_row_index(\"rank\", offset=1)\n",
")\n",
"\n",
"print(\"Top 15 Boroughs by Composite Score:\")\n",
"print(\"Weights: Affordability 30%, Growth 25%, Freehold 15%, Liquidity 15%, Development 15%\")\n",
"print(\"=\"*80)\n",
"print(\n",
" \"Weights: Affordability 30%, Growth 25%, Freehold 15%, Liquidity 15%, Development 15%\"\n",
")\n",
"print(\"=\" * 80)\n",
"ranking.head(15)"
]
}