Format python

This commit is contained in:
Andras Schmelczer 2026-01-31 13:07:09 +00:00
parent 85f5770e09
commit 4c258018c3
17 changed files with 348 additions and 248 deletions

View file

@@ -70,7 +70,9 @@ def _count_pois_per_postcode(
pc_codes = postcodes_df["postcode"].to_list()
# Initialize result arrays
result_counts = {group: np.zeros(n_postcodes, dtype=np.int32) for group in POI_GROUPS}
result_counts = {
group: np.zeros(n_postcodes, dtype=np.int32) for group in POI_GROUPS
}
# Process in batches with progress
batch_size = 50000
@@ -83,7 +85,9 @@ def _count_pois_per_postcode(
end_idx = min(start_idx + batch_size, n_postcodes)
if batch_idx % 5 == 0:
print(f" Batch {batch_idx + 1}/{n_batches}: postcodes {start_idx:,} - {end_idx:,}")
print(
f" Batch {batch_idx + 1}/{n_batches}: postcodes {start_idx:,} - {end_idx:,}"
)
# Process batch
for i in range(start_idx, end_idx):
@@ -109,12 +113,7 @@ def _count_pois_per_postcode(
nearby = np.concatenate(nearby_indices)
# Vectorized distance calculation for all nearby POIs
distances = haversine_km(
poi_lats[nearby],
poi_lngs[nearby],
pc_lat,
pc_lon
)
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lat, pc_lon)
# Filter by radius
within_mask = distances <= radius_km
@@ -147,13 +146,13 @@ def count_pois_within_radius(
"""
# Get unique postcodes with coordinates
print("Deduplicating postcodes...")
unique_postcodes = (
properties
.select(["postcode", "lat", "lon"])
.unique(subset=["postcode"])
unique_postcodes = properties.select(["postcode", "lat", "lon"]).unique(
subset=["postcode"]
)
print(f" {len(properties):,} properties → {len(unique_postcodes):,} unique postcodes")
print(
f" {len(properties):,} properties → {len(unique_postcodes):,} unique postcodes"
)
# Count POIs per postcode
postcode_counts = _count_pois_per_postcode(unique_postcodes, pois, radius_km)
@@ -174,11 +173,7 @@ def count_pois_within_radius(
result_lazy = (
properties.lazy()
.select("postcode")
.join(
pl.scan_parquet(tmp_path),
on="postcode",
how="left"
)
.join(pl.scan_parquet(tmp_path), on="postcode", how="left")
.select(count_cols)
.fill_null(0)
)