Extract utils
This commit is contained in:
parent
0153e46478
commit
e1b38a1b95
8 changed files with 458 additions and 25 deletions
|
|
@@ -8,6 +8,7 @@ from tqdm import tqdm
|
|||
from .config import DESTINATIONS, MAX_CONCURRENT, MAX_POSTCODES, OUTPUT_DIR, MAX_DISTANCE_KM
|
||||
from .results import CheckpointSaver, results_to_dataframe, save_results
|
||||
from .tfl_client import fetch_journey_times
|
||||
from pipeline.utils import haversine_km_expr
|
||||
|
||||
|
||||
def main():
|
||||
|
|
@@ -28,31 +29,9 @@ def main():
|
|||
postcodes_df = pl.read_parquet(OUTPUT_DIR / "postcodes_h3.parquet")
|
||||
print(f"Loaded {postcodes_df.height:,} postcodes")
|
||||
|
||||
# Filter to postcodes within 150km of destination using Haversine formula
|
||||
earth_radius_km = 6371
|
||||
|
||||
dest_lat_rad = destination.lat * 3.14159265359 / 180
|
||||
dest_lon_rad = destination.lon * 3.14159265359 / 180
|
||||
|
||||
# Filter to postcodes within range of destination
|
||||
postcodes_df = postcodes_df.with_columns(
|
||||
(
|
||||
2
|
||||
* earth_radius_km
|
||||
* (
|
||||
(
|
||||
((pl.lit(dest_lat_rad) - pl.col("lat") * 3.14159265359 / 180) / 2).sin()
|
||||
** 2
|
||||
+ pl.lit(dest_lat_rad).cos()
|
||||
* (pl.col("lat") * 3.14159265359 / 180).cos()
|
||||
* (
|
||||
(pl.lit(dest_lon_rad) - pl.col("long") * 3.14159265359 / 180) / 2
|
||||
).sin()
|
||||
** 2
|
||||
)
|
||||
.sqrt()
|
||||
.arcsin()
|
||||
)
|
||||
).alias("distance_km")
|
||||
haversine_km_expr("lat", "long", destination.lat, destination.lon).alias("distance_km")
|
||||
).filter(pl.col("distance_km") <= MAX_DISTANCE_KM)
|
||||
|
||||
print(f"Filtered to {postcodes_df.height:,} postcodes within {MAX_DISTANCE_KM}km")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue