147 lines
5.4 KiB
Python
147 lines
5.4 KiB
Python
import numpy as np
|
|
import polars as pl
|
|
import pytest
|
|
|
|
from pipeline.utils.haversine import haversine_km, haversine_km_expr
|
|
|
|
|
|
class TestHaversineKm:
    """Exercise the numpy-based ``haversine_km`` distance function."""

    def test_same_point(self):
        """A point measured against itself is zero kilometres away."""
        here_lat, here_lon = 51.5074, -0.1278
        result = haversine_km(
            np.array([here_lat]), np.array([here_lon]), here_lat, here_lon
        )
        assert np.allclose(result, 0.0, atol=1e-10)

    def test_known_distance_london_to_paris(self):
        """London to Paris comes out at roughly 344 km (within 1%)."""
        origin_lat = np.array([51.5074])  # London
        origin_lon = np.array([-0.1278])
        target = (48.8566, 2.3522)  # Paris (lat, lon)

        km = haversine_km(origin_lat, origin_lon, *target)

        expected_km = 344
        assert np.allclose(km[0], expected_km, rtol=0.01)

    def test_known_distance_new_york_to_london(self):
        """New York to London comes out at roughly 5570 km (within 1%)."""
        origin_lat = np.array([40.7128])  # New York
        origin_lon = np.array([-74.0060])
        target = (51.5074, -0.1278)  # London (lat, lon)

        km = haversine_km(origin_lat, origin_lon, *target)

        expected_km = 5570
        assert np.allclose(km[0], expected_km, rtol=0.01)

    def test_multiple_points(self):
        """Several origins are measured against one destination in a single call."""
        origins = [
            (51.5074, -0.1278),  # London
            (48.8566, 2.3522),  # Paris
            (40.7128, -74.0060),  # NYC
        ]
        lat_values, lon_values = zip(*origins)
        edinburgh = (55.9533, -3.1883)

        dists = haversine_km(np.array(lat_values), np.array(lon_values), *edinburgh)

        # Every origin differs from the destination, so no distance may be zero
        # or negative.
        assert np.all(dists > 0)

        # Expected ordering by distance to Edinburgh: London, then Paris, then
        # NYC, with London at roughly 530 km (within 2%).
        london_km, paris_km, nyc_km = dists[0], dists[1], dists[2]
        assert london_km < paris_km < nyc_km
        assert np.allclose(london_km, 530, rtol=0.02)

    def test_equator_points(self):
        """One degree of longitude along the equator is about 111 km."""
        equator_lat = 0.0
        start_lat = np.array([equator_lat])
        start_lon = np.array([0.0])
        end_lon = 1.0

        km = haversine_km(start_lat, start_lon, equator_lat, end_lon)

        assert np.allclose(km[0], 111.2, rtol=0.01)
|
|
|
|
|
|
class TestHaversineKmExpr:
    """Exercise the Polars-expression ``haversine_km_expr`` distance function."""

    @staticmethod
    def _dist_column(lats, lons, dest_lat, dest_lon):
        """Return the "dist" column computed from the given origin coordinates."""
        frame = pl.DataFrame({"lat": lats, "lon": lons})
        dist_expr = haversine_km_expr("lat", "lon", dest_lat, dest_lon).alias("dist")
        return frame.select(dist_expr)["dist"]

    def test_same_point(self):
        """A point measured against itself is zero kilometres away."""
        column = self._dist_column([51.5074], [-0.1278], 51.5074, -0.1278)
        assert column[0] == pytest.approx(0.0, abs=1e-10)

    def test_known_distance_london_to_paris(self):
        """London to Paris comes out at roughly 344 km (within 1%)."""
        column = self._dist_column([51.5074], [-0.1278], 48.8566, 2.3522)
        assert column[0] == pytest.approx(344, rel=0.01)

    def test_known_distance_new_york_to_london(self):
        """New York to London comes out at roughly 5570 km (within 1%)."""
        column = self._dist_column([40.7128], [-74.0060], 51.5074, -0.1278)
        assert column[0] == pytest.approx(5570, rel=0.01)

    def test_multiple_points(self):
        """Several origin rows are measured against one destination."""
        origin_lats = [51.5074, 48.8566, 40.7128]  # London, Paris, NYC
        origin_lons = [-0.1278, 2.3522, -74.0060]
        edinburgh_lat, edinburgh_lon = 55.9533, -3.1883

        column = self._dist_column(
            origin_lats, origin_lons, edinburgh_lat, edinburgh_lon
        )
        dists = column.to_numpy()

        # Every origin differs from the destination, so no distance may be zero
        # or negative.
        assert np.all(dists > 0)

        # Expected ordering by distance to Edinburgh: London, then Paris, then
        # NYC, with London at roughly 530 km (within 2%).
        london_km, paris_km, nyc_km = dists[0], dists[1], dists[2]
        assert london_km < paris_km < nyc_km
        assert london_km == pytest.approx(530, rel=0.02)

    def test_equator_points(self):
        """One degree of longitude along the equator is about 111 km."""
        column = self._dist_column([0.0], [0.0], 0.0, 1.0)
        assert column[0] == pytest.approx(111.2, rel=0.01)
|
|
|
|
|
|
class TestHaversineConsistency:
    """Cross-check the numpy and Polars haversine implementations."""

    def test_numpy_and_polars_match(self):
        """Both implementations agree to a relative tolerance of 1e-10."""
        rome_lat, rome_lon = 41.9028, 12.4964
        origin_lats = np.array([51.5074, 48.8566, 40.7128, 55.9533, 52.5200])
        origin_lons = np.array([-0.1278, 2.3522, -74.0060, -3.1883, 13.4050])

        # Reference values from the numpy implementation.
        via_numpy = haversine_km(origin_lats, origin_lons, rome_lat, rome_lon)

        # Same inputs pushed through the Polars expression.
        frame = pl.DataFrame({"lat": origin_lats, "lon": origin_lons})
        dist_expr = haversine_km_expr("lat", "lon", rome_lat, rome_lon).alias("dist")
        via_polars = frame.select(dist_expr)["dist"].to_numpy()

        # Bit-for-bit equality is not required; floating-point rounding may
        # differ slightly between the two code paths.
        assert np.allclose(via_numpy, via_polars, rtol=1e-10)
|