Small fixes
This commit is contained in:
parent
d93beb9201
commit
7591e5fc05
12 changed files with 198 additions and 14 deletions
|
|
@ -5,7 +5,7 @@ from pathlib import Path
|
|||
import polars as pl
|
||||
|
||||
from constants import MAX_BEDROOMS, MAX_RENT_MONTHLY, MIN_RENT_MONTHLY
|
||||
from transform import normalize_price
|
||||
from transform import map_property_type, normalize_price
|
||||
|
||||
log = logging.getLogger("rightmove")
|
||||
|
||||
|
|
@ -43,6 +43,19 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
|
|||
MAX_BEDROOMS,
|
||||
)
|
||||
|
||||
# Re-derive Property type from Property sub-type using current PROPERTY_TYPE_MAP.
|
||||
# This retroactively fixes data scraped with older versions of the type map.
|
||||
remapped = 0
|
||||
for p in properties:
|
||||
sub_type = p.get("Property sub-type", "")
|
||||
if sub_type and sub_type != "Unknown":
|
||||
new_type = map_property_type(sub_type)
|
||||
if new_type != p.get("Property type"):
|
||||
p["Property type"] = new_type
|
||||
remapped += 1
|
||||
if remapped:
|
||||
log.info("Re-mapped %d property types from sub-types", remapped)
|
||||
|
||||
# Parse first_visible_date to datetime
|
||||
listing_dates = []
|
||||
for p in properties:
|
||||
|
|
@ -56,7 +69,27 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
|
|||
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
|
||||
listing_dates.append(dt)
|
||||
except (ValueError, TypeError):
|
||||
listing_dates.append(None)
|
||||
# Try additional date formats (OpenRent: "DD Month, YYYY", "DD Month YYYY", "Today", "Tomorrow")
|
||||
parsed = None
|
||||
stripped = fvd.strip()
|
||||
lower = stripped.lower()
|
||||
if lower == "today":
|
||||
parsed = datetime.now().replace(
|
||||
hour=0, minute=0, second=0, microsecond=0
|
||||
)
|
||||
elif lower == "tomorrow":
|
||||
from datetime import timedelta
|
||||
parsed = (
|
||||
datetime.now() + timedelta(days=1)
|
||||
).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
else:
|
||||
for fmt in ("%d %B, %Y", "%d %B %Y"):
|
||||
try:
|
||||
parsed = datetime.strptime(stripped, fmt)
|
||||
break
|
||||
except ValueError:
|
||||
continue
|
||||
listing_dates.append(parsed)
|
||||
else:
|
||||
listing_dates.append(None)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue