Rust things
This commit is contained in:
parent
fc10381692
commit
3debacab4f
30 changed files with 3257 additions and 647 deletions
|
|
@ -97,7 +97,7 @@ fn build_search_text(name: &str, place_type: &str) -> String {
|
|||
}
|
||||
|
||||
if place_type == "station" {
|
||||
let suffix_aliases: [(&str, &[&str]); 5] = [
|
||||
let suffix_aliases: [(&str, &[&str]); 6] = [
|
||||
(
|
||||
" tube station",
|
||||
&[" underground station", " station", " tube", " underground"],
|
||||
|
|
@ -118,6 +118,7 @@ fn build_search_text(name: &str, place_type: &str) -> String {
|
|||
" elizabeth line station",
|
||||
&[" station", " elizabeth line", " crossrail station"],
|
||||
),
|
||||
(" dlr station", &[" station", " dlr"]),
|
||||
];
|
||||
|
||||
for (suffix, replacements) in suffix_aliases {
|
||||
|
|
@ -139,10 +140,15 @@ fn extract_str_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<String>> {
|
|||
let string_column = column
|
||||
.str()
|
||||
.with_context(|| format!("Column '{name}' is not a string column"))?;
|
||||
Ok(string_column
|
||||
string_column
|
||||
.into_iter()
|
||||
.map(|value| value.unwrap_or("").to_string())
|
||||
.collect())
|
||||
.enumerate()
|
||||
.map(|(row, value)| {
|
||||
value
|
||||
.map(ToString::to_string)
|
||||
.with_context(|| format!("Column '{name}' has null at row {row}"))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn extract_f32_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<f32>> {
|
||||
|
|
@ -155,33 +161,37 @@ fn extract_f32_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<f32>> {
|
|||
let float_column = cast
|
||||
.f32()
|
||||
.with_context(|| format!("Column '{name}' is not a float32 column"))?;
|
||||
Ok(float_column
|
||||
float_column
|
||||
.into_iter()
|
||||
.map(|value| value.unwrap_or(0.0))
|
||||
.collect())
|
||||
.enumerate()
|
||||
.map(|(row, value)| value.with_context(|| format!("Column '{name}' has null at row {row}")))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn extract_bool_col_or_default(
|
||||
df: &DataFrame,
|
||||
name: &str,
|
||||
default_value: bool,
|
||||
) -> anyhow::Result<Vec<bool>> {
|
||||
let Ok(column) = df.column(name) else {
|
||||
return Ok(vec![default_value; df.height()]);
|
||||
};
|
||||
fn extract_bool_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<bool>> {
|
||||
let column = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Missing column '{name}' in places data"))?;
|
||||
let bool_column = column
|
||||
.bool()
|
||||
.with_context(|| format!("Column '{name}' is not a boolean column"))?;
|
||||
Ok(bool_column
|
||||
bool_column
|
||||
.into_iter()
|
||||
.map(|value| value.unwrap_or(default_value))
|
||||
.collect())
|
||||
.enumerate()
|
||||
.map(|(row, value)| value.with_context(|| format!("Column '{name}' has null at row {row}")))
|
||||
.collect()
|
||||
}
|
||||
|
||||
impl PlaceData {
|
||||
pub fn load(parquet_path: &Path) -> anyhow::Result<Self> {
|
||||
super::run_polars_io(|| Self::load_inner(parquet_path))
|
||||
}
|
||||
|
||||
fn load_inner(parquet_path: &Path) -> anyhow::Result<Self> {
|
||||
info!("Loading place data from {:?}...", parquet_path);
|
||||
|
||||
let parquet_path = PlRefPath::try_from_path(parquet_path)
|
||||
.context("Failed to normalize places parquet path")?;
|
||||
let df = LazyFrame::scan_parquet(parquet_path, Default::default())
|
||||
.context("Failed to scan places parquet")?
|
||||
.collect()
|
||||
|
|
@ -210,7 +220,7 @@ impl PlaceData {
|
|||
let type_rank_vec: Vec<u8> = place_type_raw.iter().map(|pt| type_rank(pt)).collect();
|
||||
let place_type = InternedColumn::build(&place_type_raw);
|
||||
let travel_destination = if df.column("travel_destination").is_ok() {
|
||||
extract_bool_col_or_default(&df, "travel_destination", true)?
|
||||
extract_bool_col(&df, "travel_destination")?
|
||||
} else {
|
||||
place_type_raw
|
||||
.iter()
|
||||
|
|
@ -296,6 +306,7 @@ mod tests {
|
|||
assert!(build_search_text("King's Cross tube station", "station")
|
||||
.contains("kings cross underground"));
|
||||
assert!(build_search_text("St Albans", "city").contains("saint albans"));
|
||||
assert!(build_search_text("Shadwell DLR station", "station").contains("shadwell station"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue