Get the original datatype values from a vector of AnyValues

873 views Asked by At

I can convert many datatypes into the AnyValue enum in Rust Polars, but how do I convert them back to the original datatypes?

use polars::prelude::*;

/// Builds a small three-column `DataFrame`, fetches row 1 as a vector of `AnyValue`s
/// and attempts to convert the individual cells back to native Rust types.
///
/// NOTE: this is the question's reproduction case — the two `.into()` conversions
/// at the end are the ones reported as failing to compile (see the quoted errors).
fn main() {
    // Columns: a string column, a boolean column and a float column, three rows each.
    let df = df!(  "Fruit" => &["Apple", "Apple", "Pear"],
                   "Boolean" => &[true,false,true],
                   "Float64" => &[1.1,321.45,101.445])
    .unwrap();

    // get row 1 of the DataFrame as a vector of AnyValues
    let vec_anyvalue = df.get(1).unwrap();

    // trying to get the actual values:

    // getting the fruit as a String kind of works (get quotation marks too)
    let fruit = vec_anyvalue.get(0).unwrap().to_string();

    // getting the bool or the float value does not ?!

    // ERROR:  the trait `From<&AnyValue<'_>>` is not implemented for `bool`
    let boolvalue: bool = vec_anyvalue.get(1).unwrap().into();
    // ERROR:  the trait `From<AnyValue<'_>>` is not implemented for `f64`
    let floatvalue: f64 = vec_anyvalue.get(2).unwrap().into();
}


2

There are 2 answers

6
Gnurfos On BEST ANSWER

I think you have to write a converter yourself:

/// Returns the `bool` carried by an `AnyValue::Boolean`.
///
/// # Panics
///
/// Panics with the message `"not a boolean"` when `v` is any other variant.
fn to_bool<'a>(v: &AnyValue<'a>) -> bool {
    match v {
        AnyValue::Boolean(flag) => *flag,
        _ => panic!("not a boolean"),
    }
}

(or a variation returning Option/Result)

0
Claudio Fsr On

I was able to get the DataFrame values using Series or AnyValue.

// Rust version 1.67.1 ; polars version 0.27.2
use polars::prelude::*;
use std::error::Error;

/// Demonstrates two ways of reading native Rust values out of a polars `DataFrame`:
/// column-wise through the typed `Series` accessors (`utf8`, `bool`, `f64`, `list`),
/// and cell-wise through `AnyValue::try_extract`.
///
/// Errors from the fallible polars calls are propagated with `?` as `Box<dyn Error>`;
/// the `unwrap()` calls on row/cell lookups panic instead if the lookup yields nothing.
fn main() -> Result<(), Box<dyn Error>> {
    // Three float Series of different lengths (3, 2 and 4 elements).
    let s0 = Series::new("a", &[1.2, 2.2, 3.3]);
    let s1 = Series::new("b", &[7.6, 1.2]);
    let s2 = Series::new("c", &[4.4, -5.07, 99.3, 55.2]);
    // construct a new ListChunked for a slice of Series.
    let list = Series::new("ListFloat64", &[s0, s1, s2]);

    // construct a few more Series.
    // (The names s0..s2 are shadowed; the originals were only needed to build `list`.)
    let s0 = Series::new("Fruit", ["Apple", "Apple", "Pear"]);
    let s1 = Series::new("Boolean", [true, false, true]);
    let s2 = Series::new("Float64", [1.1, 321.45, 101.445]);

    let dataframe = DataFrame::new(vec![s0, s1, s2, list])?;

    println!("dataframe:\n{dataframe}\n");

    // I was able to get the DataFrame values using Series.

    // Look up each column by name; `?` propagates the error if a column is missing.
    let column_string: &Series = dataframe.column("Fruit")?;
    let column_boolean: &Series = dataframe.column("Boolean")?;
    let column_float64: &Series = dataframe.column("Float64")?;
    let column_listfloat64: &Series = dataframe.column("ListFloat64")?;

    // Get rows from columns with into_iter()
    // Each typed accessor is fallible (hence `?`) and iterates as `Option<T>` items.

    let vec_opt_str: Vec<Option<&str>> = column_string.utf8()?.into_iter().collect();
    let vec_opt_bool: Vec<Option<bool>> = column_boolean.bool()?.into_iter().collect();
    let vec_opt_f64: Vec<Option<f64>> = column_float64.f64()?.into_iter().collect();
    let vec_opt_series: Vec<Option<Series>> = column_listfloat64.list()?.into_iter().collect();

    // Remove intermediate Options: Vec<Option<Type>> --> Vec<Type>
    // (`get_vec_type` substitutes the type's default value for every `None`.)

    let vec_strings: Vec<&str> = get_vec_type(vec_opt_str);
    let vec_boolean: Vec<bool> = get_vec_type(vec_opt_bool);
    let vec_float64: Vec<f64> = get_vec_type(vec_opt_f64);
    let vec_series: Vec<Series> = get_vec_type(vec_opt_series);

    // Print every row by indexing the per-column vectors built above.
    for row in 0..dataframe.height() {
        println!("Show data from row {row}:");

        let value_string: String = vec_strings[row].to_string();
        println!("\tvalue_string: {value_string}");

        let value_bool: bool = vec_boolean[row];
        println!("\tvalue_bool: {value_bool}");

        let value_f64: f64 = vec_float64[row];
        println!("\tvalue_f64: {value_f64}");

        // A list cell is itself a Series; unpack it the same way as a float column.
        let series: &Series = &vec_series[row];
        let vec_opt_f64_new: Vec<Option<f64>> = series.f64()?.into_iter().collect();
        let vec_float64_new: Vec<f64> = get_vec_type(vec_opt_f64_new);
        println!("\tvec_list_f64: {vec_float64_new:?}\n");
    }

    // Cell-wise access: row 1, column 2 is the "Float64" column of the second row.
    let row = 1;
    let column = 2;

    // Variant 1: fetch the whole row as Vec<AnyValue>, then extract one cell as f64.
    println!("Using Vec<AnyValue>, row = {row}, column = {column}:");
    let vec_anyvalue: Vec<AnyValue> = dataframe.get(row).unwrap();
    let floatvalue: f64 = vec_anyvalue.get(column).unwrap().try_extract::<f64>()?;
    println!("floatvalue: {floatvalue}\n");

    // Variant 2: iterate a single column as AnyValues and take the `row`-th item.
    println!("Using AnyValue, row = {row}:");
    let anyvalue: AnyValue = dataframe.column("Float64")?.iter().nth(row).unwrap();
    let value_f64 = anyvalue.try_extract::<f64>()?;
    println!("value_f64: {value_f64}");

    Ok(())
}

Here, get_vec_type is defined as:

/// Strips the intermediate `Option` layer from a vector.
///
/// Each `Some(value)` is unwrapped to `value`, and each `None` is replaced by
/// `T::default()`. The result has the same length and element order as the input.
fn get_vec_type<T>(vec_opt_type: Vec<Option<T>>) -> Vec<T>
where
    T: Default,
{
    let mut values = Vec::with_capacity(vec_opt_type.len());
    for maybe_value in vec_opt_type {
        // Missing entries fall back to the type's default value.
        values.push(maybe_value.unwrap_or_default());
    }
    values
}

The final result:

dataframe:
shape: (3, 4)
┌───────┬─────────┬─────────┬────────────────────────┐
│ Fruit ┆ Boolean ┆ Float64 ┆ ListFloat64            │
│ ---   ┆ ---     ┆ ---     ┆ ---                    │
│ str   ┆ bool    ┆ f64     ┆ list[f64]              │
╞═══════╪═════════╪═════════╪════════════════════════╡
│ Apple ┆ true    ┆ 1.1     ┆ [1.2, 2.2, 3.3]        │
│ Apple ┆ false   ┆ 321.45  ┆ [7.6, 1.2]             │
│ Pear  ┆ true    ┆ 101.445 ┆ [4.4, -5.07, ... 55.2] │
└───────┴─────────┴─────────┴────────────────────────┘

Show data from row 0:
    value_string: Apple
    value_bool: true
    value_f64: 1.1
    vec_list_f64: [1.2, 2.2, 3.3]

Show data from row 1:
    value_string: Apple
    value_bool: false
    value_f64: 321.45
    vec_list_f64: [7.6, 1.2]

Show data from row 2:
    value_string: Pear
    value_bool: true
    value_f64: 101.445
    vec_list_f64: [4.4, -5.07, 99.3, 55.2]

Using Vec<AnyValue>, row = 1, column = 2:
floatvalue: 321.45

Using AnyValue, row = 1:
value_f64: 321.45