Update convert_pandas_nullable

Automatically convert pandas Float64 to float64 when required
Avoid unnecessary copy if possible
This commit is contained in:
RunasSudo 2022-11-09 17:03:45 +11:00
parent ab90cfc0e4
commit 50597ddc74
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A

View File

@@ -45,14 +45,18 @@ def check_nan(df, nan_policy):
def convert_pandas_nullable(df): def convert_pandas_nullable(df):
"""Convert pandas nullable dtypes (e.g. Int64) to non-nullable numpy dtypes""" """Convert pandas nullable dtypes (e.g. Int64) to non-nullable numpy dtypes"""
# TODO: Can we avoid this copy? # Avoid copy if possible
df = df.copy() df_cleaned = None
for col in df.columns: for col in df.columns:
if df[col].dtype == 'Int64': if df[col].dtype in ('Int64', 'Float64'):
df[col] = df[col].astype('int') if df_cleaned is None:
df_cleaned = df.copy()
df_cleaned[col] = df[col].astype(str(df[col].dtype).lower())
if df_cleaned is None:
return df return df
return df_cleaned
def as_2groups(df, data, group): def as_2groups(df, data, group):
"""Group the data by the given variable, ensuring only 2 groups""" """Group the data by the given variable, ensuring only 2 groups"""