def convert_pandas_nullable(df):
    """Convert pandas nullable dtypes (e.g. Int64) to non-nullable numpy dtypes

    Every column whose dtype is one of the pandas nullable integer, unsigned
    integer or floating extension dtypes is cast to the numpy dtype of the
    same name in lower case (e.g. ``Int64`` -> ``int64``, ``UInt8`` ->
    ``uint8``, ``Float32`` -> ``float32``). All other columns are left as is.

    :param df: DataFrame to convert; it is never modified
    :return: *df* itself when no column needs converting, otherwise a copy of
        *df* with the affected columns converted
    """

    # Names of the pandas nullable numeric extension dtypes. The comparison is
    # deliberately case-sensitive: the numpy counterparts use the same names
    # in lower case and must not match.
    nullable_dtypes = frozenset((
        'Int8', 'Int16', 'Int32', 'Int64',
        'UInt8', 'UInt16', 'UInt32', 'UInt64',
        'Float32', 'Float64',
    ))

    # Read each column's dtype once from df.dtypes and record the target
    # numpy dtype name for the columns that need converting
    conversions = {}
    for col, dtype in df.dtypes.items():
        dtype_name = str(dtype)
        if dtype_name in nullable_dtypes:
            conversions[col] = dtype_name.lower()

    # Avoid copy if possible
    if not conversions:
        return df

    df_cleaned = df.copy()
    for col, numpy_name in conversions.items():
        df_cleaned[col] = df[col].astype(numpy_name)

    return df_cleaned