Implement utilities for saving/loading pandas DataFrames to/from file

This commit is contained in:
RunasSudo 2022-10-13 13:26:01 +11:00
parent 7e8418eb36
commit e1051d8aff
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
2 changed files with 98 additions and 0 deletions

View File

@ -15,6 +15,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist
from .fs import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted
from .sig_tests import chi2, mannwhitney, ttest_ind
def reload_me():

97
yli/fs.py Normal file
View File

@ -0,0 +1,97 @@
# scipy-yli: Helpful SciPy utilities and recipes
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from Crypto.Cipher import AES
from Crypto.Protocol.KDF import scrypt
from Crypto.Random import get_random_bytes
import getpass
import io
import lzma
import pickle
def pickle_write_encrypted(df, fname, salt=None):
    """
    Serialise a DataFrame to a password-protected file

    The object is pickled into an in-memory LZMA stream, and the compressed
    bytes are then encrypted with AES-256 in CTR mode. The key is derived
    with the scrypt KDF from a password requested interactively via getpass.

    The output file holds a pickled tuple of (salt, nonce, ciphertext).

    df: Object to save
    fname: Path of the file to write
    salt: Salt for the scrypt KDF; 16 random bytes are generated if None
    """

    # Fall back to a fresh random salt when the caller does not supply one
    kdf_salt = get_random_bytes(16) if salt is None else salt

    # Pickle straight into an in-memory LZMA stream
    buffer = io.BytesIO()
    with lzma.LZMAFile(buffer, 'wb') as lzma_stream:
        pickle.dump(df, lzma_stream)

    # Prompt for the password and stretch it into a 32-byte (256-bit) key
    passphrase = getpass.getpass('Password: ')
    aes_key = scrypt(passphrase, kdf_salt, key_len=32, N=2**20, r=8, p=1)

    # Encrypt the compressed pickle; the cipher picks its own nonce
    encryptor = AES.new(aes_key, AES.MODE_CTR)
    ciphertext = encryptor.encrypt(buffer.getvalue())

    # Store everything needed for decryption (except the password)
    container = (kdf_salt, encryptor.nonce, ciphertext)
    with open(fname, 'wb') as out_file:
        pickle.dump(container, out_file)
def pickle_write_compressed(df, fname):
    """
    Serialise a DataFrame to a compressed (but not encrypted) file

    The object is pickled directly into an LZMA stream wrapped around the
    output file.

    df: Object to save
    fname: Path of the file to write
    """

    # Open the raw file and layer the LZMA compressor on top of it
    with open(fname, 'wb') as raw_file, lzma.LZMAFile(raw_file, 'wb') as lzma_stream:
        pickle.dump(df, lzma_stream)
def pickle_read_encrypted(fname):
    """
    Read a DataFrame from an encrypted file

    Expects the format written by pickle_write_encrypted: a pickled tuple of
    (salt, nonce, ciphertext), where the ciphertext is an AES-256-CTR
    encryption of an LZMA-compressed pickle. The key is derived with the
    scrypt KDF from a password requested interactively via getpass.

    fname: Path of the file to read

    Returns the unpickled object.
    """

    # Read ciphertext data
    # NOTE: security - the container is unpickled before any password is
    # involved, and CTR mode does not authenticate the ciphertext, so a
    # tampered file is not detected. Only open files from a trusted source.
    with open(fname, 'rb') as f:
        salt, nonce, ct_bytes = pickle.load(f)

    # Generate key using scrypt (parameters must match those used on write)
    password = getpass.getpass('Password: ')
    key = scrypt(password, salt, 256//8, 2**20, 8, 1)

    # Decrypt to the compressed pickle plaintext
    cipher = AES.new(key, AES.MODE_CTR, nonce=nonce)
    pickle_pt = io.BytesIO(cipher.decrypt(ct_bytes))

    # Uncompress and deserialise; the context manager makes sure the
    # decompressor is closed even if unpickling fails (e.g. wrong password)
    with lzma.LZMAFile(pickle_pt, 'rb') as lf:
        return pickle.load(lf)
def pickle_read_compressed(fname):
    """
    Read a DataFrame from a compressed (but not encrypted) file

    Expects the format written by pickle_write_compressed: a pickle wrapped
    in an LZMA stream.

    fname: Path of the file to read

    Returns the unpickled object.
    """

    # NOTE: security - unpickling can execute arbitrary code, so only open
    # files from a trusted source.

    # Uncompress and deserialise; both the raw file and the LZMA reader are
    # closed on exit, mirroring the nested `with` used when writing
    with open(fname, 'rb') as f:
        with lzma.LZMAFile(f, 'rb') as lf:
            return pickle.load(lf)