diff --git a/yli/__init__.py b/yli/__init__.py index 2a3748e..25a978f 100644 --- a/yli/__init__.py +++ b/yli/__init__.py @@ -15,6 +15,7 @@ # along with this program. If not, see . from .distributions import beta_oddsratio, beta_ratio, hdi, transformed_dist +from .fs import pickle_read_compressed, pickle_read_encrypted, pickle_write_compressed, pickle_write_encrypted from .sig_tests import chi2, mannwhitney, ttest_ind def reload_me(): diff --git a/yli/fs.py b/yli/fs.py new file mode 100644 index 0000000..467cfb8 --- /dev/null +++ b/yli/fs.py @@ -0,0 +1,97 @@ +# scipy-yli: Helpful SciPy utilities and recipes +# Copyright © 2022 Lee Yingtong Li (RunasSudo) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
try:
    from Crypto.Cipher import AES
    from Crypto.Protocol.KDF import scrypt
    from Crypto.Random import get_random_bytes
except ImportError:
    # pycryptodome is only needed by the *_encrypted helpers. Defer the
    # failure so the compression-only helpers stay usable without it.
    AES = scrypt = get_random_bytes = None

import getpass
import io
import lzma
import pickle

# scrypt parameters. The writer and the reader must use identical values,
# otherwise a file can never be decrypted again, so they are defined once.
_KEY_LEN = 256 // 8  # AES-256
_SCRYPT_N = 2**20
_SCRYPT_R = 8
_SCRYPT_P = 1
_SALT_LEN = 16


def _require_crypto():
    """Raise ImportError if pycryptodome could not be imported"""
    if AES is None:
        raise ImportError('pycryptodome is required to read or write encrypted files')


def _derive_key(salt):
    """Prompt for a password on the terminal and derive the AES key from it using scrypt"""
    password = getpass.getpass('Password: ')
    return scrypt(password, salt, _KEY_LEN, _SCRYPT_N, _SCRYPT_R, _SCRYPT_P)


def pickle_write_encrypted(df, fname, salt=None):
    """
    Write the DataFrame to an encrypted file

    DataFrame is serialised with Pickle, compressed with LZMA, then encrypted
    Encryption is AES-256-CTR
    Password is prompted for interactively and the key derived using scrypt KDF

    The file content is a pickled tuple (salt, nonce, ciphertext)

    NOTE(security): CTR mode provides confidentiality only; the ciphertext is
    not authenticated, so tampering with the file cannot be detected

    :param df: Object to serialise (anything Pickle can handle)
    :param fname: Path of the file to write
    :param salt: scrypt salt as bytes, or None to generate 16 random bytes
    :raises ImportError: If pycryptodome is not installed
    """

    _require_crypto()

    if salt is None:
        salt = get_random_bytes(_SALT_LEN)

    # Serialise and compress into memory, since the whole plaintext is encrypted in one call
    pickle_pt = io.BytesIO()
    with lzma.LZMAFile(pickle_pt, 'wb') as lf:
        pickle.dump(df, lf)

    key = _derive_key(salt)

    # No nonce is passed, so the cipher object picks one; it is stored alongside the ciphertext
    cipher = AES.new(key, AES.MODE_CTR)
    ct_bytes = cipher.encrypt(pickle_pt.getvalue())

    with open(fname, 'wb') as f:
        pickle.dump((salt, cipher.nonce, ct_bytes), f)


def pickle_write_compressed(df, fname):
    """
    Write the DataFrame to a compressed file

    DataFrame is serialised with Pickle and compressed with LZMA

    :param df: Object to serialise (anything Pickle can handle)
    :param fname: Path of the file to write
    """

    with open(fname, 'wb') as f, lzma.LZMAFile(f, 'wb') as lf:
        pickle.dump(df, lf)


def pickle_read_encrypted(fname):
    """
    Read a DataFrame from an encrypted file

    Expects the format produced by pickle_write_encrypted
    Password is prompted for interactively

    NOTE(security): The file is unpickled before and after decryption, and the
    ciphertext is not authenticated. Only open files from a trusted source

    :param fname: Path of the file to read
    :return: The deserialised object
    :raises ImportError: If pycryptodome is not installed
    :raises lzma.LZMAError: If the decrypted data is not a valid LZMA stream,
        which is the typical symptom of a wrong password
    """

    _require_crypto()

    with open(fname, 'rb') as f:
        salt, nonce, ct_bytes = pickle.load(f)

    key = _derive_key(salt)

    cipher = AES.new(key, AES.MODE_CTR, nonce=nonce)
    pickle_pt = io.BytesIO(cipher.decrypt(ct_bytes))

    # Context manager so the decompressor is closed even if unpickling fails
    with lzma.LZMAFile(pickle_pt, 'rb') as lf:
        return pickle.load(lf)


def pickle_read_compressed(fname):
    """
    Read a DataFrame from a compressed (but not encrypted) file

    NOTE(security): The file is unpickled. Only open files from a trusted source

    :param fname: Path of the file to read
    :return: The deserialised object
    """

    # Context managers so both the file and the decompressor are closed
    with open(fname, 'rb') as f, lzma.LZMAFile(f, 'rb') as lf:
        return pickle.load(lf)