2022-10-13 13:26:01 +11:00
|
|
|
# scipy-yli: Helpful SciPy utilities and recipes
|
|
|
|
# Copyright © 2022 Lee Yingtong Li (RunasSudo)
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU Affero General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
import getpass
|
|
|
|
import io
|
|
|
|
import lzma
|
|
|
|
import pickle
|
|
|
|
|
2022-10-18 17:57:53 +11:00
|
|
|
def pickle_write_encrypted(df, fname):
    """
    Serialise a DataFrame to a password-protected file

    The user is prompted interactively for a password.

    The data is pickled, compressed with LZMA, and the compressed bytes are then encrypted using AES-256-CTR.
    The AES key is derived from the password and a freshly generated 16-byte random salt using the scrypt KDF.
    The output file holds a pickled ``(salt, nonce, ciphertext)`` tuple.

    :param df: Data to serialise
    :type df: DataFrame
    :param fname: Filename to write data to
    :type fname: str
    """

    from Crypto.Cipher import AES
    from Crypto.Protocol.KDF import scrypt
    from Crypto.Random import get_random_bytes

    # Fresh random salt for this file's key derivation
    kdf_salt = get_random_bytes(16)

    # Pickle the data directly into an in-memory LZMA stream
    compressed_buf = io.BytesIO()
    with lzma.LZMAFile(compressed_buf, 'wb') as lzma_stream:
        pickle.dump(df, lzma_stream)
    compressed = compressed_buf.getvalue()

    # Derive a 256-bit key from the password (scrypt arguments: key_len, N, r, p)
    passphrase = getpass.getpass('Password: ')
    aes_key = scrypt(passphrase, kdf_salt, 256//8, 2**20, 8, 1)

    # Encrypt the compressed pickle; the nonce used by the cipher is read back from ctr_cipher.nonce
    ctr_cipher = AES.new(aes_key, AES.MODE_CTR)
    ciphertext = ctr_cipher.encrypt(compressed)

    # Store everything needed for decryption (except the password) alongside the ciphertext
    with open(fname, 'wb') as out_file:
        pickle.dump((kdf_salt, ctr_cipher.nonce, ciphertext), out_file)
|
|
|
|
|
|
|
|
def pickle_write_compressed(df, fname):
    """
    Serialise a DataFrame to an LZMA-compressed file

    The data is pickled and the pickle stream is written through an LZMA compressor into the file.

    :param df: Data to serialise
    :type df: DataFrame
    :param fname: Filename to write data to
    :type fname: str
    """

    # Layer the LZMA compressor over the raw file and pickle straight into it
    with open(fname, 'wb') as raw_file, lzma.LZMAFile(raw_file, 'wb') as lzma_stream:
        pickle.dump(df, lzma_stream)
|
|
|
|
|
|
|
|
def pickle_read_encrypted(fname):
    """
    Read a DataFrame from an encrypted file

    Prompts the user for a password.

    The file is expected to contain a pickled ``(salt, nonce, ciphertext)`` tuple.
    The key is re-derived from the password and the stored salt using scrypt, the ciphertext is decrypted with AES-256-CTR, and the result is LZMA-decompressed and unpickled.

    See :meth:`yli.pickle_write_encrypted`.

    :param fname: Filename to read data from
    :type fname: str

    :rtype: DataFrame
    """

    from Crypto.Cipher import AES
    from Crypto.Protocol.KDF import scrypt

    # Read ciphertext data
    # NOTE: This outer pickle.load runs on the raw file contents before any password is requested,
    # and unpickling can execute arbitrary code - only read files from trusted sources
    with open(fname, 'rb') as f:
        salt, nonce, ct_bytes = pickle.load(f)

    # Generate key using scrypt (must use the same parameters as when writing)
    password = getpass.getpass('Password: ')
    key = scrypt(password, salt, 256//8, 2**20, 8, 1)

    # Decrypt to Pandas plaintext
    # NOTE: No integrity check is performed on the ciphertext here, so a wrong password is not
    # detected explicitly - presumably it surfaces as a decompression error below (TODO confirm)
    cipher = AES.new(key, AES.MODE_CTR, nonce=nonce)
    pickle_pt = io.BytesIO(cipher.decrypt(ct_bytes))

    # Uncompress and deserialise to Pandas, closing the LZMA stream when done
    with lzma.LZMAFile(pickle_pt, 'rb') as lf:
        return pickle.load(lf)
|
|
|
|
|
|
|
|
def pickle_read_compressed(fname):
    """
    Read a DataFrame from a compressed file

    The file is LZMA-decompressed and the resulting stream is unpickled.

    See :meth:`yli.pickle_write_compressed`.

    :param fname: Filename to read data from
    :type fname: str

    :rtype: DataFrame
    """

    # Uncompress and deserialise to Pandas
    # Both the file and the LZMA stream are managed by the with statement so each is closed on exit
    # NOTE: Unpickling can execute arbitrary code - only read files from trusted sources
    with open(fname, 'rb') as f, lzma.LZMAFile(f, 'rb') as lf:
        return pickle.load(lf)
|