# MedicineSearch/import_pbs.py
# (repository viewer metadata: 60 lines, 2.6 KiB, Python)

# Copyright © 2023 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import pandas as pd
import sqlite3
import zipfile
def _read_extract(zipf, member, **read_csv_kwargs):
    """Parse one delimited text member of an open extract archive into a DataFrame.

    zipf is an open zipfile.ZipFile and member is the file name inside it; any
    remaining keyword arguments are forwarded unchanged to pandas.read_csv.
    """
    with zipf.open(member, 'r') as f:
        return pd.read_csv(f, **read_csv_kwargs)


def import_pbs(db_path='database.db', zip_path='2023-01-01-v3extracts.zip', extract_date='20230101'):
    """Rebuild the ``pbs`` table of a SQLite database from a PBS extract archive.

    Parameters:
        db_path: SQLite database file to write to (created if missing).
        zip_path: zip archive holding the extract text files.
        extract_date: date stamp embedded in the member names, i.e.
            ``drug_<date>.txt``, ``Prescriber_type_<date>.txt`` and
            ``streamlined_<date>.txt``.

    Raises:
        FileNotFoundError, zipfile.BadZipFile, KeyError: if the archive or one
            of its members cannot be opened.
        sqlite3.Error: if the database cannot be written.

    The ``pbs`` table is dropped and recreated on every call, so any rows
    already in it are discarded.
    """
    # Read every input before touching the database, so that a missing or
    # corrupt archive does not leave the existing table dropped.
    with zipfile.ZipFile(zip_path, 'r') as zipf:
        df_drug = _read_extract(zipf, f'drug_{extract_date}.txt', sep='!')
        df_prescriber_type = _read_extract(
            zipf,
            f'Prescriber_type_{extract_date}.txt',
            sep='\t',
            header=0,
            names=['mp-pt', 'item-code', 'prescriber-type'],
        )
        df_streamlined = _read_extract(zipf, f'streamlined_{extract_date}.txt', sep='\t')

    df_drug = df_drug.merge(df_prescriber_type[['item-code', 'prescriber-type']], how='left', on='item-code')

    # Filter only drugs able to be prescribed by medical practitioners
    df_drug = df_drug[df_drug['prescriber-type'] == 'M']

    # Keep only the first row per item code. The table is always rebuilt from
    # empty, so this is the same as skipping codes that were already inserted,
    # without issuing one SELECT per row.
    df_drug = df_drug.drop_duplicates(subset='item-code')

    drug_columns = ['item-code', 'mp-pt', 'tpuu-or-mpp-pt', 'restriction-flag', 'mq', 'repeats']
    drug_rows = df_drug[drug_columns].itertuples(index=False, name=None)

    # Only item codes that made it into the table matter. Filtering (rather than
    # merging against the drug rows) keeps each treatment code exactly as often
    # as it appears in the streamlined file.
    df_streamlined = df_streamlined[df_streamlined['item-code'].isin(df_drug['item-code'])]
    streamlined_rows = [
        (','.join(group['treatment-of-code'].astype(str)), item_code)
        for item_code, group in df_streamlined.groupby('item-code')
    ]

    con = sqlite3.connect(db_path)
    try:
        cur = con.cursor()

        # Init schema (IF EXISTS so that a fresh database does not raise)
        cur.execute('DROP TABLE IF EXISTS pbs')
        cur.execute('CREATE TABLE pbs (id INTEGER PRIMARY KEY AUTOINCREMENT, item_code CHARACTER(6), mp_pt TEXT, tpuu_or_mpp_pt TEXT, restriction_flag CHARACTER(1), mq INTEGER, repeats INTEGER, streamlined_authorities TEXT)')

        # Add drugs to SQL
        cur.executemany('INSERT INTO pbs (item_code, mp_pt, tpuu_or_mpp_pt, restriction_flag, mq, repeats) VALUES (?, ?, ?, ?, ?, ?)', drug_rows)

        # Attach the comma-separated streamlined authority codes
        cur.executemany('UPDATE pbs SET streamlined_authorities=? WHERE item_code=?', streamlined_rows)

        con.commit()
    finally:
        # Always release the database file, even if the import failed part-way
        con.close()


if __name__ == '__main__':
    import_pbs()