2023-01-24 19:58:17 +11:00
|
|
|
#!/usr/bin/env python3
|
2023-01-22 21:02:59 +11:00
|
|
|
# Copyright © 2023 Lee Yingtong Li (RunasSudo)
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU Affero General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
2023-02-04 14:28:11 +11:00
|
|
|
import os
|
2023-01-22 21:02:59 +11:00
|
|
|
import sqlite3
|
|
|
|
import zipfile
|
|
|
|
from xml.etree import ElementTree as ET
|
|
|
|
|
|
|
|
# Open database
con = sqlite3.connect('database.db')
cur = con.cursor()

# Init schema
# Every table is dropped (if present) and then recreated, so each run starts
# from empty tables. Entries are (table name, column definitions) and are
# processed in the order listed.
_schema = (
    ('meta', 'key TEXT PRIMARY KEY, value TEXT'),
    ('pbs_item', 'code TEXT PRIMARY KEY, mpp_code TEXT, maximum_prescribable_units INTEGER, number_repeats INTEGER, benefit_type TEXT, program TEXT'),
    ('pbs_mpp', 'code TEXT PRIMARY KEY, mp_code TEXT, preferred_term TEXT'),
    ('pbs_tpp', 'code TEXT PRIMARY KEY, mpp_code TEXT, brand_name TEXT'),
    ('pbs_mp', 'code TEXT PRIMARY KEY, preferred_term TEXT'),
    ('pbs_item_restriction', 'item_code TEXT, restriction_code INTEGER'),
    ('pbs_restriction', 'code INTEGER PRIMARY KEY, treatment_of INTEGER, indication TEXT, criteria_operator TEXT, criteria_rendered TEXT'),
    ('pbs_restriction_criteria', 'restriction_code INTEGER, criteria_code INTEGER'),
    ('pbs_criteria', 'code INTEGER PRIMARY KEY, type TEXT, parameters_operator TEXT'),
    ('pbs_criteria_parameter', 'id INTEGER PRIMARY KEY AUTOINCREMENT, criteria_code INTEGER, text TEXT'),
)

for _table, _columns in _schema:
    cur.execute('DROP TABLE IF EXISTS ' + _table)
    cur.execute('CREATE TABLE ' + _table + ' (' + _columns + ')')
|
|
|
|
|
|
|
|
# Parse XML
# Pick the archive in data/ whose file name sorts last among *-xml-V3.zip
# (presumably the most recent release if names start with a date - TODO confirm)
_archives = [name for name in os.listdir('data') if name.endswith('-xml-V3.zip')]
_archives.sort()
pbs_zip_file = _archives[-1]

with zipfile.ZipFile('data/' + pbs_zip_file, 'r') as zipf:
    # Use the first archive member whose name ends in .xml
    pbs_xml_file = next(member for member in zipf.namelist() if member.endswith('.xml'))
    with zipf.open(pbs_xml_file, 'r') as f:
        tree = ET.parse(f)

print('Parsed XML')

root = tree.getroot()

# Prefix -> namespace URI mapping passed to the find()/findall() calls below
ns = {
    'pbs': 'http://schema.pbs.gov.au/',
    'xlink': 'http://www.w3.org/1999/xlink',
    'xml': 'http://www.w3.org/XML/1998/namespace',
    'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'dbk': 'http://docbook.org/ns/docbook',
    'dct': 'http://purl.org/dc/terms/',
}
|
|
|
|
|
|
|
|
# Write meta
# Record the text of <info>/<valid> from the document root under the key 'pbs_date'
_pbs_valid = root.find('pbs:info', ns).find('dct:valid', ns).text
cur.execute(
    'INSERT INTO meta (key, value) VALUES (?, ?)',
    ('pbs_date', _pbs_valid),
)
|
2023-01-22 21:02:59 +11:00
|
|
|
|
2023-02-04 14:09:58 +11:00
|
|
|
# -----------------------------------------
|
|
|
|
# Parse items from each desired PBS program
|
2023-01-22 21:02:59 +11:00
|
|
|
|
2023-01-24 18:57:30 +11:00
|
|
|
# Work queues: filled while walking the schedule, then iterated by the later
# parsing passes. Sets are used so a repeatedly referenced element is queued once.
criteria_to_parse = set()      # xml:id of each criteria referenced by a parsed restriction
restrictions_to_parse = set()  # xml:id of each restriction referenced by a parsed item
mps_to_parse = set()           # xml:id of each MP referenced by a parsed MPP
tpps_to_parse = set()          # (TPP xml:id, MPP code) pairs
mpps_to_parse = set()          # xml:id of each MPP referenced by a parsed item
|
|
|
|
|
2023-02-04 14:09:58 +11:00
|
|
|
def parse_program(program_code):
    """Import the schedule items of one PBS program into the database.

    Finds the <program> whose <info>/<code> text equals ``program_code`` and,
    for each <prescribing-rule> in it that has a benefit type available to
    medical practitioners:

    * inserts a row into ``pbs_item``;
    * inserts one ``pbs_item_restriction`` row per referenced restriction and
      adds that restriction's xml:id to ``restrictions_to_parse``;
    * adds the xml:id of the item's MPP to ``mpps_to_parse``.

    Items with no benefit type for medical practitioners are skipped.

    Uses the module-level ``root``, ``ns``, ``cur``, ``mpps_to_parse`` and
    ``restrictions_to_parse``.

    Raises:
        StopIteration: no program has the given code.
        AssertionError: an item has more than one benefit type for medical
            practitioners.
        KeyError: the benefit type URI is not one of the four mapped below.
    """
    xlink_href = '{http://www.w3.org/1999/xlink}href'
    rdf_resource = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'
    medical_prescriber = 'pbs:member-of[@rdf:resource="http://pbs.gov.au/prescriber/medical"]'
    unit_of_use = 'pbs:maximum-prescribable[@rdf:resource="http://pbs.gov.au/reference/unit-of-use"]'

    # Benefit type URI -> short name stored in pbs_item.benefit_type
    benefit_type_names = {
        'http://pbs.gov.au/benefit-type/unrestricted': 'unrestricted',
        'http://pbs.gov.au/benefit-type/restricted': 'restricted',
        'http://pbs.gov.au/benefit-type/streamlined': 'streamlined',
        'http://pbs.gov.au/benefit-type/authority-required': 'authority',
    }

    # Get program
    programs = root.find('pbs:schedule', ns).findall('pbs:program', ns)
    program = next(p for p in programs if p.find('pbs:info', ns).find('pbs:code', ns).text == program_code)

    # Get schedule items (prescribing-rule) in schedule
    for item in program.findall('pbs:prescribing-rule', ns):
        code = item.find('pbs:code', ns).text

        # Only get benefits available to medical practitioners.
        # find() returns None when nothing matches, so compare against None:
        # an Element that matched but has no child elements is falsy, and a
        # plain truth test would wrongly treat it as "not found".
        benefits = [
            b for b in item.find('pbs:benefit-types-list', ns).findall('pbs:benefit-type', ns)
            if b.find('pbs:member-of-list', ns).find(medical_prescriber, ns) is not None
        ]

        if not benefits:
            continue

        # The rest of the function handles exactly one benefit type per item
        assert len(benefits) == 1
        benefit = benefits[0]

        ready_prepared = item.find('pbs:ready-prepared', ns)
        mpp_reference = ready_prepared.find('pbs:mpp-reference', ns)
        mpp_id = mpp_reference.get(xlink_href).lstrip('#')
        mpp_code = mpp_reference.find('pbs:code', ns).text

        max_units = ready_prepared.find(unit_of_use, ns).find('pbs:value', ns).text
        max_repeats = ready_prepared.find('pbs:number-repeats', ns).find('pbs:value', ns).text

        benefit_type = benefit_type_names[benefit.get(rdf_resource)]

        cur.execute('INSERT INTO pbs_item (code, mpp_code, maximum_prescribable_units, number_repeats, benefit_type, program) VALUES (?, ?, ?, ?, ?, ?)', (code, mpp_code, max_units, max_repeats, benefit_type, program_code))

        # Get restrictions (the list element is absent when there are none)
        restrictions = benefit.find('pbs:restriction-references-list', ns)
        if restrictions is not None:
            for restriction_reference in restrictions.findall('pbs:restriction-reference', ns):
                restriction_id = restriction_reference.get(xlink_href).lstrip('#')
                restriction_code = restriction_reference.find('pbs:code', ns).text
                cur.execute('INSERT INTO pbs_item_restriction (item_code, restriction_code) VALUES (?, ?)', (code, restriction_code))

                # Queue this restriction for parsing
                restrictions_to_parse.add(restriction_id)

        # Queue the MPP for parsing
        mpps_to_parse.add(mpp_id)
|
|
|
|
|
|
|
|
# Import items from each desired program, in this order:
#   GE = General Schedule
#   R1 = Repatriation PBS
for _program_code in ('GE', 'R1'):
    parse_program(_program_code)
|
|
|
|
|
|
|
|
# ----------------
|
|
|
|
# Parse MPPs, etc.
|
2023-01-24 18:57:30 +11:00
|
|
|
|
|
|
|
# Parse MPPs
# For every queued MPP: store it in pbs_mpp together with the code of its MP,
# then queue that MP and all of the MPP's TPPs for the later passes.
_drugs_list = root.find('pbs:drugs-list', ns)

# Text substitutions applied, in order, to metoprolol preferred terms
_metoprolol_replacements = (
    # Incorrect capitalisation
    ('METOPROLOL SUCCINATE Tablet', 'metoprolol succinate'),
    ('METOPROLOL TARTRATE Tablet', 'metoprolol tartrate'),
    # Idiosyncratic word order
    ('(controlled release)', 'modified release tablet'),
    ('mg,', 'mg tablet,'),
)

for mpp_id in sorted(mpps_to_parse):
    mpp = _drugs_list.find('pbs:mpp[@xml:id="' + mpp_id + '"]', ns)
    mpp_code = mpp.find('pbs:code', ns).text
    mpp_preferred_term = mpp.find('pbs:preferred-term', ns).text

    drug_references = mpp.find('pbs:drug-references-list', ns)
    mp_reference = drug_references.find('pbs:mp-reference', ns)
    mp_id = mp_reference.get('{http://www.w3.org/1999/xlink}href').lstrip('#')
    mp = _drugs_list.find('pbs:mp[@xml:id="' + mp_id + '"]', ns)
    # Must look this up because the <code> in <mp-reference> is only SNOMED
    mp_code = mp.find('pbs:code[@rdf:resource="http://pbs.gov.au/Drug/MP"]', ns).text

    # Manual fixups for metoprolol
    if 'METOPROLOL' in mpp_preferred_term:
        for _old, _new in _metoprolol_replacements:
            mpp_preferred_term = mpp_preferred_term.replace(_old, _new)

        # Classify as "metoprolol tartrate": store MP code 432PBSC instead of
        # 1187PBSC and clear mp_id so the 1187PBSC MP is not queued below.
        # NOTE(review): applied only within the metoprolol fixups - confirm nesting
        if mp_code == '1187PBSC':
            mp_id = None
            mp_code = '432PBSC'

    cur.execute('INSERT INTO pbs_mpp (code, mp_code, preferred_term) VALUES (?, ?, ?)', (mpp_code, mp_code, mpp_preferred_term))

    # Queue the MP for parsing
    if mp_id:
        mps_to_parse.add(mp_id)

    # Get TPPs
    for tpp_reference in drug_references.findall('pbs:tpp-reference', ns):
        tpp_id = tpp_reference.get('{http://www.w3.org/1999/xlink}href').lstrip('#')

        # Queue the TPP for parsing, remembering which MPP it belongs to
        tpps_to_parse.add((tpp_id, mpp_code))
|
|
|
|
|
|
|
|
# Parse MPs
# Store the PBS code and clinical preferred term of every queued MP in pbs_mp.
_drugs_list = root.find('pbs:drugs-list', ns)

for mp_id in sorted(mps_to_parse):
    mp = _drugs_list.find(f'pbs:mp[@xml:id="{mp_id}"]', ns)

    # Also there are SNOMED codes but they are inconsistent
    mp_code = mp.find('pbs:code[@rdf:resource="http://pbs.gov.au/Drug/MP"]', ns).text

    if mp_code == '432PBSC':
        # Specified as all uppercase in PBS XML for some reason
        mp_preferred_term = 'metoprolol'
    else:
        mp_preferred_term = mp.find('pbs:preferred-term[@rdf:resource="http://pbs.gov.au/clinical"]', ns).text

    cur.execute('INSERT INTO pbs_mp (code, preferred_term) VALUES (?, ?)', (mp_code, mp_preferred_term))
|
2023-01-22 21:02:59 +11:00
|
|
|
|
2023-01-24 19:56:54 +11:00
|
|
|
# Parse TPPs
# Each queue entry pairs a TPP xml:id with the code of the MPP that referenced it.
_drugs_list = root.find('pbs:drugs-list', ns)

for tpp_id, mpp_code in sorted(tpps_to_parse):
    tpp = _drugs_list.find(f'pbs:tpp[@xml:id="{tpp_id}"]', ns)
    tpp_code = tpp.find('pbs:code', ns).text
    tpp_brand_name = tpp.find('pbs:brand-name', ns).find('pbs:value', ns).text

    cur.execute(
        'INSERT INTO pbs_tpp (code, mpp_code, brand_name) VALUES (?, ?, ?)',
        (tpp_code, mpp_code, tpp_brand_name),
    )
|
|
|
|
|
2023-01-22 21:02:59 +11:00
|
|
|
# Parse restrictions
# For every queued restriction: build its indication text, link it to its
# criteria (queuing those for the next pass) and store it in pbs_restriction.
_texts_list = root.find('pbs:prescribing-texts-list', ns)
_xlink_href = '{http://www.w3.org/1999/xlink}href'

for restriction_id in sorted(restrictions_to_parse):
    restriction = _texts_list.find('pbs:restriction[@xml:id="' + restriction_id + '"]', ns)
    code = restriction.find('pbs:code[@rdf:resource="http://pbs.gov.au/code/restriction"]', ns).text
    treatment_of = restriction.find('pbs:code[@rdf:resource="http://pbs.gov.au/code/treatment-of"]', ns).text

    # Build the name of the indication (episodicity, severity, condition)
    indication_id = restriction.find('pbs:indication-reference', ns).get(_xlink_href).lstrip('#')
    indication = _texts_list.find('pbs:indication[@xml:id="' + indication_id + '"]', ns)
    indication_strings = []

    for _part in ('episodicity', 'severity', 'condition'):
        _part_reference = indication.find('pbs:' + _part + '-reference', ns)

        # find() returns None when the reference is absent. Compare against
        # None rather than truth-testing: an Element without child elements
        # is falsy, so a truth test would drop a reference that is present.
        if _part_reference is None:
            continue

        _part_id = _part_reference.get(_xlink_href).lstrip('#')
        _part_element = _texts_list.find('pbs:' + _part + '[@xml:id="' + _part_id + '"]', ns)
        indication_strings.append(_part_element.find('pbs:preferred-term', ns).text.strip())

    if not indication_strings:
        # TODO: Might have a <block-container>
        indication_strings = ['Unknown indication']

    # Get operator and criteria: only the first <any>/<all>/<one-of> child is
    # used, searched for in that order
    operators = restriction.findall('pbs:any', ns) + restriction.findall('pbs:all', ns) + restriction.findall('pbs:one-of', ns)
    if operators:
        # Tag is '{namespace}local-name'; keep only the local name
        operator = operators[0].tag.rpartition('}')[2]

        for criteria_reference in operators[0].findall('pbs:criteria-reference', ns):
            criteria_id = criteria_reference.get(_xlink_href).lstrip('#')
            criteria_code = criteria_reference.find('pbs:code', ns).text
            cur.execute('INSERT INTO pbs_restriction_criteria (restriction_code, criteria_code) VALUES (?, ?)', (code, criteria_code))

            # Queue this criteria for parsing
            criteria_to_parse.add(criteria_id)
    else:
        operator = None

    # criteria_rendered is not set here, so the column is left NULL
    cur.execute('INSERT INTO pbs_restriction (code, treatment_of, indication, criteria_operator) VALUES (?, ?, ?, ?)', (code, treatment_of, ' '.join(indication_strings), operator))
|
|
|
|
|
|
|
|
# Parse criteria
# For every queued criteria: store the note text of each referenced parameter
# in pbs_criteria_parameter, then store the criteria itself in pbs_criteria.
_texts_list = root.find('pbs:prescribing-texts-list', ns)

for criteria_id in sorted(criteria_to_parse):
    # The element name is not fixed, so match any child with this xml:id
    criteria = _texts_list.find('*[@xml:id="' + criteria_id + '"]', ns)
    # Tag is '{namespace}local-name'; the local name is stored as the type
    criteria_type = criteria.tag.rpartition('}')[2]
    code = criteria.find('pbs:code', ns).text

    # Get operator and parameters.
    # Reset per criteria: without this, a criteria that has no
    # <any>/<all>/<one-of> child would be stored with the operator left over
    # from a previous iteration.
    operator = None
    operators = criteria.findall('pbs:any', ns) + criteria.findall('pbs:all', ns) + criteria.findall('pbs:one-of', ns)
    if operators:
        operator = operators[0].tag.rpartition('}')[2]

        for parameter_reference in operators[0].findall('pbs:parameter-reference', ns):
            parameter_id = parameter_reference.get('{http://www.w3.org/1999/xlink}href').lstrip('#')
            parameter = _texts_list.find('*[@xml:id="' + parameter_id + '"]', ns)

            # One line per <para> inside each <note>; .text is None for a
            # paragraph with no leading text, so substitute an empty string
            note_text = '\n'.join(
                p.text or ''
                for n in parameter.findall('dbk:note', ns)
                for p in n.findall('dbk:para', ns)
            )
            cur.execute('INSERT INTO pbs_criteria_parameter (criteria_code, text) VALUES (?, ?)', (code, note_text))

    cur.execute('INSERT INTO pbs_criteria (code, type, parameters_operator) VALUES (?, ?, ?)', (code, criteria_type, operator))
|
|
|
|
|
|
|
|
# Commit the pending transaction so the inserted rows are saved to the database file
con.commit()
|