diff --git a/export_db.sh b/export_db.sh index 31972b7..2c5eff0 100755 --- a/export_db.sh +++ b/export_db.sh @@ -1,4 +1,4 @@ #!/bin/bash rm html/database.db -sqlite3 database.db '.dump pbs_drug pbs_prescriber_type pbs_streamlined' | sqlite3 html/database.db +sqlite3 database.db '.dump pbs_item pbs_item_restriction pbs_restriction pbs_restriction_criteria pbs_criteria pbs_criteria_parameter' | sqlite3 html/database.db diff --git a/html/index.html b/html/index.html index 4f38955..0e8effd 100644 --- a/html/index.html +++ b/html/index.html @@ -72,7 +72,7 @@ db = new SQL.Database(new Uint8Array(buf)); // Initialise search bar - const labels = execAsScalars(db.prepare('SELECT DISTINCT mp_pt FROM pbs_drug ORDER BY LOWER(mp_pt)')); + const labels = execAsScalars(db.prepare('SELECT DISTINCT mp_preferred_term FROM pbs_item ORDER BY LOWER(mp_preferred_term)')); const data = labels.map(label => ({'label': label})); const autocomplete = new Autocomplete(document.getElementById('search-input'), { data: data, @@ -84,7 +84,7 @@ function onClickSearchItem(item) { // Find matching PBS items - const stmt = db.prepare('SELECT *, (SELECT COUNT(1) FROM pbs_streamlined WHERE pbs_drug.item_code = pbs_streamlined.item_code) AS streamlined_authorities FROM pbs_drug LEFT JOIN pbs_prescriber_type ON pbs_drug.item_code = pbs_prescriber_type.item_code WHERE LOWER(mp_pt) = ? 
AND prescriber_type = "M"'); + const stmt = db.prepare('SELECT * FROM pbs_item WHERE LOWER(mp_preferred_term) = ?'); stmt.bind([item.label.toLowerCase()]); const items = execAsObjects(stmt); @@ -95,26 +95,25 @@ tbody.innerHTML = ''; for (let item of items) { const tr = document.createElement('tr'); - let td = document.createElement('td'); td.innerHTML = '' + item['item_code'] + ''; tr.appendChild(td); - td = document.createElement('td'); td.innerText = item['tpuu_or_mpp_pt']; tr.appendChild(td); - td = document.createElement('td'); td.innerText = item['mq']; tr.appendChild(td); - td = document.createElement('td'); td.innerText = item['repeats']; tr.appendChild(td); + let td = document.createElement('td'); td.innerHTML = '' + item['code'] + ''; tr.appendChild(td); + td = document.createElement('td'); td.innerText = item['mpp_preferred_term']; tr.appendChild(td); + td = document.createElement('td'); td.innerText = item['maximum_prescribable_units']; tr.appendChild(td); + td = document.createElement('td'); td.innerText = item['number_repeats']; tr.appendChild(td); - if (item['restriction_flag'] === 'U') { + if (item['benefit_type'] === 'unrestricted') { td = document.createElement('td'); tr.appendChild(td); - } else if (item['restriction_flag'] === 'R') { - td = document.createElement('td'); td.innerHTML = 'Restricted'; tr.appendChild(td); + } else if (item['benefit_type'] === 'restricted') { + td = document.createElement('td'); td.innerHTML = 'Restricted'; tr.appendChild(td); tr.classList.add('table-warning'); - } else if (item['restriction_flag'] === 'A') { - if (item['streamlined_authorities'] > 0) { - td = document.createElement('td'); td.innerHTML = 'Streamlined'; tr.appendChild(td); - tr.classList.add('table-warning'); - } else { - td = document.createElement('td'); td.innerHTML = 'Authority'; tr.appendChild(td); - tr.classList.add('table-danger'); - } + } else if (item['benefit_type'] === 'streamlined') { + td = document.createElement('td'); td.innerHTML = 
'Streamlined'; tr.appendChild(td); + tr.classList.add('table-warning'); + } else if (item['benefit_type'] === 'authority') { + td = document.createElement('td'); td.innerHTML = 'Authority'; tr.appendChild(td); + tr.classList.add('table-danger'); } else { - td = document.createElement('td'); td.innerText = item['restriction_flag']; tr.appendChild(td); + alert('Unknown benefit type: ' + item['benefit_type']); + throw 'Unknown benefit type: ' + item['benefit_type']; } tbody.appendChild(tr); @@ -126,17 +125,17 @@ function comparePBSItems(item1, item2) { // Sort tablets/capsules before other forms - if ((item1['tpuu_or_mpp_pt'].indexOf(' tablet, ') >= 0 || item1['tpuu_or_mpp_pt'].indexOf(' capsule, ') >= 0) && !(item2['tpuu_or_mpp_pt'].indexOf(' tablet, ') >= 0 || item2['tpuu_or_mpp_pt'].indexOf(' capsule, ') >= 0)) { + if ((item1['mpp_preferred_term'].indexOf(' tablet, ') >= 0 || item1['mpp_preferred_term'].indexOf(' capsule, ') >= 0) && !(item2['mpp_preferred_term'].indexOf(' tablet, ') >= 0 || item2['mpp_preferred_term'].indexOf(' capsule, ') >= 0)) { return -1; } - if ((item2['tpuu_or_mpp_pt'].indexOf(' tablet, ') >= 0 || item2['tpuu_or_mpp_pt'].indexOf(' capsule, ') >= 0) && !(item1['tpuu_or_mpp_pt'].indexOf(' tablet, ') >= 0 || item1['tpuu_or_mpp_pt'].indexOf(' capsule, ') >= 0)) { + if ((item2['mpp_preferred_term'].indexOf(' tablet, ') >= 0 || item2['mpp_preferred_term'].indexOf(' capsule, ') >= 0) && !(item1['mpp_preferred_term'].indexOf(' tablet, ') >= 0 || item1['mpp_preferred_term'].indexOf(' capsule, ') >= 0)) { return 1; } - // Compare tpuu_or_mpp_pt word-by-word accounting for numbers + // Compare mpp_preferred_term word-by-word accounting for numbers - const bits1 = item1['tpuu_or_mpp_pt'].split(' '); - const bits2 = item2['tpuu_or_mpp_pt'].split(' '); + const bits1 = item1['mpp_preferred_term'].split(' '); + const bits2 = item2['mpp_preferred_term'].split(' '); for (let i = 0; i < bits1.length && i < bits2.length; i++) { if (regexIsNumber.test(bits1[i]) 
&& regexIsNumber.test(bits2[i])) { @@ -163,8 +162,8 @@ // Sort unrestricted, then restricted/streamlined, then authority required - const type1 = item1['restriction_flag'] === 'U' ? 0 : item1['restriction_flag'] === 'R' ? 1 : item1['restriction_flag'] === 'S' ? 1 : item1['streamlined_authorities'] > 0 ? 2 : 3; - const type2 = item2['restriction_flag'] === 'U' ? 0 : item2['restriction_flag'] === 'R' ? 1 : item2['restriction_flag'] === 'S' ? 1 : item2['streamlined_authorities'] > 0 ? 2 : 3; + const type1 = item1['benefit_type'] === 'unrestricted' ? 0 : item1['benefit_type'] === 'restricted' ? 1 : item1['benefit_type'] === 'streamlined' ? 1 : 2; + const type2 = item2['benefit_type'] === 'unrestricted' ? 0 : item2['benefit_type'] === 'restricted' ? 1 : item2['benefit_type'] === 'streamlined' ? 1 : 2; return type1 - type2; } diff --git a/import_pbs.py b/import_pbs.py deleted file mode 100644 index 4a264a6..0000000 --- a/import_pbs.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright © 2023 Lee Yingtong Li (RunasSudo) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>.
- -import pandas as pd -import sqlite3 -import zipfile - -con = sqlite3.connect('database.db') -cur = con.cursor() - -# Init schema -cur.execute('DROP TABLE IF EXISTS pbs_drug') -cur.execute('CREATE TABLE pbs_drug (id INTEGER PRIMARY KEY AUTOINCREMENT, item_code CHARACTER(6), mp_pt TEXT, tpuu_or_mpp_pt TEXT, restriction_flag CHARACTER(1), mq INTEGER, repeats INTEGER)') - -cur.execute('DROP TABLE IF EXISTS pbs_prescriber_type') -cur.execute('CREATE TABLE pbs_prescriber_type (id INTEGER PRIMARY KEY AUTOINCREMENT, item_code CHARACTER(6), prescriber_type CHARACTER(1))') - -cur.execute('DROP TABLE IF EXISTS pbs_streamlined') -cur.execute('CREATE TABLE pbs_streamlined (id INTEGER PRIMARY KEY AUTOINCREMENT, item_code CHARACTER(6), treatment_of_code INTEGER)') - -# Read drug list, prescriber type -with zipfile.ZipFile('data/2023-01-01-v3extracts.zip', 'r') as zipf: - # drug_xxx.txt - - with zipf.open('drug_20230101.txt', 'r') as f: - df_drug = pd.read_csv(f, sep='!') - - for _, drug in df_drug.iterrows(): - # Skip already added - cur.execute('SELECT COUNT(*) FROM pbs_drug WHERE item_code=?', (drug['item-code'],)) - if cur.fetchone()[0] > 0: - continue - - cur.execute('INSERT INTO pbs_drug (item_code, mp_pt, tpuu_or_mpp_pt, restriction_flag, mq, repeats) VALUES (?, ?, ?, ?, ?, ?)', (drug['item-code'], drug['mp-pt'], drug['tpuu-or-mpp-pt'], drug['restriction-flag'], drug['mq'], drug['repeats'])) - - # Prescriber_type_xxx.txt - - with zipf.open('Prescriber_type_20230101.txt', 'r') as f: - df_prescriber_type = pd.read_csv(f, sep='\t', header=0, names=['mp-pt', 'item-code', 'prescriber-type']) - - for _, prescriber_type in df_prescriber_type.iterrows(): - cur.execute('INSERT INTO pbs_prescriber_type (item_code, prescriber_type) VALUES (?, ?)', (prescriber_type['item-code'], prescriber_type['prescriber-type'])) - - # streamlined_xxx.txt (streamlined authorities) - - with zipf.open('streamlined_20230101.txt', 'r') as f: - df_streamlined = pd.read_csv(f, sep='\t') - - for _, 
streamlined in df_streamlined.iterrows(): - cur.execute('INSERT INTO pbs_streamlined (item_code, treatment_of_code) VALUES (?, ?)', (streamlined['item-code'], streamlined['treatment-of-code'])) - -con.commit() diff --git a/import_pbs_xml.py b/import_pbs_xml.py new file mode 100644 index 0000000..12437da --- /dev/null +++ b/import_pbs_xml.py @@ -0,0 +1,167 @@ +# Copyright © 2023 Lee Yingtong Li (RunasSudo) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +import sqlite3 +import zipfile +from xml.etree import ElementTree as ET + +# Open database +con = sqlite3.connect('database.db') +cur = con.cursor() + +# Init schema +cur.execute('DROP TABLE IF EXISTS pbs_item') +cur.execute('CREATE TABLE pbs_item (code TEXT PRIMARY KEY, mpp_preferred_term TEXT, mp_preferred_term TEXT, maximum_prescribable_units INTEGER, number_repeats INTEGER, benefit_type TEXT)') + +cur.execute('DROP TABLE IF EXISTS pbs_item_restriction') +cur.execute('CREATE TABLE pbs_item_restriction (item_code TEXT, restriction_code INTEGER)') + +cur.execute('DROP TABLE IF EXISTS pbs_restriction') +cur.execute('CREATE TABLE pbs_restriction (code INTEGER PRIMARY KEY, treatment_of INTEGER, indication TEXT, criteria_operator TEXT)') + +cur.execute('DROP TABLE IF EXISTS pbs_restriction_criteria') +cur.execute('CREATE TABLE pbs_restriction_criteria (restriction_code INTEGER, criteria_code INTEGER)') + +cur.execute('DROP TABLE IF EXISTS pbs_criteria') +cur.execute('CREATE TABLE pbs_criteria (code INTEGER PRIMARY KEY, type TEXT, parameters_operator TEXT)') + +cur.execute('DROP TABLE IF EXISTS pbs_criteria_parameter') +cur.execute('CREATE TABLE pbs_criteria_parameter (id INTEGER PRIMARY KEY AUTOINCREMENT, criteria_code INTEGER, text TEXT)') + +# Parse XML +with zipfile.ZipFile('data/2023-01-01-xml-V3.zip', 'r') as zipf: + with zipf.open('sch-2023-01-01-r1.xml', 'r') as f: + tree = ET.parse(f) + +print('Parsed XML') + +root = tree.getroot() +ns = {'pbs': 'http://schema.pbs.gov.au/', 'xlink': 'http://www.w3.org/1999/xlink', 'xml': 'http://www.w3.org/XML/1998/namespace', 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'dbk': 'http://docbook.org/ns/docbook'} + +# Get General Schedule +program = next(p for p in root.find('pbs:schedule', ns).findall('pbs:program', ns) if p.find('pbs:info', ns).find('pbs:code', ns).text == 'GE') + +restrictions_to_parse = set() +criteria_to_parse = set() + +# Get schedule items (prescribing-rule) in schedule +for item in 
program.findall('pbs:prescribing-rule', ns): + code = item.find('pbs:code', ns).text + + # Only get benefits available to medical practitioners + benefits = [b for b in item.find('pbs:benefit-types-list', ns).findall('pbs:benefit-type', ns) if b.find('pbs:member-of-list', ns).find('pbs:member-of[@rdf:resource="http://pbs.gov.au/prescriber/medical"]', ns)] + + if not benefits: + continue + + assert len(benefits) == 1 + benefit = benefits[0] + + mpp_id = item.find('pbs:ready-prepared', ns).find('pbs:mpp-reference', ns).get('{http://www.w3.org/1999/xlink}href').lstrip('#') + mpp = root.find('pbs:drugs-list', ns).find('pbs:mpp[@xml:id="' + mpp_id + '"]', ns) + mpp_preferred_term = mpp.find('pbs:preferred-term', ns).text + + mp_id = mpp.find('pbs:drug-references-list', ns).find('pbs:mp-reference', ns).get('{http://www.w3.org/1999/xlink}href').lstrip('#') + mp = root.find('pbs:drugs-list', ns).find('pbs:mp[@xml:id="' + mp_id + '"]', ns) + mp_preferred_term = mp.find('pbs:preferred-term[@rdf:resource="http://pbs.gov.au/clinical"]', ns).text + + max_units = item.find('pbs:ready-prepared', ns).find('pbs:maximum-prescribable[@rdf:resource="http://pbs.gov.au/reference/unit-of-use"]', ns).find('pbs:value', ns).text + max_repeats = item.find('pbs:ready-prepared', ns).find('pbs:number-repeats', ns).find('pbs:value', ns).text + + benefit_type = { + 'http://pbs.gov.au/benefit-type/unrestricted': 'unrestricted', + 'http://pbs.gov.au/benefit-type/restricted': 'restricted', + 'http://pbs.gov.au/benefit-type/streamlined': 'streamlined', + 'http://pbs.gov.au/benefit-type/authority-required': 'authority', + }[benefit.get('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource')] + + cur.execute('INSERT INTO pbs_item (code, mpp_preferred_term, mp_preferred_term, maximum_prescribable_units, number_repeats, benefit_type) VALUES (?, ?, ?, ?, ?, ?)', (code, mpp_preferred_term, mp_preferred_term, max_units, max_repeats, benefit_type)) + + # Get restrictions + if restrictions := 
benefit.find('pbs:restriction-references-list', ns): + for restriction_reference in restrictions.findall('pbs:restriction-reference', ns): + restriction_id = restriction_reference.get('{http://www.w3.org/1999/xlink}href').lstrip('#') + restriction_code = restriction_reference.find('pbs:code', ns).text + cur.execute('INSERT INTO pbs_item_restriction (item_code, restriction_code) VALUES (?, ?)', (code, restriction_code)) + + # Queue this restriction for parsing + restrictions_to_parse.add(restriction_id) + +# Parse restrictions +for restriction_id in sorted(list(restrictions_to_parse)): + restriction = root.find('pbs:prescribing-texts-list', ns).find('pbs:restriction[@xml:id="' + restriction_id + '"]', ns) + code = restriction.find('pbs:code[@rdf:resource="http://pbs.gov.au/code/restriction"]', ns).text + treatment_of = restriction.find('pbs:code[@rdf:resource="http://pbs.gov.au/code/treatment-of"]', ns).text + + # Build the name of the indication (episodicity, severity, condition) + indication_id = restriction.find('pbs:indication-reference', ns).get('{http://www.w3.org/1999/xlink}href').lstrip('#') + indication = root.find('pbs:prescribing-texts-list', ns).find('pbs:indication[@xml:id="' + indication_id + '"]', ns) + indication_strings = [] + + if episodicity_reference := indication.find('pbs:episodicity-reference', ns): + episodicity_id = episodicity_reference.get('{http://www.w3.org/1999/xlink}href').lstrip('#') + episodicity = root.find('pbs:prescribing-texts-list', ns).find('pbs:episodicity[@xml:id="' + episodicity_id + '"]', ns) + episodicity_term = episodicity.find('pbs:preferred-term', ns).text.strip() + indication_strings.append(episodicity_term) + + if severity_reference := indication.find('pbs:severity-reference', ns): + severity_id = severity_reference.get('{http://www.w3.org/1999/xlink}href').lstrip('#') + severity = root.find('pbs:prescribing-texts-list', ns).find('pbs:severity[@xml:id="' + severity_id + '"]', ns) + severity_term = 
severity.find('pbs:preferred-term', ns).text.strip() + indication_strings.append(severity_term) + + condition_id = indication.find('pbs:condition-reference', ns).get('{http://www.w3.org/1999/xlink}href').lstrip('#') + condition = root.find('pbs:prescribing-texts-list', ns).find('pbs:condition[@xml:id="' + condition_id + '"]', ns) + condition_term = condition.find('pbs:preferred-term', ns).text.strip() + indication_strings.append(condition_term) + + # Get operator and criteria + operators = restriction.findall('pbs:any', ns) + restriction.findall('pbs:all', ns) + restriction.findall('pbs:one-of', ns) + if operators: + operator = operators[0].tag[operators[0].tag.index('}')+1:] + + for criteria_reference in operators[0].findall('pbs:criteria-reference', ns): + criteria_id = criteria_reference.get('{http://www.w3.org/1999/xlink}href').lstrip('#') + criteria_code = criteria_reference.find('pbs:code', ns).text + cur.execute('INSERT INTO pbs_restriction_criteria (restriction_code, criteria_code) VALUES (?, ?)', (code, criteria_code)) + + # Queue this criteria for parsing + criteria_to_parse.add(criteria_id) + else: + operator = None + + cur.execute('INSERT INTO pbs_restriction (code, treatment_of, indication, criteria_operator) VALUES (?, ?, ?, ?)', (code, treatment_of, ' '.join(indication_strings), operator)) + +# Parse criteria +for criteria_id in sorted(list(criteria_to_parse)): + criteria = root.find('pbs:prescribing-texts-list', ns).find('*[@xml:id="' + criteria_id + '"]', ns) + criteria_type = criteria.tag[criteria.tag.index('}')+1:] + code = criteria.find('pbs:code', ns).text + + # Get operator and parameters + operators = criteria.findall('pbs:any', ns) + criteria.findall('pbs:all', ns) + criteria.findall('pbs:one-of', ns) + if operators: + operator = operators[0].tag[operators[0].tag.index('}')+1:] + + for parameter_reference in operators[0].findall('pbs:parameter-reference', ns): + parameter_id = 
parameter_reference.get('{http://www.w3.org/1999/xlink}href').lstrip('#') + parameter = root.find('pbs:prescribing-texts-list', ns).find('*[@xml:id="' + parameter_id + '"]', ns) + + note_text = '\n'.join(p.text for n in parameter.findall('dbk:note', ns) for p in n.findall('dbk:para', ns)) + cur.execute('INSERT INTO pbs_criteria_parameter (criteria_code, text) VALUES (?, ?)', (code, note_text)) + + cur.execute('INSERT INTO pbs_criteria (code, type, parameters_operator) VALUES (?, ?, ?)', (code, criteria_type, operator)) + +con.commit()