Read PBS data from XML file

This commit is contained in:
RunasSudo 2023-01-22 21:02:59 +11:00
parent 2398f75203
commit ae8b988e26
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
4 changed files with 192 additions and 90 deletions

View File

@ -1,4 +1,4 @@
#!/bin/bash
# Rebuild html/database.db from selected tables of database.db.
# Delete the previously exported copy before loading the dumps below
rm html/database.db
# Dump the pbs_drug, pbs_prescriber_type and pbs_streamlined tables and pipe the SQL into html/database.db
sqlite3 database.db '.dump pbs_drug pbs_prescriber_type pbs_streamlined' | sqlite3 html/database.db
# Dump the pbs_item, restriction and criteria tables and pipe the SQL into html/database.db
sqlite3 database.db '.dump pbs_item pbs_item_restriction pbs_restriction pbs_restriction_criteria pbs_criteria pbs_criteria_parameter' | sqlite3 html/database.db

View File

@ -72,7 +72,7 @@
db = new SQL.Database(new Uint8Array(buf));
// Initialise search bar
const labels = execAsScalars(db.prepare('SELECT DISTINCT mp_pt FROM pbs_drug ORDER BY LOWER(mp_pt)'));
const labels = execAsScalars(db.prepare('SELECT DISTINCT mp_preferred_term FROM pbs_item ORDER BY LOWER(mp_preferred_term)'));
const data = labels.map(label => ({'label': label}));
const autocomplete = new Autocomplete(document.getElementById('search-input'), {
data: data,
@ -84,7 +84,7 @@
function onClickSearchItem(item) {
// Find matching PBS items
const stmt = db.prepare('SELECT *, (SELECT COUNT(1) FROM pbs_streamlined WHERE pbs_drug.item_code = pbs_streamlined.item_code) AS streamlined_authorities FROM pbs_drug LEFT JOIN pbs_prescriber_type ON pbs_drug.item_code = pbs_prescriber_type.item_code WHERE LOWER(mp_pt) = ? AND prescriber_type = "M"');
const stmt = db.prepare('SELECT * FROM pbs_item WHERE LOWER(mp_preferred_term) = ?');
stmt.bind([item.label.toLowerCase()]);
const items = execAsObjects(stmt);
@ -95,26 +95,25 @@
tbody.innerHTML = '';
for (let item of items) {
const tr = document.createElement('tr');
let td = document.createElement('td'); td.innerHTML = '<a href="https://www.pbs.gov.au/medicine/item/' + item['item_code'] + '" target="_blank">' + item['item_code'] + '</a>'; tr.appendChild(td);
td = document.createElement('td'); td.innerText = item['tpuu_or_mpp_pt']; tr.appendChild(td);
td = document.createElement('td'); td.innerText = item['mq']; tr.appendChild(td);
td = document.createElement('td'); td.innerText = item['repeats']; tr.appendChild(td);
let td = document.createElement('td'); td.innerHTML = '<a href="https://www.pbs.gov.au/medicine/item/' + item['code'] + '" target="_blank">' + item['code'] + '</a>'; tr.appendChild(td);
td = document.createElement('td'); td.innerText = item['mpp_preferred_term']; tr.appendChild(td);
td = document.createElement('td'); td.innerText = item['maximum_prescribable_units']; tr.appendChild(td);
td = document.createElement('td'); td.innerText = item['number_repeats']; tr.appendChild(td);
if (item['restriction_flag'] === 'U') {
if (item['benefit_type'] === 'unrestricted') {
td = document.createElement('td'); tr.appendChild(td);
} else if (item['restriction_flag'] === 'R') {
td = document.createElement('td'); td.innerHTML = '<a href="https://www.pbs.gov.au/medicine/item/' + item['item_code'] + '" target="_blank">Restricted</a>'; tr.appendChild(td);
} else if (item['benefit_type'] === 'restricted') {
td = document.createElement('td'); td.innerHTML = '<a href="https://www.pbs.gov.au/medicine/item/' + item['code'] + '" target="_blank">Restricted</a>'; tr.appendChild(td);
tr.classList.add('table-warning');
} else if (item['restriction_flag'] === 'A') {
if (item['streamlined_authorities'] > 0) {
td = document.createElement('td'); td.innerHTML = '<a href="https://www.pbs.gov.au/medicine/item/' + item['item_code'] + '" target="_blank">Streamlined</a>'; tr.appendChild(td);
tr.classList.add('table-warning');
} else {
td = document.createElement('td'); td.innerHTML = '<a href="https://www.pbs.gov.au/medicine/item/' + item['item_code'] + '" target="_blank">Authority</a>'; tr.appendChild(td);
tr.classList.add('table-danger');
}
} else if (item['benefit_type'] === 'streamlined') {
td = document.createElement('td'); td.innerHTML = '<a href="https://www.pbs.gov.au/medicine/item/' + item['code'] + '" target="_blank">Streamlined</a>'; tr.appendChild(td);
tr.classList.add('table-warning');
} else if (item['benefit_type'] === 'authority') {
td = document.createElement('td'); td.innerHTML = '<a href="https://www.pbs.gov.au/medicine/item/' + item['code'] + '" target="_blank">Authority</a>'; tr.appendChild(td);
tr.classList.add('table-danger');
} else {
td = document.createElement('td'); td.innerText = item['restriction_flag']; tr.appendChild(td);
alert('Unknown benefit type: ' + item['benefit_type']);
throw 'Unknown benefit type: ' + item['benefit_type'];
}
tbody.appendChild(tr);
@ -126,17 +125,17 @@
function comparePBSItems(item1, item2) {
// Sort tablets/capsules before other forms
if ((item1['tpuu_or_mpp_pt'].indexOf(' tablet, ') >= 0 || item1['tpuu_or_mpp_pt'].indexOf(' capsule, ') >= 0) && !(item2['tpuu_or_mpp_pt'].indexOf(' tablet, ') >= 0 || item2['tpuu_or_mpp_pt'].indexOf(' capsule, ') >= 0)) {
if ((item1['mpp_preferred_term'].indexOf(' tablet, ') >= 0 || item1['mpp_preferred_term'].indexOf(' capsule, ') >= 0) && !(item2['mpp_preferred_term'].indexOf(' tablet, ') >= 0 || item2['mpp_preferred_term'].indexOf(' capsule, ') >= 0)) {
return -1;
}
if ((item2['tpuu_or_mpp_pt'].indexOf(' tablet, ') >= 0 || item2['tpuu_or_mpp_pt'].indexOf(' capsule, ') >= 0) && !(item1['tpuu_or_mpp_pt'].indexOf(' tablet, ') >= 0 || item1['tpuu_or_mpp_pt'].indexOf(' capsule, ') >= 0)) {
if ((item2['mpp_preferred_term'].indexOf(' tablet, ') >= 0 || item2['mpp_preferred_term'].indexOf(' capsule, ') >= 0) && !(item1['mpp_preferred_term'].indexOf(' tablet, ') >= 0 || item1['mpp_preferred_term'].indexOf(' capsule, ') >= 0)) {
return 1;
}
// Compare tpuu_or_mpp_pt word-by-word accounting for numbers
// Compare mpp_preferred_term word-by-word accounting for numbers
const bits1 = item1['tpuu_or_mpp_pt'].split(' ');
const bits2 = item2['tpuu_or_mpp_pt'].split(' ');
const bits1 = item1['mpp_preferred_term'].split(' ');
const bits2 = item2['mpp_preferred_term'].split(' ');
for (let i = 0; i < bits1.length && i < bits2.length; i++) {
if (regexIsNumber.test(bits1[i]) && regexIsNumber.test(bits2[i])) {
@ -163,8 +162,8 @@
// Sort unrestricted, then restricted/streamlined, then authority required
const type1 = item1['restriction_flag'] === 'U' ? 0 : item1['restriction_flag'] === 'R' ? 1 : item1['restriction_flag'] === 'S' ? 1 : item1['streamlined_authorities'] > 0 ? 2 : 3;
const type2 = item2['restriction_flag'] === 'U' ? 0 : item2['restriction_flag'] === 'R' ? 1 : item2['restriction_flag'] === 'S' ? 1 : item2['streamlined_authorities'] > 0 ? 2 : 3;
const type1 = item1['benefit_type'] === 'unrestricted' ? 0 : item1['benefit_type'] === 'restricted' ? 1 : item1['benefit_type'] === 'streamlined' ? 1 : 2;
const type2 = item2['benefit_type'] === 'unrestricted' ? 0 : item2['benefit_type'] === 'restricted' ? 1 : item2['benefit_type'] === 'streamlined' ? 1 : 2;
return type1 - type2;
}

View File

@ -1,64 +0,0 @@
# Copyright © 2023 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import pandas as pd
import sqlite3
import zipfile
# Connect to the SQLite database that receives the imported PBS text extracts
con = sqlite3.connect('database.db')
cur = con.cursor()

# Init schema: every destination table is dropped and recreated on each run
for schema_statement in (
    'DROP TABLE IF EXISTS pbs_drug',
    'CREATE TABLE pbs_drug (id INTEGER PRIMARY KEY AUTOINCREMENT, item_code CHARACTER(6), mp_pt TEXT, tpuu_or_mpp_pt TEXT, restriction_flag CHARACTER(1), mq INTEGER, repeats INTEGER)',
    'DROP TABLE IF EXISTS pbs_prescriber_type',
    'CREATE TABLE pbs_prescriber_type (id INTEGER PRIMARY KEY AUTOINCREMENT, item_code CHARACTER(6), prescriber_type CHARACTER(1))',
    'DROP TABLE IF EXISTS pbs_streamlined',
    'CREATE TABLE pbs_streamlined (id INTEGER PRIMARY KEY AUTOINCREMENT, item_code CHARACTER(6), treatment_of_code INTEGER)',
):
    cur.execute(schema_statement)

# Read drug list, prescriber type and streamlined authorities from the extracts archive
with zipfile.ZipFile('data/2023-01-01-v3extracts.zip', 'r') as extracts:
    # drug_xxx.txt ('!'-separated)
    with extracts.open('drug_20230101.txt', 'r') as drug_file:
        drug_table = pd.read_csv(drug_file, sep='!')

    for _, drug_row in drug_table.iterrows():
        # Insert each item code once only: rows whose code is already stored are ignored
        cur.execute('SELECT COUNT(*) FROM pbs_drug WHERE item_code=?', (drug_row['item-code'],))
        already_stored = cur.fetchone()[0] > 0
        if not already_stored:
            cur.execute(
                'INSERT INTO pbs_drug (item_code, mp_pt, tpuu_or_mpp_pt, restriction_flag, mq, repeats) VALUES (?, ?, ?, ?, ?, ?)',
                (drug_row['item-code'], drug_row['mp-pt'], drug_row['tpuu-or-mpp-pt'], drug_row['restriction-flag'], drug_row['mq'], drug_row['repeats'])
            )

    # Prescriber_type_xxx.txt (tab-separated; the file's own header row is replaced by explicit column names)
    with extracts.open('Prescriber_type_20230101.txt', 'r') as prescriber_file:
        prescriber_table = pd.read_csv(prescriber_file, sep='\t', header=0, names=['mp-pt', 'item-code', 'prescriber-type'])

    for _, prescriber_row in prescriber_table.iterrows():
        cur.execute(
            'INSERT INTO pbs_prescriber_type (item_code, prescriber_type) VALUES (?, ?)',
            (prescriber_row['item-code'], prescriber_row['prescriber-type'])
        )

    # streamlined_xxx.txt (streamlined authorities, tab-separated)
    with extracts.open('streamlined_20230101.txt', 'r') as streamlined_file:
        streamlined_table = pd.read_csv(streamlined_file, sep='\t')

    for _, streamlined_row in streamlined_table.iterrows():
        cur.execute(
            'INSERT INTO pbs_streamlined (item_code, treatment_of_code) VALUES (?, ?)',
            (streamlined_row['item-code'], streamlined_row['treatment-of-code'])
        )

# Persist all inserted rows in one transaction
con.commit()

167
import_pbs_xml.py Normal file
View File

@ -0,0 +1,167 @@
# Copyright © 2023 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import sqlite3
import zipfile
from xml.etree import ElementTree as ET
# Import the PBS General Schedule from the published XML into database.db.
#
# The script recreates the pbs_item, pbs_item_restriction, pbs_restriction,
# pbs_restriction_criteria, pbs_criteria and pbs_criteria_parameter tables, then
# walks the XML in three passes: schedule items, the restrictions those items
# reference, and the criteria those restrictions reference.

# Open database
con = sqlite3.connect('database.db')
cur = con.cursor()

# Init schema (all tables are dropped and rebuilt on every run)
cur.execute('DROP TABLE IF EXISTS pbs_item')
cur.execute('CREATE TABLE pbs_item (code TEXT PRIMARY KEY, mpp_preferred_term TEXT, mp_preferred_term TEXT, maximum_prescribable_units INTEGER, number_repeats INTEGER, benefit_type TEXT)')
cur.execute('DROP TABLE IF EXISTS pbs_item_restriction')
cur.execute('CREATE TABLE pbs_item_restriction (item_code TEXT, restriction_code INTEGER)')
cur.execute('DROP TABLE IF EXISTS pbs_restriction')
cur.execute('CREATE TABLE pbs_restriction (code INTEGER PRIMARY KEY, treatment_of INTEGER, indication TEXT, criteria_operator TEXT)')
cur.execute('DROP TABLE IF EXISTS pbs_restriction_criteria')
cur.execute('CREATE TABLE pbs_restriction_criteria (restriction_code INTEGER, criteria_code INTEGER)')
cur.execute('DROP TABLE IF EXISTS pbs_criteria')
cur.execute('CREATE TABLE pbs_criteria (code INTEGER PRIMARY KEY, type TEXT, parameters_operator TEXT)')
cur.execute('DROP TABLE IF EXISTS pbs_criteria_parameter')
cur.execute('CREATE TABLE pbs_criteria_parameter (id INTEGER PRIMARY KEY AUTOINCREMENT, criteria_code INTEGER, text TEXT)')

# Parse XML
with zipfile.ZipFile('data/2023-01-01-xml-V3.zip', 'r') as zipf:
    with zipf.open('sch-2023-01-01-r1.xml', 'r') as f:
        tree = ET.parse(f)

print('Parsed XML')

root = tree.getroot()
ns = {'pbs': 'http://schema.pbs.gov.au/', 'xlink': 'http://www.w3.org/1999/xlink', 'xml': 'http://www.w3.org/XML/1998/namespace', 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'dbk': 'http://docbook.org/ns/docbook'}

# Fully-qualified attribute names used with Element.get()
XLINK_HREF = '{http://www.w3.org/1999/xlink}href'
RDF_RESOURCE = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'


def _referenced_id(reference):
    """Return the id a reference element points at: its xlink:href with leading '#' stripped."""
    return reference.get(XLINK_HREF).lstrip('#')


def _local_name(element):
    """Return the element's tag without its '{namespace}' prefix."""
    return element.tag[element.tag.index('}')+1:]


# These containers are looked up by xml:id many times below, so locate them once
drugs_list = root.find('pbs:drugs-list', ns)
prescribing_texts = root.find('pbs:prescribing-texts-list', ns)

# Get General Schedule
program = next(p for p in root.find('pbs:schedule', ns).findall('pbs:program', ns) if p.find('pbs:info', ns).find('pbs:code', ns).text == 'GE')

restrictions_to_parse = set()
criteria_to_parse = set()

# Get schedule items (prescribing-rule) in schedule
for item in program.findall('pbs:prescribing-rule', ns):
    code = item.find('pbs:code', ns).text

    # Only get benefits available to medical practitioners.
    # Compare against None explicitly: an Element's truth value only reflects
    # whether it has children, so a childless match would otherwise be dropped.
    benefits = [
        b for b in item.find('pbs:benefit-types-list', ns).findall('pbs:benefit-type', ns)
        if b.find('pbs:member-of-list', ns).find('pbs:member-of[@rdf:resource="http://pbs.gov.au/prescriber/medical"]', ns) is not None
    ]
    if not benefits:
        continue
    assert len(benefits) == 1
    benefit = benefits[0]

    ready_prepared = item.find('pbs:ready-prepared', ns)

    # Resolve the item's MPP reference, then the MP that the MPP references
    mpp_id = _referenced_id(ready_prepared.find('pbs:mpp-reference', ns))
    mpp = drugs_list.find('pbs:mpp[@xml:id="' + mpp_id + '"]', ns)
    mpp_preferred_term = mpp.find('pbs:preferred-term', ns).text

    mp_id = _referenced_id(mpp.find('pbs:drug-references-list', ns).find('pbs:mp-reference', ns))
    mp = drugs_list.find('pbs:mp[@xml:id="' + mp_id + '"]', ns)
    mp_preferred_term = mp.find('pbs:preferred-term[@rdf:resource="http://pbs.gov.au/clinical"]', ns).text

    max_units = ready_prepared.find('pbs:maximum-prescribable[@rdf:resource="http://pbs.gov.au/reference/unit-of-use"]', ns).find('pbs:value', ns).text
    max_repeats = ready_prepared.find('pbs:number-repeats', ns).find('pbs:value', ns).text

    # Map the benefit-type URI to the short name stored in pbs_item.benefit_type;
    # an unrecognised URI raises KeyError
    benefit_type = {
        'http://pbs.gov.au/benefit-type/unrestricted': 'unrestricted',
        'http://pbs.gov.au/benefit-type/restricted': 'restricted',
        'http://pbs.gov.au/benefit-type/streamlined': 'streamlined',
        'http://pbs.gov.au/benefit-type/authority-required': 'authority',
    }[benefit.get(RDF_RESOURCE)]

    cur.execute('INSERT INTO pbs_item (code, mpp_preferred_term, mp_preferred_term, maximum_prescribable_units, number_repeats, benefit_type) VALUES (?, ?, ?, ?, ?, ?)', (code, mpp_preferred_term, mp_preferred_term, max_units, max_repeats, benefit_type))

    # Get restrictions
    if (restrictions := benefit.find('pbs:restriction-references-list', ns)) is not None:
        for restriction_reference in restrictions.findall('pbs:restriction-reference', ns):
            restriction_id = _referenced_id(restriction_reference)
            restriction_code = restriction_reference.find('pbs:code', ns).text
            cur.execute('INSERT INTO pbs_item_restriction (item_code, restriction_code) VALUES (?, ?)', (code, restriction_code))

            # Queue this restriction for parsing (the set de-duplicates restrictions shared by several items)
            restrictions_to_parse.add(restriction_id)

# Parse restrictions
for restriction_id in sorted(restrictions_to_parse):
    restriction = prescribing_texts.find('pbs:restriction[@xml:id="' + restriction_id + '"]', ns)
    code = restriction.find('pbs:code[@rdf:resource="http://pbs.gov.au/code/restriction"]', ns).text
    treatment_of = restriction.find('pbs:code[@rdf:resource="http://pbs.gov.au/code/treatment-of"]', ns).text

    # Build the name of the indication (episodicity, severity, condition)
    indication_id = _referenced_id(restriction.find('pbs:indication-reference', ns))
    indication = prescribing_texts.find('pbs:indication[@xml:id="' + indication_id + '"]', ns)

    indication_strings = []

    # Episodicity and severity are optional; the condition is always read
    if (episodicity_reference := indication.find('pbs:episodicity-reference', ns)) is not None:
        episodicity_id = _referenced_id(episodicity_reference)
        episodicity = prescribing_texts.find('pbs:episodicity[@xml:id="' + episodicity_id + '"]', ns)
        episodicity_term = episodicity.find('pbs:preferred-term', ns).text.strip()
        indication_strings.append(episodicity_term)

    if (severity_reference := indication.find('pbs:severity-reference', ns)) is not None:
        severity_id = _referenced_id(severity_reference)
        severity = prescribing_texts.find('pbs:severity[@xml:id="' + severity_id + '"]', ns)
        severity_term = severity.find('pbs:preferred-term', ns).text.strip()
        indication_strings.append(severity_term)

    condition_id = _referenced_id(indication.find('pbs:condition-reference', ns))
    condition = prescribing_texts.find('pbs:condition[@xml:id="' + condition_id + '"]', ns)
    condition_term = condition.find('pbs:preferred-term', ns).text.strip()
    indication_strings.append(condition_term)

    # Get operator and criteria; only the first any/all/one-of element found is used
    operators = restriction.findall('pbs:any', ns) + restriction.findall('pbs:all', ns) + restriction.findall('pbs:one-of', ns)
    if operators:
        operator = _local_name(operators[0])

        for criteria_reference in operators[0].findall('pbs:criteria-reference', ns):
            criteria_id = _referenced_id(criteria_reference)
            criteria_code = criteria_reference.find('pbs:code', ns).text
            cur.execute('INSERT INTO pbs_restriction_criteria (restriction_code, criteria_code) VALUES (?, ?)', (code, criteria_code))

            # Queue this criteria for parsing
            criteria_to_parse.add(criteria_id)
    else:
        operator = None

    cur.execute('INSERT INTO pbs_restriction (code, treatment_of, indication, criteria_operator) VALUES (?, ?, ?, ?)', (code, treatment_of, ' '.join(indication_strings), operator))

# Parse criteria
for criteria_id in sorted(criteria_to_parse):
    criteria = prescribing_texts.find('*[@xml:id="' + criteria_id + '"]', ns)
    criteria_type = _local_name(criteria)
    code = criteria.find('pbs:code', ns).text

    # Get operator and parameters; only the first any/all/one-of element found is used
    operators = criteria.findall('pbs:any', ns) + criteria.findall('pbs:all', ns) + criteria.findall('pbs:one-of', ns)
    if operators:
        operator = _local_name(operators[0])

        for parameter_reference in operators[0].findall('pbs:parameter-reference', ns):
            parameter_id = _referenced_id(parameter_reference)
            parameter = prescribing_texts.find('*[@xml:id="' + parameter_id + '"]', ns)

            # A para without leading text has text None; treat it as an empty line instead of failing in join()
            note_text = '\n'.join(p.text or '' for n in parameter.findall('dbk:note', ns) for p in n.findall('dbk:para', ns))
            cur.execute('INSERT INTO pbs_criteria_parameter (criteria_code, text) VALUES (?, ?)', (code, note_text))
    else:
        # Reset explicitly so a criteria without an operator element does not
        # inherit the operator left over from an earlier iteration
        operator = None

    # NOTE(review): assumes every criteria is recorded even when it has no operator element,
    # mirroring the restriction loop above — confirm against the original indentation
    cur.execute('INSERT INTO pbs_criteria (code, type, parameters_operator) VALUES (?, ?, ?)', (code, criteria_type, operator))

con.commit()
con.close()