From 96c9e962e1d2abe8b5978249c134e26e191ab5ac Mon Sep 17 00:00:00 2001
From: RunasSudo
Date: Tue, 24 Jan 2023 19:56:54 +1100
Subject: [PATCH] Allow searching by trade name

---
NOTE(review): this patch text was recovered from a copy in which line breaks,
indentation and <...> spans had been stripped. Indentation and blank-line
placement in context lines are reconstructed from the hunk counts, so applying
it may need `git apply --ignore-whitespace` - TODO confirm against the tree.
The author address on the From: line could not be recovered. The blob id on
the find_pbs_brand_names.py index line predates the review changes below.

Review changes to find_pbs_brand_names.py: columns are declared TEXT instead
of STRING, rows without a matching MP are skipped, names consisting only of
dose-like words are skipped, and the connection is closed.

Open issue: nothing in this patch adds to tpps_to_parse in import_pbs_xml.py,
so pbs_tpp stays empty unless the item-parsing loop (not part of this diff)
is updated as well.

 export_db.sh            |  2 +-
 find_pbs_brand_names.py | 92 +++++++++++++++++++++++++++++++++++++++++
 html/autocomplete.js    | 12 +++---
 html/index.html         | 13 +++++--
 import_pbs_xml.py       | 12 ++++++
 5 files changed, 122 insertions(+), 9 deletions(-)
 create mode 100644 find_pbs_brand_names.py

diff --git a/export_db.sh b/export_db.sh
index 9c561cd..d9d35c4 100755
--- a/export_db.sh
+++ b/export_db.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 rm html/database.db
-sqlite3 database.db '.dump pbs_item pbs_mp pbs_mpp pbs_item_restriction pbs_restriction pbs_restriction_criteria pbs_criteria pbs_criteria_parameter' | sqlite3 html/database.db
+sqlite3 database.db '.dump pbs_item pbs_mp pbs_mp_brand_name pbs_mpp pbs_item_restriction pbs_restriction pbs_restriction_criteria pbs_criteria pbs_criteria_parameter' | sqlite3 html/database.db
diff --git a/find_pbs_brand_names.py b/find_pbs_brand_names.py
new file mode 100644
index 0000000..b09d790
--- /dev/null
+++ b/find_pbs_brand_names.py
@@ -0,0 +1,92 @@
+# Copyright © 2023 Lee Yingtong Li (RunasSudo)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# Derive searchable trade (brand) names for each MP from the pbs_tpp rows in
+# database.db and rebuild the pbs_mp_brand_name table from them.
+
+import re
+import sqlite3
+
+# A word that consists only of dose-like pieces (digits, punctuation, units)
+LOOKS_LIKE_DOSE = re.compile(r'([0-9/.,+%]*( ?(mg|mL|U|IU))?)+')
+
+# Open database
+con = sqlite3.connect('database.db')
+con.row_factory = sqlite3.Row
+cur = con.cursor()
+
+# Init schema
+# Columns are TEXT like the other pbs_* tables: in SQLite the declared type
+# name STRING gets NUMERIC affinity and would coerce numeric-looking values
+cur.execute('DROP TABLE IF EXISTS pbs_mp_brand_name')
+cur.execute('CREATE TABLE pbs_mp_brand_name (id INTEGER PRIMARY KEY AUTOINCREMENT, mp_code TEXT, brand_name TEXT)')
+
+# Attach each trade product's MP code and generic (preferred) name
+cur.execute('SELECT * FROM pbs_tpp LEFT JOIN (SELECT code, mp_code FROM pbs_mpp) AS pbs_mpp ON pbs_tpp.mpp_code = pbs_mpp.code LEFT JOIN (SELECT code, preferred_term as mp_preferred_term FROM pbs_mp) AS pbs_mp ON pbs_mpp.mp_code = pbs_mp.code')
+
+brand_names = {}
+for tpp in cur.fetchall():
+    # The LEFT JOINs leave NULLs when a TPP has no matching MPP/MP; skip those
+    if tpp['mp_code'] is None or tpp['mp_preferred_term'] is None:
+        continue
+
+    words = tpp['brand_name'].split()
+    words_lower = tpp['brand_name'].lower().split()
+
+    # If any word of the generic name is in the brand name, skip it because it is uninteresting
+    if any(w.lower() in tpp['brand_name'].lower() for w in tpp['mp_preferred_term'].split() if w != '+'):
+        continue
+
+    # Ignore anything that looks like a company name
+    if 'pty' in words_lower or 'ltd' in words_lower or 'australia' in words_lower:
+        continue
+
+    # Strip all trailing words that look like a dose
+    while words and LOOKS_LIKE_DOSE.fullmatch(words[-1]):
+        words.pop()
+
+    # Only dose-like words: there is no name left to index
+    if not words:
+        continue
+
+    # OK!
+    brand_name = ' '.join(words)
+    brand_names.setdefault(tpp['mp_code'], set()).add(brand_name)
+
+# Reduce names with unambiguous prefixes
+for mp_code in sorted(brand_names):
+    for brand_name in list(brand_names[mp_code]):
+        # Can we reduce the length of the name?
+        words = brand_name.split()
+
+        for i in range(1, len(words)):
+            short_name = ' '.join(words[0:i])
+
+            if any(b.startswith(short_name) for m in brand_names if m != mp_code for b in brand_names[m]):
+                # Conflict
+                continue
+
+            # Can shorten
+            brand_names[mp_code].discard(brand_name)
+            brand_names[mp_code].add(short_name)
+            break
+
+# Add to database
+for mp_code in sorted(brand_names):
+    for brand_name in sorted(brand_names[mp_code]):
+        cur.execute('INSERT INTO pbs_mp_brand_name (mp_code, brand_name) VALUES (?, ?)', (mp_code, brand_name))
+
+con.commit()
+con.close()
diff --git a/html/autocomplete.js b/html/autocomplete.js
index 5dccae9..71fb6c2 100644
--- a/html/autocomplete.js
+++ b/html/autocomplete.js
@@ -75,11 +75,11 @@ class Autocomplete {
           .indexOf(removeDiacritics(lookup).toLowerCase());
       const className = Array.isArray(this.options.highlightClass) ? this.options.highlightClass.join(' ')
         : (typeof this.options.highlightClass == 'string' ? this.options.highlightClass : '');
-      label = item.label.substring(0, idx)
-        + `<span class="${className}">${item.label.substring(idx, idx + lookup.length)}</span>`
-        + item.label.substring(idx + lookup.length, item.label.length);
+      label = item.preview.substring(0, idx)
+        + `<span class="${className}">${item.preview.substring(idx, idx + lookup.length)}</span>`
+        + item.preview.substring(idx + lookup.length, item.preview.length);
     } else {
-      label = item.label;
+      label = item.preview;
     }
 
     if (this.options.showValue) {
@@ -128,7 +128,8 @@ class Autocomplete {
       const entry = this.options.data[key];
       const item = {
           label: this.options.label ? entry[this.options.label] : key,
-          value: this.options.value ? entry[this.options.value] : entry
+          preview: entry.preview,
+          value: this.options.value ? entry[this.options.value] : entry,
       };
 
       if (removeDiacritics(item.label).toLowerCase().indexOf(removeDiacritics(lookup).toLowerCase()) == 0) {
@@ -144,6 +145,7 @@ class Autocomplete {
       const entry = this.options.data[key];
       const item = {
           label: this.options.label ? entry[this.options.label] : key,
+          preview: entry.preview,
           value: this.options.value ? entry[this.options.value] : entry
       };
 
diff --git a/html/index.html b/html/index.html
index a2e4109..f9e10f7 100644
--- a/html/index.html
+++ b/html/index.html
@@ -76,8 +76,12 @@
                 db = new SQL.Database(new Uint8Array(buf));
 
                 // Initialise search bar
-                const labels = execAsScalars(db.prepare('SELECT DISTINCT preferred_term FROM pbs_mp ORDER BY LOWER(preferred_term)'));
-                const data = labels.map(label => ({'label': label}));
+                const mp_preferred_terms = execAsScalars(db.prepare('SELECT preferred_term FROM pbs_mp ORDER BY LOWER(preferred_term)'));
+                let data = mp_preferred_terms.map(mp_preferred_term => ({'label': mp_preferred_term, 'preview': mp_preferred_term, 'value': mp_preferred_term}));
+
+                const tpp_brand_names = execAsObjects(db.prepare('SELECT * FROM pbs_mp_brand_name LEFT JOIN pbs_mp ON pbs_mp_brand_name.mp_code = pbs_mp.code ORDER BY LOWER(brand_name)'));
+                data = data.concat(tpp_brand_names.map(tpp_brand_name => ({'label': tpp_brand_name['brand_name'], 'preview': tpp_brand_name['brand_name'] + ' (' + tpp_brand_name['preferred_term'] + ')', 'value': tpp_brand_name['preferred_term']})));
+
                 const autocomplete = new Autocomplete(document.getElementById('search-input'), {
                     data: data,
                     maximumItems: 20,
@@ -87,9 +91,12 @@
             }
 
             function onClickSearchItem(item) {
+                // Override label if clicked on a trade name
+                document.getElementById('search-input').value = item.value;
+
                 // Find matching PBS items
                 let stmt = db.prepare('SELECT * FROM pbs_item LEFT JOIN (SELECT code AS mpp_code, preferred_term AS mpp_preferred_term, mp_code FROM pbs_mpp) AS pbs_mpp ON pbs_item.mpp_code = pbs_mpp.mpp_code LEFT JOIN (SELECT code AS mp_code, preferred_term AS mp_preferred_term FROM pbs_mp) AS pbs_mp ON pbs_mpp.mp_code = pbs_mp.mp_code WHERE LOWER(mp_preferred_term) = ?');
-                stmt.bind([item.label.toLowerCase()]);
+                stmt.bind([item.value.toLowerCase()]);
                 const items = execAsObjects(stmt);
 
                 items.sort(comparePBSItems);
diff --git a/import_pbs_xml.py b/import_pbs_xml.py
index d3b97bd..abf17f3 100644
--- a/import_pbs_xml.py
+++ b/import_pbs_xml.py
@@ -28,6 +28,9 @@ cur.execute('CREATE TABLE pbs_item (code TEXT PRIMARY KEY, mpp_code TEXT, maximu
 cur.execute('DROP TABLE IF EXISTS pbs_mpp')
 cur.execute('CREATE TABLE pbs_mpp (code TEXT PRIMARY KEY, mp_code TEXT, preferred_term TEXT)')
 
+cur.execute('DROP TABLE IF EXISTS pbs_tpp')
+cur.execute('CREATE TABLE pbs_tpp (code TEXT PRIMARY KEY, mpp_code TEXT, brand_name TEXT)')
+
 cur.execute('DROP TABLE IF EXISTS pbs_mp')
 cur.execute('CREATE TABLE pbs_mp (code TEXT PRIMARY KEY, preferred_term TEXT)')
 
@@ -60,6 +63,7 @@ ns = {'pbs': 'http://schema.pbs.gov.au/', 'xlink': 'http://www.w3.org/1999/xlink
 program = next(p for p in root.find('pbs:schedule', ns).findall('pbs:program', ns) if p.find('pbs:info', ns).find('pbs:code', ns).text == 'GE')
 
 mpps_to_parse = set()
+tpps_to_parse = set()
 mps_to_parse = set()
 restrictions_to_parse = set()
 criteria_to_parse = set()
@@ -135,6 +139,14 @@ for mp_id in sorted(list(mps_to_parse)):
 
     cur.execute('INSERT INTO pbs_mp (code, preferred_term) VALUES (?, ?)', (mp_code, mp_preferred_term))
 
+# Parse TPPs
+for (tpp_id, mpp_code) in sorted(list(tpps_to_parse)):
+    tpp = root.find('pbs:drugs-list', ns).find('pbs:tpp[@xml:id="' + tpp_id + '"]', ns)
+    tpp_code = tpp.find('pbs:code', ns).text
+    tpp_brand_name = tpp.find('pbs:brand-name', ns).find('pbs:value', ns).text
+
+    cur.execute('INSERT INTO pbs_tpp (code, mpp_code, brand_name) VALUES (?, ?, ?)', (tpp_code, mpp_code, tpp_brand_name))
+
 # Parse restrictions
 for restriction_id in sorted(list(restrictions_to_parse)):
     restriction = root.find('pbs:prescribing-texts-list', ns).find('pbs:restriction[@xml:id="' + restriction_id + '"]', ns)