From 96c9e962e1d2abe8b5978249c134e26e191ab5ac Mon Sep 17 00:00:00 2001
From: RunasSudo <runassudo@yingtongli.me>
Date: Tue, 24 Jan 2023 19:56:54 +1100
Subject: [PATCH] Allow searching by trade name

---
 export_db.sh            |  2 +-
 find_pbs_brand_names.py | 84 +++++++++++++++++++++++++++++++++++++++++
 html/autocomplete.js    | 12 +++---
 html/index.html         | 13 +++++--
 import_pbs_xml.py       | 12 ++++++
 5 files changed, 114 insertions(+), 9 deletions(-)
 create mode 100644 find_pbs_brand_names.py

diff --git a/export_db.sh b/export_db.sh
index 9c561cd..d9d35c4 100755
--- a/export_db.sh
+++ b/export_db.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 rm html/database.db
-sqlite3 database.db '.dump pbs_item pbs_mp pbs_mpp pbs_item_restriction pbs_restriction pbs_restriction_criteria pbs_criteria pbs_criteria_parameter' | sqlite3 html/database.db
+sqlite3 database.db '.dump pbs_item pbs_mp pbs_mp_brand_name pbs_mpp pbs_item_restriction pbs_restriction pbs_restriction_criteria pbs_criteria pbs_criteria_parameter' | sqlite3 html/database.db
diff --git a/find_pbs_brand_names.py b/find_pbs_brand_names.py
new file mode 100644
index 0000000..b09d790
--- /dev/null
+++ b/find_pbs_brand_names.py
@@ -0,0 +1,88 @@
+#   Copyright © 2023  Lee Yingtong Li (RunasSudo)
+#
+#   This program is free software: you can redistribute it and/or modify
+#   it under the terms of the GNU Affero General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU Affero General Public License for more details.
+#
+#   You should have received a copy of the GNU Affero General Public License
+#   along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+import re
+import sqlite3
+
+# Matches one whitespace-free word built only from dose-like tokens (digits, punctuation, units), e.g. "500", "10mg", "5mg/mL"
+# Kept as a flat alternation: a possibly-empty group nested inside "+" can backtrack exponentially on a long word that fails to match
+LOOKS_LIKE_DOSE = re.compile(r'(?:[0-9/.,+%]|mg|mL|IU|U)+')
+
+# Open database
+con = sqlite3.connect('database.db')
+con.row_factory = sqlite3.Row
+cur = con.cursor()
+
+# Init schema
+# Columns are TEXT like the other pbs_* tables: SQLite gives a column declared STRING numeric affinity, so numeric-looking values would be stored as numbers
+cur.execute('DROP TABLE IF EXISTS pbs_mp_brand_name')
+cur.execute('CREATE TABLE pbs_mp_brand_name (id INTEGER PRIMARY KEY AUTOINCREMENT, mp_code TEXT, brand_name TEXT)')
+
+cur.execute('SELECT * FROM pbs_tpp LEFT JOIN (SELECT code, mp_code FROM pbs_mpp) AS pbs_mpp ON pbs_tpp.mpp_code = pbs_mpp.code LEFT JOIN (SELECT code, preferred_term as mp_preferred_term FROM pbs_mp) AS pbs_mp ON pbs_mpp.mp_code = pbs_mp.code')
+
+brand_names = {}
+for tpp in cur.fetchall():
+	# The LEFT JOINs yield NULLs when a TPP has no matching MPP or MP; such rows cannot be attributed to a drug
+	if tpp['brand_name'] is None or tpp['mp_code'] is None or tpp['mp_preferred_term'] is None:
+		continue
+	
+	brand_name_lower = tpp['brand_name'].lower()
+	words = tpp['brand_name'].split()
+	words_lower = brand_name_lower.split()
+	
+	# If any word of the generic name is in the brand name, skip it because it is uninteresting
+	if any(w.lower() in brand_name_lower for w in tpp['mp_preferred_term'].split() if w != '+'):
+		continue
+	
+	# Ignore anything that looks like a company name
+	if 'pty' in words_lower or 'ltd' in words_lower or 'australia' in words_lower:
+		continue
+	
+	# Strip all trailing words that look like a dose
+	while words and LOOKS_LIKE_DOSE.fullmatch(words[-1]):
+		words.pop()
+	
+	# Nothing usable is left if the brand name was blank or consisted only of doses
+	if not words:
+		continue
+	
+	# OK!
+	brand_names.setdefault(tpp['mp_code'], set()).add(' '.join(words))
+
+# Reduce names with unambiguous prefixes
+for mp_code in sorted(brand_names):
+	for brand_name in sorted(brand_names[mp_code]):
+		# Can we reduce the length of the name?
+		words = brand_name.split()
+		
+		for i in range(1, len(words)):
+			short_name = ' '.join(words[:i])
+			
+			# Conflict if a brand name of any other drug starts with the same text; try a longer prefix
+			if any(b.startswith(short_name) for m in brand_names if m != mp_code for b in brand_names[m]):
+				continue
+			
+			# Can shorten
+			brand_names[mp_code].discard(brand_name)
+			brand_names[mp_code].add(short_name)
+			break
+
+# Add to database
+for mp_code in sorted(brand_names):
+	for brand_name in sorted(brand_names[mp_code]):
+		cur.execute('INSERT INTO pbs_mp_brand_name (mp_code, brand_name) VALUES (?, ?)', (mp_code, brand_name))
+
+con.commit()
+con.close()
diff --git a/html/autocomplete.js b/html/autocomplete.js
index 5dccae9..71fb6c2 100644
--- a/html/autocomplete.js
+++ b/html/autocomplete.js
@@ -75,11 +75,11 @@ class Autocomplete {
           .indexOf(removeDiacritics(lookup).toLowerCase());
       const className = Array.isArray(this.options.highlightClass) ? this.options.highlightClass.join(' ')
         : (typeof this.options.highlightClass == 'string' ? this.options.highlightClass : '');
-      label = item.label.substring(0, idx)
-        + `<span class="${className}">${item.label.substring(idx, idx + lookup.length)}</span>`
-        + item.label.substring(idx + lookup.length, item.label.length);
+      label = item.preview.substring(0, idx)
+        + `<span class="${className}">${item.preview.substring(idx, idx + lookup.length)}</span>`
+        + item.preview.substring(idx + lookup.length, item.preview.length);
     } else {
-      label = item.label;
+      label = item.preview;
     }
 
     if (this.options.showValue) {
@@ -128,7 +128,8 @@ class Autocomplete {
       const entry = this.options.data[key];
       const item = {
           label: this.options.label ? entry[this.options.label] : key,
-          value: this.options.value ? entry[this.options.value] : entry
+          preview: entry.preview,
+          value: this.options.value ? entry[this.options.value] : entry,
       };
 
       if (removeDiacritics(item.label).toLowerCase().indexOf(removeDiacritics(lookup).toLowerCase()) == 0) {
@@ -144,6 +145,7 @@ class Autocomplete {
       const entry = this.options.data[key];
       const item = {
           label: this.options.label ? entry[this.options.label] : key,
+          preview: entry.preview,
           value: this.options.value ? entry[this.options.value] : entry
       };
 
diff --git a/html/index.html b/html/index.html
index a2e4109..f9e10f7 100644
--- a/html/index.html
+++ b/html/index.html
@@ -76,8 +76,12 @@
 				db = new SQL.Database(new Uint8Array(buf));
 				
 				// Initialise search bar
-				const labels = execAsScalars(db.prepare('SELECT DISTINCT preferred_term FROM pbs_mp ORDER BY LOWER(preferred_term)'));
-				const data = labels.map(label => ({'label': label}));
+				const mp_preferred_terms = execAsScalars(db.prepare('SELECT preferred_term FROM pbs_mp ORDER BY LOWER(preferred_term)'));
+				let data = mp_preferred_terms.map(mp_preferred_term => ({'label': mp_preferred_term, 'preview': mp_preferred_term, 'value': mp_preferred_term}));
+				
+				const tpp_brand_names = execAsObjects(db.prepare('SELECT * FROM pbs_mp_brand_name LEFT JOIN pbs_mp ON pbs_mp_brand_name.mp_code = pbs_mp.code ORDER BY LOWER(brand_name)'));
+				data = data.concat(tpp_brand_names.map(tpp_brand_name => ({'label': tpp_brand_name['brand_name'], 'preview': tpp_brand_name['brand_name'] + ' <span class="text-muted">(' + tpp_brand_name['preferred_term'] + ')</span>', 'value': tpp_brand_name['preferred_term']})));
+				
 				const autocomplete = new Autocomplete(document.getElementById('search-input'), {
 					data: data,
 					maximumItems: 20,
@@ -87,9 +91,12 @@
 			}
 			
 			function onClickSearchItem(item) {
+				// Override label if clicked on a trade name
+				document.getElementById('search-input').value = item.value;
+				
 				// Find matching PBS items
 				let stmt = db.prepare('SELECT * FROM pbs_item LEFT JOIN (SELECT code AS mpp_code, preferred_term AS mpp_preferred_term, mp_code FROM pbs_mpp) AS pbs_mpp ON pbs_item.mpp_code = pbs_mpp.mpp_code LEFT JOIN (SELECT code AS mp_code, preferred_term AS mp_preferred_term FROM pbs_mp) AS pbs_mp ON pbs_mpp.mp_code = pbs_mp.mp_code WHERE LOWER(mp_preferred_term) = ?');
-				stmt.bind([item.label.toLowerCase()]);
+				stmt.bind([item.value.toLowerCase()]);
 				const items = execAsObjects(stmt);
 				
 				items.sort(comparePBSItems);
diff --git a/import_pbs_xml.py b/import_pbs_xml.py
index d3b97bd..abf17f3 100644
--- a/import_pbs_xml.py
+++ b/import_pbs_xml.py
@@ -28,6 +28,9 @@ cur.execute('CREATE TABLE pbs_item (code TEXT PRIMARY KEY, mpp_code TEXT, maximu
 cur.execute('DROP TABLE IF EXISTS pbs_mpp')
 cur.execute('CREATE TABLE pbs_mpp (code TEXT PRIMARY KEY, mp_code TEXT, preferred_term TEXT)')
 
+cur.execute('DROP TABLE IF EXISTS pbs_tpp')
+cur.execute('CREATE TABLE pbs_tpp (code TEXT PRIMARY KEY, mpp_code TEXT, brand_name TEXT)')
+
 cur.execute('DROP TABLE IF EXISTS pbs_mp')
 cur.execute('CREATE TABLE pbs_mp (code TEXT PRIMARY KEY, preferred_term TEXT)')
 
@@ -60,6 +63,7 @@ ns = {'pbs': 'http://schema.pbs.gov.au/', 'xlink': 'http://www.w3.org/1999/xlink
 program = next(p for p in root.find('pbs:schedule', ns).findall('pbs:program', ns) if p.find('pbs:info', ns).find('pbs:code', ns).text == 'GE')
 
 mpps_to_parse = set()
+tpps_to_parse = set()
 mps_to_parse = set()
 restrictions_to_parse = set()
 criteria_to_parse = set()
@@ -135,6 +139,14 @@ for mp_id in sorted(list(mps_to_parse)):
 	
 	cur.execute('INSERT INTO pbs_mp (code, preferred_term) VALUES (?, ?)', (mp_code, mp_preferred_term))
 
+# Parse TPPs -- NOTE(review): this patch initialises tpps_to_parse as an empty set but no visible hunk adds (tpp_id, mpp_code) pairs to it; confirm it is populated elsewhere, otherwise pbs_tpp stays empty
+for (tpp_id, mpp_code) in sorted(list(tpps_to_parse)):
+	tpp = root.find('pbs:drugs-list', ns).find('pbs:tpp[@xml:id="' + tpp_id + '"]', ns)
+	tpp_code = tpp.find('pbs:code', ns).text
+	tpp_brand_name = tpp.find('pbs:brand-name', ns).find('pbs:value', ns).text
+	
+	cur.execute('INSERT INTO pbs_tpp (code, mpp_code, brand_name) VALUES (?, ?, ?)', (tpp_code, mpp_code, tpp_brand_name))
+
 # Parse restrictions
 for restriction_id in sorted(list(restrictions_to_parse)):
 	restriction = root.find('pbs:prescribing-texts-list', ns).find('pbs:restriction[@xml:id="' + restriction_id + '"]', ns)