#!/usr/bin/env python3
# Copyright © 2023 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import re
import sqlite3

# A word "looks like a dose" when it consists only of strength characters
# (digits and / . , + %) and the units mg, mL, U or IU, each unit optionally
# preceded by one space. The empty string also matches.
#
# The pattern is a single flat alternation under one `*`: there is no
# quantifier nested inside another quantifier, so a long run of digits followed
# by a non-matching character is rejected in linear time instead of triggering
# exponential backtracking.
LOOKS_LIKE_DOSE = re.compile(r'(?:[0-9/.,+%]| ?(?:mg|mL|U|IU))*')

# Lower-cased words whose presence marks a "brand name" as a company name.
_COMPANY_WORDS = frozenset(('pty', 'ltd', 'australia'))

# Prefixes that must never be used as a shortened brand name.
_NEVER_SHORTEN_TO = frozenset(('Coloxyl with',))

_SELECT_TPP_WITH_GENERIC = (
    'SELECT * FROM pbs_tpp '
    'LEFT JOIN (SELECT code, mp_code FROM pbs_mpp) AS pbs_mpp ON pbs_tpp.mpp_code = pbs_mpp.code '
    'LEFT JOIN (SELECT code, preferred_term as mp_preferred_term FROM pbs_mp) AS pbs_mp ON pbs_mpp.mp_code = pbs_mp.code'
)


def _strip_trailing_doses(words):
    """Return a copy of *words* without its trailing run of dose-like words."""
    keep = len(words)
    while keep and LOOKS_LIKE_DOSE.fullmatch(words[keep - 1]):
        keep -= 1
    return words[:keep]


def _collect_brand_names(rows):
    """Group the interesting brand names from *rows* by ``mp_code``.

    Args:
        rows: Iterable of mappings (e.g. ``sqlite3.Row``) providing the keys
            ``'brand_name'``, ``'mp_code'`` and ``'mp_preferred_term'``.

    Returns:
        A dict mapping each ``mp_code`` to the set of brand names kept for it,
        with trailing dose-like words removed and whitespace normalised to
        single spaces.
    """
    brand_names = {}

    for tpp in rows:
        brand = tpp['brand_name']
        generic = tpp['mp_preferred_term']

        # The query uses LEFT JOINs, so a row without a matching pbs_mpp/pbs_mp
        # entry carries NULLs here. Such a row cannot be compared against a
        # generic name, so skip it instead of failing on None.split().
        if brand is None or generic is None:
            continue

        brand_lower = brand.lower()

        # If any word of the generic name is in the brand name, skip it because
        # it is uninteresting.
        # NOTE(review): this is a substring test against the whole brand name,
        # so a one-letter generic word matches any brand containing that
        # letter - confirm that this is intended.
        if any(word.lower() in brand_lower for word in generic.split() if word != '+'):
            continue

        # Ignore anything that looks like a company name
        if not _COMPANY_WORDS.isdisjoint(brand_lower.split()):
            continue

        words = _strip_trailing_doses(brand.split())

        # A name made up solely of dose-like words leaves nothing to store.
        if not words:
            continue

        brand_names.setdefault(tpp['mp_code'], set()).add(' '.join(words))

    return brand_names


def _shorten_brand_names(brand_names):
    """Replace names by their shortest unambiguous leading-word prefix, in place.

    A prefix of one or more whole words is accepted when no brand name recorded
    under a *different* ``mp_code`` starts with it (plain string prefix test)
    and it is not listed in ``_NEVER_SHORTEN_TO``. Codes are processed in
    sorted order; earlier codes have already been shortened when later ones are
    checked against them.

    Args:
        brand_names: dict mapping ``mp_code`` to a set of brand names; the sets
            are modified in place.
    """
    for mp_code in sorted(brand_names):
        names = brand_names[mp_code]

        # Iterate over a snapshot because the set is modified in the loop.
        for brand_name in sorted(names):
            words = brand_name.split()

            # Try ever longer prefixes, but never the full name itself.
            for n_words in range(1, len(words)):
                short_name = ' '.join(words[:n_words])

                # Exceptions
                if short_name in _NEVER_SHORTEN_TO:
                    continue

                # Conflict with a brand name of another mp_code
                if any(
                    other.startswith(short_name)
                    for other_code, other_names in brand_names.items()
                    if other_code != mp_code
                    for other in other_names
                ):
                    continue

                # Can shorten
                names.discard(brand_name)
                names.add(short_name)
                break


def _store_brand_names(cur, brand_names):
    """Insert every (mp_code, brand name) pair, sorted by code and then name."""
    cur.executemany(
        'INSERT INTO pbs_mp_brand_name (mp_code, brand_name) VALUES (?, ?)',
        (
            (mp_code, brand_name)
            for mp_code in sorted(brand_names)
            for brand_name in sorted(brand_names[mp_code])
        ),
    )


def main(database='database.db'):
    """Rebuild the ``pbs_mp_brand_name`` table of an SQLite database.

    Drops and recreates ``pbs_mp_brand_name``, reads the brand names from
    ``pbs_tpp`` joined to ``pbs_mpp`` and ``pbs_mp``, filters and shortens them,
    and stores the result.

    Args:
        database: Path of the SQLite database file. It must already contain the
            ``pbs_tpp``, ``pbs_mpp`` and ``pbs_mp`` tables.

    Raises:
        sqlite3.Error: If the database cannot be read or written, for example
            because one of the source tables is missing.
    """
    # Open database
    con = sqlite3.connect(database)
    try:
        con.row_factory = sqlite3.Row
        cur = con.cursor()

        # Init schema
        # NOTE(review): SQLite gives a column declared STRING numeric affinity,
        # not text affinity; the declaration is kept as-is because other code
        # may rely on the stored types - confirm before changing it to TEXT.
        cur.execute('DROP TABLE IF EXISTS pbs_mp_brand_name')
        cur.execute('CREATE TABLE pbs_mp_brand_name (id INTEGER PRIMARY KEY AUTOINCREMENT, mp_code STRING, brand_name STRING)')

        cur.execute(_SELECT_TPP_WITH_GENERIC)
        brand_names = _collect_brand_names(cur.fetchall())

        # Reduce names with unambiguous prefixes
        _shorten_brand_names(brand_names)

        # Add to database
        _store_brand_names(cur, brand_names)

        con.commit()
    finally:
        # Always release the connection, also when a step above fails.
        con.close()


if __name__ == '__main__':
    main()