#!/usr/bin/env python3
# Copyright © 2023 Lee Yingtong Li (RunasSudo)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import re
import sqlite3
# Matches a string made up solely of dose-like fragments: the characters
# 0-9 / . , + % and the units mg, mL, U or IU (each unit optionally preceded
# by a single space), e.g. "500mg", "2.5%", "100IU/mL". The empty string
# also matches.
#
# Every alternative consumes at least one character and the alternatives
# start with disjoint characters, so this accepts exactly the same strings as
# the nested form "([0-9/.,+%]*( ?(mg|mL|U|IU))?)+" while avoiding that
# form's exponential backtracking on long inputs that almost match.
# Groups are non-capturing; intended for use with fullmatch().
LOOKS_LIKE_DOSE = re.compile(r'(?:[0-9/.,+%]| ?(?:mg|mL|U|IU))*')
# Open database
# The path is relative, so the file is resolved against the current working
# directory. Rows are returned as sqlite3.Row so columns can be read by name.
con = sqlite3.connect('database.db')
con.row_factory = sqlite3.Row
cur = con.cursor()
# Init schema
# The output table is dropped and recreated, so every run rebuilds it from scratch.
cur.execute('DROP TABLE IF EXISTS mp_brand_name')
cur.execute('CREATE TABLE mp_brand_name (id INTEGER PRIMARY KEY AUTOINCREMENT, mp_preferred_term TEXT, brand_name TEXT)')
# Get PBS brand names
# Each pbs_tpp row is joined to its pbs_mpp row and then to the pbs_mp row, to
# obtain the preferred term of the generic the branded product belongs to.
cur.execute(
    'SELECT brand_name, mp_preferred_term FROM pbs_tpp '
    'LEFT JOIN (SELECT code, mp_code FROM pbs_mpp) AS pbs_mpp ON pbs_tpp.mpp_code = pbs_mpp.code '
    'LEFT JOIN (SELECT code, preferred_term as mp_preferred_term FROM pbs_mp) AS pbs_mp ON pbs_mpp.mp_code = pbs_mp.code'
)

# Maps each generic preferred term to the set of cleaned brand names seen for it
brand_names = {}

for tpp in cur.fetchall():
    brand_name_lower = tpp['brand_name'].lower()
    words = tpp['brand_name'].split()
    words_lower = brand_name_lower.split()

    # If any word of the generic name is in the brand name, skip it because it is uninteresting
    # (this is a case-insensitive substring test; "+" separators are not treated as words)
    if any(w.lower() in brand_name_lower for w in tpp['mp_preferred_term'].split() if w != '+'):
        continue

    # Ignore anything that looks like a company name
    if 'pty' in words_lower or 'ltd' in words_lower or 'australia' in words_lower:
        continue

    # Strip all trailing words that look like a dose
    while words and LOOKS_LIKE_DOSE.fullmatch(words[-1]):
        words.pop()

    # Every word looked like a dose, so there is no brand name left to record
    if not words:
        continue

    # OK!
    brand_name = ' '.join(words)
    brand_names.setdefault(tpp['mp_preferred_term'], set()).add(brand_name)
# Get non-PBS brand names
cur.execute('SELECT * FROM non_pbs_tpp')

for tpp in cur.fetchall():
    # This is manually curated so no need for cleaning
    brand_names.setdefault(tpp['mp_preferred_term'], set()).add(tpp['brand_name'])
# Reduce names with unambiguous prefixes
# A multi-word brand name is replaced by its shortest leading run of words that
# no brand name of a *different* generic starts with. Generics are visited in
# sorted order, so later generics are compared against the already-shortened
# names of earlier ones.
for mp_preferred_term in sorted(brand_names):
    own_names = brand_names[mp_preferred_term]

    # Names belonging to every other generic. Only own_names is modified below,
    # so this list stays valid for the whole of this iteration.
    other_names = [b for m, names in brand_names.items() if m != mp_preferred_term for b in names]

    # Iterate over a snapshot because own_names is modified inside the loop
    for brand_name in list(own_names):
        # Can we reduce the length of the name?
        words = brand_name.split()

        # Only proper prefixes are tried, so single-word names are left as-is
        for i in range(1, len(words)):
            short_name = ' '.join(words[:i])

            if any(b.startswith(short_name) for b in other_names):
                # Conflict
                continue

            # Exceptions
            if short_name == 'Coloxyl with':
                continue

            # Can shorten
            own_names.discard(brand_name)
            own_names.add(short_name)
            break
# Add to database
# Rows are inserted in sorted order (by generic, then by brand name) so the
# autoincremented ids are assigned in that order.
for mp_preferred_term in sorted(brand_names):
    for brand_name in sorted(brand_names[mp_preferred_term]):
        cur.execute('INSERT INTO mp_brand_name (mp_preferred_term, brand_name) VALUES (?, ?)', (mp_preferred_term, brand_name))

con.commit()