diff --git a/import_pbs_xml.py b/import_pbs_xml.py
index 492b794..2e3d8e8 100755
--- a/import_pbs_xml.py
+++ b/import_pbs_xml.py
@@ -140,10 +140,26 @@ for mpp_id in sorted(list(mpps_to_parse)):
     mp = root.find('pbs:drugs-list', ns).find('pbs:mp[@xml:id="' + mp_id + '"]', ns)
     mp_code = mp.find('pbs:code[@rdf:resource="http://pbs.gov.au/Drug/MP"]', ns).text # Must look this up because the in is only SNOMED
 
+    # Manual fixups for metoprolol: normalise the MPP preferred term and, for MP code 1187PBSC, record the MPP under MP code 432PBSC
+    if 'METOPROLOL' in mpp_preferred_term:
+        # Incorrect capitalisation: lower-case the drug name; this also drops the word "Tablet", which the replacements below put back in lower case
+        mpp_preferred_term = mpp_preferred_term.replace('METOPROLOL SUCCINATE Tablet', 'metoprolol succinate')
+        mpp_preferred_term = mpp_preferred_term.replace('METOPROLOL TARTRATE Tablet', 'metoprolol tartrate')
+
+        # Idiosyncratic word order: "(controlled release)" becomes "modified release tablet", and a bare "mg," gains "tablet" before the comma
+        mpp_preferred_term = mpp_preferred_term.replace('(controlled release)', 'modified release tablet')
+        mpp_preferred_term = mpp_preferred_term.replace('mg,', 'mg tablet,')
+
+        # Classify as "metoprolol tartrate" (taken here to be MP code 432PBSC -- TODO confirm); mp_id is cleared so the original MP is not queued below
+        if mp_code == '1187PBSC':
+            mp_id = None
+            mp_code = '432PBSC'
+
     cur.execute('INSERT INTO pbs_mpp (code, mp_code, preferred_term) VALUES (?, ?, ?)', (mpp_code, mp_code, mpp_preferred_term))
 
     # Queue the MP for parsing
-    mps_to_parse.add(mp_id)
+    if mp_id:  # mp_id is None when the MPP was reassigned to another MP code above
+        mps_to_parse.add(mp_id)
 
     # Get TPPs
     for tpp_reference in mpp.find('pbs:drug-references-list', ns).findall('pbs:tpp-reference', ns):
@@ -158,6 +174,10 @@ for mp_id in sorted(list(mps_to_parse)):
     mp_code = mp.find('pbs:code[@rdf:resource="http://pbs.gov.au/Drug/MP"]', ns).text # Also there are SNOMED codes but they are inconsistent
     mp_preferred_term = mp.find('pbs:preferred-term[@rdf:resource="http://pbs.gov.au/clinical"]', ns).text
 
+    if mp_code == '432PBSC':
+        # Specified as all uppercase in PBS XML for some reason; store the fixed lower-case name 'metoprolol' instead
+        mp_preferred_term = 'metoprolol'
+
     cur.execute('INSERT INTO pbs_mp (code, preferred_term) VALUES (?, ?)', (mp_code, mp_preferred_term))
 
 # Parse TPPs