#!/usr/bin/env python3 # # # GNU GENERAL PUBLIC LICENSE # # Version 3, 29 June 2007 # # Copyright © 2025 Jeremy Heyl # # Everyone is permitted to copy and distribute verbatim copies of # this license document, but changing it is not allowed. # # This program is free software: you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation, either version 3 of # the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # # Consult for the CCV XML schema. # # # import sys from datetime import datetime publov={ 'Submitted' : 'Submitted', 'Revision Requested' : 'Revision Requested', 'Accepted' : 'Accepted', 'In Press' : 'In Press', 'Published' : 'Published' } yesnolov={ 'Yes' : 'Yes', 'No' : 'No', } typelov={ 'Paper' : 'Paper', 'Abstract' : 'Abstract', 'Poster' : 'Poster', } def escape( str_xml: str ): str_xml = str_xml.replace("&", "&") str_xml = str_xml.replace("<", "<") str_xml = str_xml.replace(">", ">") str_xml = str_xml.replace("\"", """) str_xml = str_xml.replace("'", "'") return str_xml def okval(s): return (str(s)!='nan') def getval(row,column,lov=None,default=None): if lov is None: try: val=row[column] except: val="nan" if okval(val): return '%s' % val else: if default is None: return '' else: return default else: try: return lov[row[column]] except: if default is None: return '' else: return lov[default] def outputjournal(row,f): f.write('''
%s %s %s %s %s %s %04d %s %s %s
''' % (escape(row['Article Title'][:250]), escape(row['Source Title'][:200]), getval(row,'Volume'), getval(row,'Issue'), ('%s' % row['Article Number'] if okval(row['Article Number']) else (('%s' % row['Start Page'])+( '-%s' % row['End Page'] if okval(row['End Page']) else '
')) if okval(row['Start Page']) else ''), getval(row,"Publishing Status",publov,"Published"), row['Publication Year'], getval(row,"Refereed?",yesnolov,"Yes"), escape(row['Author Full Names'][:1000]), getval(row,"DOI"))) def outputconference(row,f): f.write('''
%s %s %s %s %04d %s %s %s %s
''' % ( getval(row,"Conference Publication Type",typelov,"Paper"), escape(row['Article Title'][:250]), escape(row['Conference Title'][:250]), getval(row,"Publishing Status",publov,"Published"), row['Publication Year'], getval(row,"Refereed?",yesnolov,"No"), getval(row,"Invited?",yesnolov,"No"), escape(row['Author Full Names'][:1000]), getval(row,"DOI"))) if len(sys.argv)<3: print(''' Format: python excel2xml.py inputexcel outputxml This script will convert an Excel file of references downloaded from Web of Science to an XML file that can be imported to the CCV website from the Utilities tab to generate a NSERC compliant CCV. The import will replace entire sections of the CCV that you can designate, so make a backup by exporting the XML first. The script requires Pandas to be installed in your python3 environment. Depending on your pandas implementation, it may only read the first fifty lines of the xls file. If so, convert it to xlsx and give that a try. The script will select for all journal articles to be published and refereed and all conference publications to be published, papers, not refereed and not invited. You should now have a valid NSERC bibliography that you can correct hopefully just a few entries to reflect their statuses. By adding additional rows to the spreadsheet, you can add contributions such as submitted papers that are not in Web of Science. Similarly, by adding additional columns to the spreadsheet called 1) Publishing Status 2) Refereed? 3) Invited? 4) Conference Publication Type you can override the defaults for any publication. If you leave the entry blank or choose an invalid value, the default will be used, so that you will still have a valid NSERC CCV. The valid values for each field are: Publishing Status: Submitted, Revision Requested, Accepted, In Press or Published Refereed?: Yes or No Invited?: Yes or No Conference Publication Type: Paper, Abstract, Poster ''') else: import pandas as pd df=pd.read_excel(sys.argv[1]) with open(sys.argv[2],'w', encoding='UTF-8') as f: today=datetime.today() f.write(f'''
''') for index, row in df.iterrows(): if row['Publication Type']=='J': outputjournal(row,f) for index, row in df.iterrows(): if row['Publication Type']=='C': outputconference(row,f) f.write('''
''')