#!/usr/bin/env python3
#
#
# GNU GENERAL PUBLIC LICENSE
#
# Version 3, 29 June 2007
#
# Copyright © 2025 Jeremy Heyl
#
# Everyone is permitted to copy and distribute verbatim copies of
# this license document, but changing it is not allowed.
#
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
#
# Consult for the CCV XML schema.
#
#
#
import sys
from datetime import datetime
publov={
'Submitted' : 'Submitted',
'Revision Requested' : 'Revision Requested',
'Accepted' : 'Accepted',
'In Press' : 'In Press',
'Published' : 'Published' }
yesnolov={
'Yes' : 'Yes',
'No' : 'No',
}
typelov={
'Paper' : 'Paper',
'Abstract' : 'Abstract',
'Poster' : 'Poster',
}
def escape( str_xml: str ):
str_xml = str_xml.replace("&", "&")
str_xml = str_xml.replace("<", "<")
str_xml = str_xml.replace(">", ">")
str_xml = str_xml.replace("\"", """)
str_xml = str_xml.replace("'", "'")
return str_xml
def okval(s):
return (str(s)!='nan')
def getval(row,column,lov=None,default=None):
if lov is None:
try:
val=row[column]
except:
val="nan"
if okval(val):
return '%s' % val
else:
if default is None:
return ''
else:
return default
else:
try:
return lov[row[column]]
except:
if default is None:
return ''
else:
return lov[default]
def outputjournal(row,f):
f.write('''
%s
%s
%s
%s
%s
%s
%04d
%s
%s
%s
''' % (escape(row['Article Title'][:250]),
escape(row['Source Title'][:200]),
getval(row,'Volume'),
getval(row,'Issue'),
('%s' % row['Article Number'] if okval(row['Article Number'])
else (('%s' % row['Start Page'])+( '-%s' % row['End Page'] if okval(row['End Page']) else ''))
if okval(row['Start Page']) else ''),
getval(row,"Publishing Status",publov,"Published"),
row['Publication Year'],
getval(row,"Refereed?",yesnolov,"Yes"),
escape(row['Author Full Names'][:1000]),
getval(row,"DOI")))
def outputconference(row,f):
f.write('''
%s
%s
%s
%s
%04d
%s
%s
%s
%s
''' % ( getval(row,"Conference Publication Type",typelov,"Paper"),
escape(row['Article Title'][:250]),
escape(row['Conference Title'][:250]),
getval(row,"Publishing Status",publov,"Published"),
row['Publication Year'],
getval(row,"Refereed?",yesnolov,"No"),
getval(row,"Invited?",yesnolov,"No"),
escape(row['Author Full Names'][:1000]),
getval(row,"DOI")))
if len(sys.argv)<3:
print('''
Format:
python excel2xml.py inputexcel outputxml
This script will convert an Excel file of references downloaded from
Web of Science to an XML file that can be imported to the CCV website
from the Utilities tab to generate a NSERC compliant CCV. The import
will replace entire sections of the CCV that you can designate, so
make a backup by exporting the XML first.
The script requires Pandas to be installed in your python3 environment.
Depending on your pandas implementation, it may only read the first
fifty lines of the xls file. If so, convert it to xlsx and give that a
try.
The script will select for all journal articles to be published and
refereed and all conference publications to be published, papers, not
refereed and not invited. You should now have a valid NSERC
bibliography that you can correct hopefully just a few entries to
reflect their statuses.
By adding additional rows to the spreadsheet, you can add
contributions such as submitted papers that are not in Web of Science.
Similarly, by adding additional columns to the spreadsheet called
1) Publishing Status
2) Refereed?
3) Invited?
4) Conference Publication Type
you can override the defaults for any publication. If you leave the
entry blank or choose an invalid value, the default will be used, so
that you will still have a valid NSERC CCV. The valid values for each
field are:
Publishing Status: Submitted, Revision Requested, Accepted, In Press or
Published
Refereed?: Yes or No
Invited?: Yes or No
Conference Publication Type: Paper, Abstract, Poster
''')
else:
import pandas as pd
df=pd.read_excel(sys.argv[1])
with open(sys.argv[2],'w', encoding='UTF-8') as f:
today=datetime.today()
f.write(f'''
''')
for index, row in df.iterrows():
if row['Publication Type']=='J':
outputjournal(row,f)
for index, row in df.iterrows():
if row['Publication Type']=='C':
outputconference(row,f)
f.write('''
''')