# Source code for pyssian.chemistryutils

"""
Contains all the hardcoded basis sets and functionals as well as the necessary
regular expressions to identify whether a certain string is a valid basis or a
valid method. It also contains a dictionary that maps atomic symbols to atomic
numbers and vice versa.
"""

import re

######################## METHODS SECTION #####################################
# Optional prefix that may precede a method keyword: 'RO', 'U', 'R' or nothing
# (the trailing '|' is the empty alternative).
shellWaveFunctions = 'RO|U|R|'
# NOTE: the identifier is misspelled ("mutliple"), but it is a module-level
# name, so it is kept unchanged for backward compatibility.
mutlipleMethodRegex = 'ONIOM|IRCMAX'

methodsRegex = ['DFT','MM','Amber','Dreiding','UFF','AM1','PM3','PM3MM','PM6',
                'PDDG','HF','HFS','XAlpha','HFB','VSXC','HCTH','HCTH93',
                'HCTH147','HCTH407','tHCTH','M06L','B97D','LSDA','LC-wPBE',
                'CAM-B3LYP','wB97XD','wB97','wB97X','LC-BLYP','B3LYP','B3P86',
                'B3PW91','B1B95','mPW1PW91','mPW1LYP','mPW1PBE','mPW3PBE',
                'B98','B971','B972','PBE1PBE','B1LYP','O3LYP','TPSSh','BMK',
                'M062X','M06','M06HF','M05','M052X','X3LYP','BHandH',
                'BHandHLYP','tHCTHhyb','HSEh1PBE','HSE2PBE','HSEhPBE',
                'PBEh1PBE','CASSCF','CAS','MP2','MP3','MP4','MP5','B2PLYP',
                'B2PLYPD','mPW2PLYP','mPW2PLYPD','QCISD','CCD',r'CCSD\(T\)',
                'CCSD','CC','QCID','BD','EPT','CBS-4M','CBS-QB3','ROCBS-QB3',
                'CBS-APNO','G1','G2','G2MP2','G3','G3MP2','G3B3','G3MP2B3','G4',
                'G4MP2','W1U','W1BD','W1RO','CIS',r'CIS\(D\)','CID','CISD','TD',
                'EOMCCSD','ZINDO','DFTB','DFTBA','GVB','CNDO','INDO','MINDO',
                'MNDO','NMR','SAC-CI']
# The patterns are written in upper case; candidates have to be upper-cased
# before being matched against them.
methodsRegex = [i.upper() for i in methodsRegex]
# Alternatives are tried left to right and re.match stops at the first one
# that succeeds, so a short name listed before a longer name that starts with
# it (e.g. 'HF' before 'HFS', 'PM3' before 'PM3MM', 'CIS' before 'CISD') hides
# the longer name. Sorting longest-first (the sort is stable, so names of equal
# length keep their listed order) makes the longest keyword win.
methodsRegex = '|'.join(sorted(methodsRegex, key=len, reverse=True))

# The exchange part is never the last group of the compound pattern, so the
# regex engine can always backtrack to a longer alternative: the listed order
# is kept as is.
exchangeFunctional = ['S','XA','B','PW91','MPW','G96','PBE','OPBE','O','TPSS',
                      'BRX','PKZB','WPBEH','PBEH','LC-']
exchangeFunctional = '|'.join(exchangeFunctional)

# The correlation part closes the compound pattern, hence it is ordered
# longest-first for the same reason as the methods ('VWN' would otherwise
# shadow 'VWN5').
correlationFunctional = ['VWN','VWN5','LYP','PL','P86','PW91','B95','PBE',
                         'TPSS','KCIS','BRC','PKZB','VP86','V5LYP']
correlationFunctional = '|'.join(sorted(correlationFunctional,
                                        key=len, reverse=True))

# <prefix><method keyword>
methodRegex = f'({shellWaveFunctions})({methodsRegex})'
# <prefix><exchange><optional digits><correlation>
compoundMethodRegex = (f'({shellWaveFunctions})({exchangeFunctional})'
                       f'[0-9]*({correlationFunctional})')

method_re = re.compile(methodRegex)
multiplemethod_re = re.compile(mutlipleMethodRegex)
compoundmethod_re = re.compile(compoundMethodRegex)


###################### THE PRETTY FUNCTION ######################
def is_method(candidate):
    """
    Tests if a candidate string is a valid method recognized by Gaussian.

    The comparison is case-insensitive and the whole string has to be
    described by one of the module-level method patterns.

    Parameters
    ----------
    candidate : str

    Returns
    -------
    bool
    """
    candidate = candidate.upper()
    # fullmatch is required here: match() followed by an equality check gives
    # up as soon as a shorter alternative matches a prefix of the candidate
    # (e.g. 'HF' inside 'HFS'), whereas fullmatch lets the regex engine
    # backtrack until the complete string is consumed.
    patterns = (multiplemethod_re, method_re, compoundmethod_re)
    return any(pattern.fullmatch(candidate) is not None
               for pattern in patterns)
######################## BASIS SECTION #########################################
## Pople Bases require lowercasing for design reasons.
Pople = dict(primitives = r'sto|mc|[0-9]',
             zeta = '[0-9]{1,3}',
             diffuse = r'[\+]{0,2}',
             pol1 = r'[0-9]?(d|f|d\'|f\'){1,2}',
             pol2 = r'[0-9]?(\,p|d|p\'|d\')?')
x1 = Pople['pol1']
x2 = Pople['pol2']
# '\*\*' has to come before '\*': alternatives are tried left to right, so with
# the single star first a match() on '6-31g**' would stop after one star.
Pople['polarization'] = r'\(({pol1}{pol2})\)|\*\*|\*'.format(pol1=x1,pol2=x2)
Pople_Basis = r'({primitives})\-({zeta})({diffuse})g({polarization})?'
Pople_Basis = Pople_Basis.format(**Pople)
# The resulting regex is the one below; it is built from pieces so that the
# reasoning stays written down:
# (sto|mc|[0-9])\-([0-9]{1,3})([\+]{0,2})g(\(([0-9]?(d|f|d\'|f\'){1,2}[0-9]?(\,p|d|p\'|d\')?)\)|\*\*|\*)?

## ccBases
## Assumes lowercased, it's simpler so it is not divided
cc_Basis = r'((sp|d)?t?(aug|jul|jun|may|apr){1}\-)?cc\-pv(d|t|q|5|6)z'
# Equivalent to:
# ((sp|d){0,1}t{0,1}(aug|jul|jun|may|apr){1}\-){0,1}cc\-pv(d|t|q|5|6)z

# UGBS
UGBS_Basis = r'ugbs[1-9](p|v|o)(2?\+{1,2})?'

# SV Family
# Has to be anchored at the start of the string (match/fullmatch, not search)
SV_Basis = r'(def2)?(sv|tzv?|qzv?)p{0,2}'

# Family that includes indicating the number of core electrons
core_Basis = r'(sdd|shf|sdf|mhf|mdf|mwb|oldsdd|sddall)[0-9]{1,3}'

# The polarization suffix is optional for the three families below, exactly as
# for the Pople bases: the bare keywords ('d95', 'cep-31g', 'shc') are valid.
# D95 Family
D95_Basis = 'd95v?('+Pople['polarization']+')?'

# CEP family
CEP_Basis = r'cep\-?[0-9]{1,3}g('+Pople['polarization']+')?'

# SHC Basis
SHC_Basis = 'shc('+Pople['polarization']+')?'

# Basis sets that are simply hardcoded instead of described by a regex
Hardcoded_basis = {'sec','lanl2mb','lanl2dz','midix','epr-ii','epr-iii',
                   'mtsmall','dgdzvp','dgdzvp2','dgtzvp','cbsb7','gen',
                   'genecp'}

# Construct the full regex: one parenthesized alternative per family
all_basis = [Pople_Basis,UGBS_Basis,cc_Basis,SV_Basis,core_Basis,
             D95_Basis,CEP_Basis,SHC_Basis]
basis_expr = '|'.join(f'({expr})' for expr in all_basis)
basis_regex = re.compile(basis_expr)

######################## THE PRETTY FUNCTION ###################################
def is_basis(candidate):
    """
    Tests if a candidate string is a valid basis set recognized by Gaussian.

    The comparison is case-insensitive: the candidate is lower-cased and then
    looked up in the hardcoded basis names and, failing that, tested against
    the basis regex, which has to describe the whole string.

    Parameters
    ----------
    candidate : str

    Returns
    -------
    bool
    """
    lowercandidate = candidate.lower()
    if lowercandidate in Hardcoded_basis:
        return True
    # fullmatch (and neither search nor match plus an equality check): the
    # pattern must cover the complete string, and fullmatch lets the engine
    # backtrack past a shorter alternative (e.g. '*' inside '6-31g**').
    return basis_regex.fullmatch(lowercandidate) is not None
######################### PERIODIC TABLE #######################################
# It is important that X appears first (the enumeration starts at 0);
# here X stands for a dummy atom.
items = """X
           H  He
           Li Be B  C  N  O  F  Ne
           Na Mg Al Si P  S  Cl Ar
           K  Ca Sc Ti V  Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Kr
           Rb Sr Y  Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I  Xe
           Cs Ba La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Lu
           Hf Ta W  Re Os Ir Pt Au Hg Tl Pb Bi Po At Rn
           Fr Ra Ac Th Pa U  Np Pu Am Cm Bk Cf Es Fm Md No Lr
           Rf Db Sg Bh Hs Mt Ds Rg
           Uub Uut Uuq Uup Uuh Uus Uuo
        """
# str.split() without arguments already splits on any run of whitespace
# (newlines included) and ignores leading/trailing blanks.
items = items.split()

# Three kinds of keys live in the same dictionary:
#   int number -> symbol, str number -> symbol and symbol -> int number
PeriodicTable = dict()
for i,Sym in enumerate(items):
    PeriodicTable[i] = Sym      # 1 -> 'H'
    PeriodicTable[str(i)] = Sym # '1' -> 'H'
    PeriodicTable[Sym] = i      # 'H' -> 1

# The systematic placeholder symbols of elements 112-118 were replaced by
# permanent IUPAC symbols. The permanent symbols are accepted as additional
# symbol -> number keys; number -> symbol lookups still return the placeholders
# so that existing callers see no change.
PeriodicTable.update({new: PeriodicTable[old]
                      for new, old in (('Cn','Uub'), ('Nh','Uut'),
                                       ('Fl','Uuq'), ('Mc','Uup'),
                                       ('Lv','Uuh'), ('Ts','Uus'),
                                       ('Og','Uuo'))})