Source code for pygcc.read_db

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 17 16:02:22 2021

@author: adedapo.awolayo and Ben Tutolo, University of Calgary

Copyright (c) 2020 - 2021, Adedapo Awolayo and Ben Tutolo, University of Calgary

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""

import re, os
import textwrap
J_to_cal = 4.184

# from sys import platform

[docs]def findcodecs(filename): """Function to find the name of the encoding used to decode or encode any file """ data = open(filename, "rb").read() # data = open(os.path.join(os.path.dirname(os.path.abspath(__file__)), filename), 'rb').read() all_codecs = ['ascii', 'latin_1', 'utf_8'] f = [0]*len(all_codecs) for j, i in enumerate(all_codecs): try: decoded = data.decode(i) except UnicodeDecodeError: f[j] = False else: for ch in decoded: if i == 'utf_8' and 0xD800 <= ord(ch) <= 0xDFFF: f[j] = False f[j] = True if all(f) == True: return None else: return all_codecs[1]
# if platform == "darwin": # OS X # return all_codecs[1] # elif platform in ["linux", "linux2", "win32"]: # linux and # Windows... # return None
[docs]class db_reader: """Class to read direct-access and source thermodynamic database Parameters ---------- dbaccess : string filename and location of the direct-access/sequential-access database, optional, default is speq21 dbBerman_dir : string filename and location of the Berman mineral database, optional dbHP_dir : string filename and location of the supcrtbl mineral and gas database, optional sourcedb : string filename of the source database, optional sourcedb : string filename of the source database, optional sourceformat : string specify the source database format, either 'GWB' or 'EQ36', optional dbaccessformat : string, optional specify the direct-access/sequential-access database format, either 'speq' or 'supcrtbl', default is 'speq' sourcedb_codecs : string specify the name of the encoding used to decode or encode the sourcedb file, optional dbaccess_codecs : string specify the name of the encoding used to decode or encode the dbaccess file, optional Returns ------- dbaccessdic : dict dictionary of minerals, gases, redox and aqueous species \n dbaccess : string direct-access database file name \n sourcedic : dict dictionary of reaction coefficients and species \n specielist : list list of species segmented into the different categories [element, basis, redox, aqueous, minerals, gases, oxides] \n speciecat : list list of species categories listed in 'specielist' \n chargedic : dict dictionary of charges of species \n MWdic : dict dictionary of MW of species \n Mineraltype : dict mineral type for minerals \n fugacity_info : dict fugacity information as stored in new tdat database for chi and critical ppts \n Sptype : dict specie types and eq3/6 and revised date info \n Elemlist : dict dictionary of elements and coefficients \n Examples -------- >>> ps = db_reader(sourcedb = './default_db/thermo.com.dat', sourceformat = 'gwb', dbaccess = './default_db/speq21.dat') >>> ps.sourcedic, ps.dbaccessdic, ps.specielist """ kwargs = {"dbaccess": None, "dbBerman_dir": None, "dbHP_dir": None, "sourcedb": None, "sourceformat": None, "dbaccessformat": 'speq', "sourcedb_codecs": None, "dbaccess_codecs": None} def __init__(self, **kwargs): self.kwargs = db_reader.kwargs.copy() self.__calc__(**kwargs)
[docs] def __calc__(self, **kwargs): self.kwargs.update(kwargs) if self.kwargs["dbaccess"] == 'speq23': self.dbaccess_dir = './default_db/speq23.dat' self.dbaccess_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), self.dbaccess_dir) elif self.kwargs["dbaccess"] == 'speq23_dimer': self.dbaccess_dir = './default_db/speq23_dimer.dat' self.dbaccess_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), self.dbaccess_dir) elif self.kwargs["dbaccess"] == 'speq21_dimer': self.dbaccess_dir = './default_db/speq21_dimer.dat' self.dbaccess_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), self.dbaccess_dir) elif self.kwargs['dbaccess'] is None: self.dbaccess_dir = './default_db/speq21.dat' self.dbaccess_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), self.dbaccess_dir) else: self.dbaccess_dir = self.kwargs["dbaccess"] self.dbaccessformat = self.kwargs["dbaccessformat"] self.dbBerman_dir = self.kwargs["dbBerman_dir"] self.dbHP_dir = self.kwargs["dbHP_dir"] self.sourcedb_dir = None if self.kwargs["sourcedb"] is None else self.kwargs["sourcedb"] if self.kwargs["sourceformat"] is not None: if self.kwargs["sourceformat"].lower() == 'gwb': # options for all default database included with PyGCC if self.kwargs["sourcedb"] == 'thermo.com': self.sourcedb = './default_db/thermo.com.dat' self.sourcedb_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), self.sourcedb) elif self.kwargs["sourcedb"] == 'thermo.2021': self.sourcedb = './default_db/thermo.2021.dat' self.sourcedb_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), self.sourcedb) elif self.kwargs["sourcedb"] == 'thermo_latest': self.sourcedb = './default_db/thermo_latest.tdat' self.sourcedb_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), self.sourcedb) elif self.kwargs["sourcedb"] == 'thermo_cemdata_mar': self.sourcedb = './default_db/thermo_cemdata_mar.tdat' self.sourcedb_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), self.sourcedb) elif self.kwargs["sourcedb"] is None: self.sourcedb = './default_db/thermo.com.tdat' self.sourcedb_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), self.sourcedb) else: self.sourcedb_dir = self.kwargs["sourcedb"] elif self.kwargs["sourceformat"].lower() == 'eq36': if self.kwargs["sourcedb"] == 'data0' or self.kwargs["sourcedb"] is None: self.sourcedb = './default_db/data0.dat' self.sourcedb_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), self.sourcedb) else: self.sourcedb_dir = self.kwargs["sourcedb"] if self.kwargs['dbaccess_codecs'] is None: self.dbaccess_codecs = findcodecs(self.dbaccess_dir) else: self.dbaccess_codecs = self.kwargs['dbaccess_codecs'] if self.kwargs["sourcedb"] is not None and self.kwargs['sourcedb_codecs'] is None: self.sourcedb_codecs = findcodecs(self.sourcedb_dir) else: self.sourcedb_codecs = self.kwargs['sourcedb_codecs'] if self.dbaccess_dir is not None: self.dbaccess = self.dbaccess_dir.split('/')[-1] if self.dbaccess_dir is not None: self.readAqspecdb() # if self.sourcedb_dir is not None: if self.kwargs["sourceformat"].lower() == 'gwb': self.readSourceGWBdb() elif self.kwargs["sourceformat"].lower() == 'eq36': self.readSourceEQ36db()
[docs] def readAqspecdb(self): """ This function reads direct access thermodynamic database and can add other database sources at the bottom returns all constants of Maier-Kelley power function for minerals and gases (dG [cal/mol], dH [cal/mol], S [cal/mol-K], V [cm3/mol] a [cal/mol-K], b [*10**3 cal/mol/K^2], c [*10^-5 cal/mol/K], Ttrans [K], Htr [cal/mol], Vtr [cm³/mol], dPdTtr [bar/K] ) and aqueous species (dG [cal/mol], dH [cal/mol], S [cal/mol-K], V [cm3/mol], a1 [*10 cal/mol/bar], a2 [*10**-2 cal/mol], a3 [cal-K/mol/bar], a4 [*10**-4 cal-K/mol], c1 [cal/mol/K], c2 [*10**-4 cal-K/mol], ω [*10**-5 cal/mol] ) packed into a dbacess dictionary. In addition, the function can read Berman's mineral properties such as (dG [J/mol], dH [J/mol], S [J/mol-K], V [cm³/mol], k0, k1, k2, k3, v1 [*10^5 K^-1], v2 [*10^5 K^-2], v3 [*10^5 bar^-1], v4 [*10^8 bar^-2], dTdP [K/bar], Tlambda [K], Tref [K], l1 [(J/mol)^0.5/K], l2 [(J/mol)^0.5/K^2], DtH, d0 [J/mol], d1 [J/mol], d2 [J/mol], d3 [J/mol], d4 [J/mol], d5 [J/mol], Tmin [K], Tmax [K]). In addition, the function can read supcrtbl's mineral and gas properties such as (dG [kJ/mol], dH [kJ/mol], S [J/mol-K], V [J/bar], a [kJ/mol-K], b [*10^5 kJ/mol/K^2], c [kJ-mol-K], d [kJ/mol/K^0.5], alpha [*10^5 K^-1], kappa0 [kbar], kappa0_d [kbar], kappa0_dd [kbar], n_atom [-], Tc0 [K], Smax [J/mol-K], Vmax [J/bar], dH [KJ/mol], dV [J/bar], W [kJ/mol], Wv [J/bar], n [-], SF [-]) \n Parameters ---------- dbaccess filename and location of the direct-access database \n dbBerman_dir filename and location of the Berman mineral database \n dbHP_dir filename and location of the supcrtbl (Holland and Powell) mineral and gas database \n Returns ---------- dbaccessdic dictionary of minerals, gases, redox and aqueous species \n dbaccess dat file name \n Usage: ---------- [dbaccessdic, dbname] = readAqspecdb(dbaccess) """ # check if its a single liner data codecs = self.dbaccess_codecs with open(self.dbaccess_dir, encoding = codecs) as g: Rd = g.readlines() header_counter = [p for p, k in enumerate(Rd) if k.startswith('*************')] def multiline_reader(Rd, counter, dbaccess_dir): db_dic = {}; last_gas = '' for i in range(len(Rd)): # s1 = Rd[i].rstrip('\n').strip() if (not s1.lstrip().startswith(('*', '!'))) and (s1.lstrip('0123456789.- \t') != ""): if (s1[:3] != 'ref') and (s1[:3] != 'REF') and (s1.split()[0] not in ['minerals', 'gases', 'gas', 'aqueous', 'abandoned']): name = s1.strip().split()[0] name = name.replace('+4', '++++') if name.endswith('+4') else name.replace('+3', '+++') if name.endswith('+3') else name.replace('+2', '++') if name.endswith('+2') else name.replace('-4', '----') if name.endswith('-4') else name.replace('-3', '---') if name.endswith('-3') else name.replace('-2', '--') if name.endswith('-2') else name if self.dbaccessformat.lower() == 'supcrtbl': name = name.title() if not name.endswith(('+', '-', ",aq", "(S)", "(am)", 'dis', 'ord')) else name.capitalize() if 'ACID' in name else name name = name.replace(",aq", "(aq)").replace(",G", "(g)").replace("(S)", "(s)").replace("(Am)", "(am)").replace("(Alpha)", "(alpha)").replace("(Beta)", "(beta)") name = name.replace("-acid", "_acid").replace("(High)", "_high").replace("(Low)", "_low").replace(" anhyd", "_anhyd").replace(" hydr", "_hydr") name = name.replace("-Lo", "_low").replace("(-Hi)", "_high").replace("(oh)", "(OH)").replace("(Oh)", "(OH)").replace("(G)", "(g)").replace("(Ordered)", "-ord") if len(s1.split()) > 1: formula = s1.split()[1] else: formula = '' s2 = Rd[i + 1].strip() dates = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'] if (s2[:3] != 'ref') and (s2[:3] != 'REF') and (s2[0].lstrip('0123456789.,- ') != '') and any(k in s2.split()[-1].lower() for k in dates) == False: s3 = Rd[i+2]; s4 = Rd[i+3]; s5 = Rd[i+4]; if i >= len(Rd) - 5: s6 = 'Null'; s7 = 'Null'; s8 = 'Null'; s9 = 'Null'; s10 = 'Null'; s11 = 'Null'; elif i >= len(Rd) - 6: s6 = Rd[i + 5]; s7 = 'Null'; s8 = 'Null'; s9 = 'Null'; s10 = 'Null'; s11 = 'Null'; else: s6 = Rd[i + 5]; s7 = Rd[i + 6]; s8 = Rd[i + 7]; s9 = Rd[i + 8]; s10 = Rd[i + 9]; s11 = Rd[i + 10]; if s3.strip()[:3] == 'ref': ref = s3#.split()[0][4:] else: ref = s3#.split()[0] if (s6.lower().islower() == True) | s6.startswith('*', 0): params = s4.split() + s5.split() elif (s7.lower().islower() == True) | s7.startswith('*', 0): params = s4.split() + s5.split() + s6.split() elif (s8.lower().islower() == True) | s8.startswith('*', 0): params = s4.split() + s5.split() + s6.split() + s7.split() elif (s9.lower().islower() == True) | s9.startswith('*', 0): params = s4.split() + s5.split() + s6.split() + s7.split() + s8.split() elif (s10.lower().islower() == True) | s10.startswith('*', 0): params = s4.split() + s5.split() + s6.split() + s7.split() + s8.split() + \ s9.split() elif (s11.lower().islower() == True) | s11.startswith('*', 0): params = s4.split() + s5.split() + s6.split() + s7.split() + s8.split() + \ s9.split() + s10.split() else: params = s4.split() + s5.split() + s6.split() + s7.split() + s8.split() + \ s9.split() + s10.split() + s11.split() params = [float(i) if float(i) != 999999 else 0 for i in params] counter += 1 if name in db_dic.keys(): print('Duplicate found for species "%s" in %s' % (name, dbaccess_dir.split('/')[-1])) continue else: db_dic[name] = [formula, ref] + params if len(s5.split()) != 0 and len(s6.split()) and (s5.strip() == '' and s6.strip() == ''): break elif len(s7.split()) != 0 and len(s6.split()) and (s6.strip() == '' and s7.strip() == ''): break elif len(s7.split()) != 0 and len(s8.split()) and (s7.strip() == '' and s8.strip() == ''): break elif len(s8.split()) != 0 and len(s9.split()) and (s8.strip() == '' and s9.strip() == ''): break elif len(s9.split()) != 0 and len(s10.split()) and (s9.strip() == '' and s10.strip() == ''): break elif len(s6.split()) != 0 and (s6.split()[-1] in ['(nmin1)', '(nmin2)', '(nmin3)', '(nmin4)', '(ngas)', '(naqs)']): break elif len(s7.split()) != 0 and (s7.split()[-1] in ['(nmin1)', '(nmin2)', '(nmin3)', '(nmin4)', '(ngas)', '(naqs)']): break elif len(s8.split()) != 0 and (s8.split()[-1] in ['(nmin1)', '(nmin2)', '(nmin3)', '(nmin4)', '(ngas)', '(naqs)']) : break elif len(s9.split()) != 0 and (s9.split()[-1] in ['(nmin1)', '(nmin2)', '(nmin3)', '(nmin4)', '(ngas)', '(naqs)']) : break elif len(s10.split()) != 0 and (s10.split()[-1] in ['(nmin1)', '(nmin2)', '(nmin3)', '(nmin4)', '(ngas)', '(naqs)']): break elif s9.strip('*').strip() == '' and s10.strip('*').strip() == '' and s11.strip('*').strip() == '': break if s1.split()[0] in ['gases', 'gas']: last_mineral = list(db_dic.keys())[-1] # aqueous if s1.lstrip().split()[0] in ['aqueous']: last_gas = list(db_dic.keys())[-1] # last_gas = '' if last_gas is None else last_gas return db_dic, last_mineral, last_gas self.dbaccessdic = {}; counter = 0 # if it is single line data like for dpeq20 if len(Rd) <= 1: width = re.search(' 3 ', Rd[0]).start() Rd = textwrap.wrap(Rd[0], width=width) for i in range(len(Rd)): # s1 = Rd[i] if not s1.startswith(('*', ' '),0) | (s1.rstrip('\n').lstrip('0123456789.- ') == "") | (s1[0] == "-") : if (s1.strip()[:3] != 'ref') and (s1.strip()[:3] != 'REF'): name = s1.strip().split()[0] if len(s1.strip().split()) > 1: formula = s1.strip().split()[1] else: formula = '' s2 = Rd[i + 1] if (s2.strip()[:3] != 'ref') and (s2.strip()[:3] != 'REF') and (s2.split()[0].lstrip('0123456789.,- ') != ''): s3 = Rd[i+2]; s4 = Rd[i+3]; s5 = Rd[i+4]; if s3.split()[0].lstrip('0123456789.,- ') != '': if i >= len(Rd) - 5: s6 = 'Null'; s7 = 'Null'; s8 = 'Null'; s9 = 'Null'; s10 = 'Null'; s11 = 'Null'; elif i >= len(Rd) - 6: s6 = Rd[i + 5]; s7 = 'Null'; s8 = 'Null'; s9 = 'Null'; s10 = 'Null'; s11 = 'Null'; else: s6 = Rd[i + 5]; s7 = Rd[i + 6]; s8 = Rd[i + 7]; s9 = Rd[i + 8]; s10 = Rd[i + 9]; s11 = Rd[i + 10]; if s3.strip()[:3] == 'ref': ref = s3#.split()[0][4:] else: ref = s3#.split()[0] if (s6.lower().islower() == True) | (s6.strip().split()[0] == name): params = s4.split() + s5.split() elif (s7.lower().islower() == True) | (s7.strip().split()[0] == name): params = s4.split() + s5.split() + s6.split() elif (s8.lower().islower() == True) | (s8.strip().split()[0] == name): params = s4.split() + s5.split() + s6.split() + s7.split() elif (s9.lower().islower() == True) | (s9.strip().split()[0] == name): params = s4.split() + s5.split() + s6.split() + s7.split() + s8.split() elif (s10.lower().islower() == True) | (s10.strip().split()[0] == name): params = s4.split() + s5.split() + s6.split() + s7.split() + s8.split() + \ s9.split() elif (s11.lower().islower() == True) | (s11.strip().split()[0] == name): params = s4.split() + s5.split() + s6.split() + s7.split() + s8.split() + \ s9.split() + s10.split() else: params = s4.split() + s5.split() + s6.split() + s7.split() + s8.split() + \ s9.split() + s10.split() + s11.split() params = [float(i) if float(i) != 999999 else 0 for i in params] counter += 1 if name in self.dbaccessdic.keys(): print('Duplicate found for species "%s" in %s' % (name, self.dbaccess_dir.split('/')[-1])) continue else: self.dbaccessdic[name] = [formula, ref] + params else: # for multi line data like for speq21 self.dbaccessdic, last_mineral, last_gas = multiline_reader(Rd, counter, self.dbaccess_dir) if self.dbBerman_dir is not None: codecs = findcodecs(self.dbBerman_dir) mineral_list = list(self.dbaccessdic.keys())[:list(self.dbaccessdic.keys()).index(last_mineral)+1] self.dbaccessdic = {k: v for k, v in self.dbaccessdic.items() if k not in mineral_list} specie_name = [] with open(self.dbBerman_dir, encoding = codecs) as g: for i, line in enumerate(g, 1): if line.strip('*').startswith('COMMENTS'): break if (not line.lstrip().startswith(('!', 'ST', 'C1', 'C2', 'C3', 'D1', 'D2', 'T1', 'T2', 'V1', '*'))) and (line.lstrip('0123456789.- \t\n') != ""): specie_name.append(line.strip().split()[0]) gid = open(self.dbBerman_dir, 'r', encoding = codecs) for i in range(5000): s1 = gid.readline() if s1.strip('*').startswith('MINERAL DATA'): break s4 = '0'; s5 = '0'; s6 = '0' for i in range(5000): s1 = s4 if s4.strip().split()[0] in specie_name else s5 if s5.strip().split()[0] in specie_name else s6 if s6.strip().split()[0] in specie_name else gid.readline() if s1.strip('*').startswith('COMMENTS')|s6.strip('*').startswith('COMMENTS')|s5.strip('*').startswith('COMMENTS'): break if (not s1.lstrip().startswith(('!', 'ST', 'C1', 'C2', 'C3', 'D1', 'D2', 'T1', 'T2', 'V1'))) and (s1.lstrip('0123456789.- \t\n') != ""): name = s1.strip().split()[0] if len(s1.split()) > 1: formula = s1.split()[1] else: formula = '' s2 = gid.readline(); s2 = gid.readline() if s2.lstrip().startswith('!') else s2; s3 = gid.readline() s4 = gid.readline() if (s3.strip().split()[0] not in specie_name) else '0' s4 = s4 + '0' if s4.strip() == '' else s4 s5 = gid.readline() if s4.strip().split()[0] not in specie_name else '0' s5 = s5 + '0' if s5.strip() == '' else s5 s6 = gid.readline() if s5.strip().split()[0] not in specie_name else '0' s6 = s6 + '0' if s6.strip() == '' else s6 params = s2.split()[1:-1] if s2.lstrip().startswith('ST') else s3.split()[1:-1] if s3.lstrip().startswith('ST') else s4.split()[1:-1] if s4.lstrip().startswith('ST') else s5.split()[1:-1] if s5.lstrip().startswith('ST') else [0]*4 params += s3.split()[1:-1] if s3.lstrip().startswith('C1') else s4.split()[1:-1] if s4.lstrip().startswith('C1') else s5.split()[1:-1] if s5.lstrip().startswith('C1') else s6.split()[1:-1] if s6.lstrip().startswith('C1') else [0]*4 # params += s3.split()[1:-2] if s3.lstrip().startswith('C2') else s4.split()[1:-2] if s4.lstrip().startswith('C2') else s5.split()[1:-2] if s5.lstrip().startswith('C2') else s6.split()[1:-2] if s6.lstrip().startswith('C2') else [0]*3 params += s3.split()[1:-1] if s3.lstrip().startswith('V1') else s4.split()[1:-1] if s4.lstrip().startswith('V1') else s5.split()[1:-1] if s5.lstrip().startswith('V1') else s6.split()[1:-1] if s6.lstrip().startswith('V1') else [0]*4 params += s3.split()[1:2] if s3.lstrip().startswith('T2') else s4.split()[1:2] if s4.lstrip().startswith('T2') else s5.split()[1:2] if s5.lstrip().startswith('T2') else s6.split()[1:2] if s6.lstrip().startswith('T2') else [0] params += s3.split()[1:] if s3.lstrip().startswith('T1') else s4.split()[1:] if s4.lstrip().startswith('T1') else s5.split()[1:] if s5.lstrip().startswith('T1') else s6.split()[1:] if s6.lstrip().startswith('T1') else [0]*5 params += s3.split()[1:-1] if s3.lstrip().startswith('D1') else s4.split()[1:-1] if s4.lstrip().startswith('D1') else s5.split()[1:-1] if s5.lstrip().startswith('D1') else s6.split()[1:-1] if s6.lstrip().startswith('D1') else [] params += s3.split()[1:-1] if s3.lstrip().startswith('D2') else s4.split()[1:-1] if s4.lstrip().startswith('D2') else s5.split()[1:-1] if s5.lstrip().startswith('D2') else s6.split()[1:-1] if s6.lstrip().startswith('D2') else [] ref = 'Berman_1988' params = [float(i) if float(i) != 999999 else 0 for i in params] # print(name, params) if name in self.dbaccessdic.keys(): print('Duplicate found for species "%s" in %s' % (name, self.dbBerman_dir.split('/')[-1])) continue else: self.dbaccessdic[name] = [formula, ref] + params gid.close() elif self.dbHP_dir is not None: codecs = findcodecs(self.dbHP_dir) mineralgas_list = list(self.dbaccessdic.keys())[:list(self.dbaccessdic.keys()).index(last_gas)+1] self.dbaccessdic = {k: v for k, v in self.dbaccessdic.items() if k not in mineralgas_list} with open(self.dbHP_dir, encoding = codecs) as g: Rd = g.readlines() self.dbaccessdic.update(multiline_reader(Rd, 0, self.dbHP_dir)[0]) # read in the header reference list if len(header_counter) != 0: header_lines = Rd[header_counter[0]+1:header_counter[1]] if len(header_lines) > 1: checker = [i for i, x in enumerate(header_lines) if ' ... ' in x.lstrip()]; ref_list = [] for i in range(len(checker)-1): p = '' for k in range(checker[i],checker[i+1]): p = p + header_lines[k].lstrip('*').lstrip().strip('\n') ref_list.append(p) self.header_ref = dict(zip(*[iter([item.strip() for sublist in [k.split(' ... ') for k in ref_list] for item in sublist])]*2)) #%% other sources aside speq20 for solid solution calculation #dG dH S V a1 a2 a3 a4 a5 # dG, dH, S from Arnorsson 1999, V and Cp from Robie and Hemingway #1995 _Anorthite_ = ['Ca(Al2Si2)O8', 'R&H95, Stef2001',-4002095, -4227830, 199.30, 100.790*J_to_cal, 5.168e2, -9.249e-2, -1.408e6, -4.589e3, 4.188e-5] self.dbaccessdic['ss_Anorthite'] = [x/J_to_cal if type(x)!=str else x for x in _Anorthite_ ] self.dbaccessdic['ss_Albite_high'] = ['NaAlSi3O8', 'R&H95, Stef2001', -887368.32+8413/J_to_cal, -940769.52, 224.14/J_to_cal, 100.07, 139.56, -0.0221916826, 401051.6, -1535.37, 5.430210e-06] _K_feldspar_ = ['KAlSi3O8', 'R&H95, A&S99', -3745958, -3965730, 232.90, 108.960*J_to_cal, 6.934e2, -1.717e-1, 3.462e6, -8.305e3, 4.919e-5] self.dbaccessdic['ss_K-feldspar'] = [x/J_to_cal if type(x)!=str else x for x in _K_feldspar_ ] _Ferrosilite_ = ['FeSiO3', 'R&H95, Stef2001', -1118000, -1195200, 94.6, 33.0*J_to_cal, 1.243e2, 1.454e-2, -3.378e6, 0, 0] self.dbaccessdic['ss_Ferrosilite'] = [x/J_to_cal if type(x)!=str else x for x in _Ferrosilite_] _Enstatite_=['MgSiO3', 'R&H95, Stef2001', -1458300, -1545600, 66.3, 31.31*J_to_cal, 3.507e2, -1.472e-1, 1.769e6, -4.296e3, 5.826e-5] self.dbaccessdic['ss_Enstatite'] = [x/J_to_cal if type(x)!=str else x for x in _Enstatite_] _Clinoenstatite_=['MgSiO3', 'R&H95, Stef2001', -1458100, -1545000, 67.9, 31.28*J_to_cal, 2.056e2, -1.280e-2, 1.193e6, -2.298e3, 0] self.dbaccessdic['ss_Clinoenstatite'] = [x/J_to_cal if type(x)!=str else x for x in _Clinoenstatite_] _Hedenbergite_=['CaFeSi2O6', 'R&H95, Stef2001', -2676300, -2839900, 174.2, 67.950*J_to_cal, 3.1046e2, 1.257e-2, -1.846e6, -2.040e3, 0] self.dbaccessdic['ss_Hedenbergite'] = [x/J_to_cal if type(x)!=str else x for x in _Hedenbergite_] _Diopside_=['CaMgSi2O6', 'R&H95, Stef2001', -3026800, -3201500, 142.7 , 66.090*J_to_cal, 4.7025e2, -9.864e-2, 0.2454e6, -4.823e3, 2.813e-5] self.dbaccessdic['ss_Diopside'] = [x/J_to_cal if type(x)!=str else x for x in _Diopside_] #dG dH S V a1 a2 a3 a4 a5 _Forsterite_ = ['Mg2SiO4', 'R&H95, Stef2001', -2053600, -2171850, 94.1, 43.65*J_to_cal, 8.736e1, 8.717e-2, -3.699e6, 8.436e2, -2.237e-5] self.dbaccessdic['ss_Forsterite'] = [x/J_to_cal if type(x)!=str else x for x in _Forsterite_] _Fayalite_ = ['Fe2SiO4', 'R&H95, Stef2001', -1379100, -1478200, 151, 46.31*J_to_cal, 1.7602e2, -8.808e-3, -3.889e6, 0, 2.471e-5] self.dbaccessdic['ss_Fayalite'] = [x/J_to_cal if type(x)!=str else x for x in _Fayalite_] _Fluorapatite_ = ['Ca5(PO4)3F', 'R&H95', -6489700, -6872000, 387.9, 157.56*J_to_cal, 7.543e2, -3.026e-2, -0.9084e6, -6.201e3, 0] if 'Fluorapatite' not in self.dbaccessdic.keys(): self.dbaccessdic['Fluorapatite'] = [x/J_to_cal if type(x)!=str else x for x in _Fluorapatite_] _Hydroxyapatite_ = ['Ca5(OH)(PO4)3', 'R&H95', -6337100, -6738500, 390.4, 159.6*J_to_cal, 3.878e2, 11.186e-2, -12.70e6, 1.811e3, 0] if 'Hydroxyapatite' not in self.dbaccessdic.keys(): self.dbaccessdic['Hydroxyapatite'] = [x/J_to_cal if type(x)!=str else x for x in _Hydroxyapatite_] if 'Ankerite' not in self.dbaccessdic.keys(): self.dbaccessdic['Ankerite'] = ['CaFe(CO3)2', 'H&P2011 31.DEC.11\n', -434945.7, -471178.3, 45.043, 66.060, 81.500956, -0.277486, 0, -730.114720, 0] if 'Acmite' not in self.dbaccessdic.keys(): self.dbaccessdic['Acmite'] = ['NaFeSi2O6', 'H&P2011 31.DEC.11\n', -577476.5, -617454.6, 40.774, 64.590, 1.994e2, 6.197e-2, -4.267e6, 0, 0] _Annite_ = ['KFe3AlSi3O10(OH)2', 'R&H95', -4798300, -5149300, 415.0, 154.3*J_to_cal, 6.366e2, 8.208e-2, -4.860e6, -3.731e3, 0] self.dbaccessdic['ss_Annite'] = [x/J_to_cal if type(x)!=str else x for x in _Annite_] _Phlogopite_ = ['KMg3AlSi3O10(OH)2', 'R&H95', -5860500, -6246000, 315.9, 149.65*J_to_cal, 8.639e2, -7.6076e-2, 3.5206e5, -8.470e3, 0] self.dbaccessdic['ss_Phlogopite'] = [x/J_to_cal if type(x)!=str else x for x in _Phlogopite_] #dG dH S V a1 a2 a3 a4 a5 _Molybdenite_ = ['MoS2', 'R&H95', -262800, -271800, 62.6, 32.02*J_to_cal, 1.045e2, -4.812e-3, -6.291e3, -6.817e2, 0] if 'Molybdenite' not in self.dbaccessdic.keys(): self.dbaccessdic['Molybdenite'] = [x/J_to_cal if type(x)!=str else x for x in _Molybdenite_] _Molybdite_ = ['MoO3', 'R&H95', -668100, -745200, 77.7, 30.56*J_to_cal, 6.433e0, 6.278e-2, -2.46e6, 1.337e3, 0] if 'Molybdite' not in self.dbaccessdic.keys(): self.dbaccessdic['Molybdite'] = [x/J_to_cal if type(x)!=str else x for x in _Molybdite_] return
[docs] def readSourceGWBdb(self): """ This function reads source GWB thermodynamic database and reaction coefficients of 'eh' and 'e-' has been added at the bottom returns all reaction coefficients and species, group species into redox, minerals, gases, oxides and aqueous species Parameters ---------- sourcedb : filename of the source database Returns ---------- sourcedic : dictionary of reaction coefficients and species specielist : list of species segmented into the different categories [element, basis, redox, aqueous, minerals, gases, oxides] speciecat : list of species categories listed in 'specielist' chargedic : dictionary of charges of species MWdic : dictionary of MW of species Mineraltype : mineral type for minerals fugacity_info : fugacity information as stored in new tdat database for chi and critical ppts Usage: ---------- [sourcedic, specielist, chargedic, MWdic, Mineraltype, fugacity_info, activity_model] = readSourceGWBdb() """ codecs = self.sourcedb_codecs with open(self.sourcedb_dir, encoding = codecs) as g: for line in g: if line.startswith('activity model'): break with open(self.sourcedb_dir, encoding = codecs) as g: Rd = g.readlines() activity_model = line.strip('\n').split()[-1] data_fmt = [x for x in Rd if 'dataset format' in x][0].strip('\n').split(':')[-1].strip() fugacity_model = [x for x in Rd if 'fugacity model' in x][0].strip('\n').split(': ')[-1] if data_fmt != 'oct94' else '' unwanted = ['elements', 'basis species', 'redox couples', 'aqueous species', 'free electron', 'minerals', 'solid solutions', 'gases', 'oxides', 'stop.' ] #capture line numbers with line break d=[]; previousline = '' with open(self.sourcedb_dir, encoding = codecs) as fid: for idx, line in enumerate(fid, 1): if line.strip().rstrip('\n').lstrip('0123456789.- ') in unwanted: x=idx d.append(x-1) #elif line.strip(' \n*').startswith(('virial coefficients', 'Virial coefficients', 'SIT epsilon coefficients', 'Pitzer parameters')): elif previousline.startswith('-end-') and line.startswith('*'): x=idx; #print(x) d.append(x+1) break previousline = line if activity_model == 'h-m-w': if all([i.startswith('*') for i in Rd[d[-1]:d[-1]+30]]) == False: d_act = [i for i, x in enumerate(Rd[d[-1]:]) if x.strip('\n') ==''][0] else: d_act = [i for i, x in enumerate(Rd[d[-2]:]) if x.strip('\n') ==''][0] f = open(self.sourcedb_dir, 'r', encoding = codecs) #skip first 11 lines of database .lstrip('0123456789.- ') for i in range(d[1]+2): s1 = f.readline() self.sourcedic = {} # initialize dictionary for i in range(d[-1]-d[1]): # s1 = f.readline() if s1.strip(' \n*').startswith(("references", 'virial coefficients', 'Virial coefficients', 'SIT epsilon coefficients', 'Pitzer parameters')) : break if not s1.startswith((' ', '*'), 0) | (s1.rstrip('\n') == "") | (len(s1) !=0 and s1[0] == "-") : if s1.rstrip('\n').lstrip('0123456789.- ') in unwanted: continue elif s1.rstrip('\n').lstrip('0123456789.- ') == '': continue else: specie_name = s1.strip().split()[0] s2 = f.readline(); s3 = f.readline() if s2.rstrip('\n') != '' else '' s4 = f.readline() if s3.rstrip('\n') != '' else '' s5 = f.readline() if s4.rstrip('\n') != '' else '' if data_fmt == 'mar21' and not s3.split()[0].isdigit() and'mole' not in [s3.split()[0], s2.split()[0]] : if (s2.rstrip('\n') != "" and s3.rstrip('\n') == ""): ss_details = [s1, s2, s3] elif (s3.rstrip('\n') != "" and s4.rstrip('\n') == ""): ss_details = [s1, s2, s3, s4] elif (s4.rstrip('\n') != "" and s5.rstrip('\n') == ""): ss_details = [s1, s2, s3, s4, s5] elif (s5.rstrip('\n') != ""): s6 = f.readline() ss_details = [s1, s2, s3, s4, s5, s6] if (s6.rstrip('\n') != ""): s7 = f.readline() ss_details = [s1, s2, s3, s4, s5, s6, s7] if (s7.rstrip('\n') != ""): s8 = f.readline() ss_details = [s1, s2, s3, s4, s5, s6, s7, s8] if (s8.rstrip('\n') != ""): s9 = f.readline() ss_details = [s1, s2, s3, s4, s5, s6, s7, s8, s9] specie_formula = []; species_num = []; reactant = [] else: if (s5.rstrip('\n') != ""): s6 = f.readline() if (s6.rstrip('\n') != ""): s7 = f.readline() if not s2.startswith('*',0): if (len(s2.split()) > 1) and (s2.split()[0] != 'formula='): if len(s1.split('formula=')) <= 1: specie_formula = "" else: specie_formula = s1.rstrip('\n').split('formula=')[1] if not s3.lstrip().startswith(('chi', 'Pcrit'), 0): species_num = int(s3.split()[0]) if species_num <= 3: reactant = s4.split() elif species_num <= 6: reactant = s4.split() + s5.split() else: reactant = s4.split() + s5.split() + s6.split() else: if not s4.lstrip().startswith(('chi','Pcrit'),0): species_num = int(s4.split()[0]) if species_num <= 3: reactant = s5.split() elif species_num <= 6: reactant = s5.split() + s6.split() else: reactant = s5.split() + s6.split() + s7.split() else: species_num = int(s5.split()[0]) if species_num <= 3: reactant = s6.split() else: reactant = s6.split() + s7.split() else: if len(s2.split('formula=')) <= 1: specie_formula = "" else: specie_formula = s2.rstrip('\n').split('formula=')[1] species_num = int(s4.split()[0]) if species_num <= 3: reactant = s5.split() elif species_num <= 6: reactant = s5.split() + s6.split() else: reactant = s5.split() + s6.split() + s7.split() else: specie_formula = s2.split()[2] species_num = int(s4.split()[0]) if species_num <= 3: reactant = s5.split() elif species_num <= 6: reactant = s5.split() + s6.split() else: reactant = s5.split() + s6.split() + s7.split() else: specie_formula = "" species_num = int(s3.split()[0]) if species_num <= 3: reactant = s4.split() elif species_num <= 6: reactant = s4.split() + s5.split() else: reactant = s4.split() + s5.split() + s6.split() else: specie_formula = "" species_num = int(s3.split()[0]) if species_num <= 3: reactant = s4.split() elif species_num <= 6: reactant = s4.split() + s5.split() else: reactant = s4.split() + s5.split() + s6.split() ss_details = [] dt = [specie_formula, species_num] + reactant + ss_details self.sourcedic[specie_name] = dt[2:] if any(isinstance(el, list) for el in dt) else dt self.sourcedic['eh'] = ['eh', 3, '-2.0000', 'H2O', '1.0000', 'O2(g)', '4.0000', 'H+'] self.sourcedic['e-'] = ['e-', 3, '0.50000', 'H2O', '-0.2500', 'O2(g)', '-1.0000', 'H+'] f.close() element = []; basis = []; redox = []; aqueous = []; minerals = []; gases = []; oxides = []; solidsolutions = [] charge = []; MW = []; electron = []; self.Mineraltype = {}; fugacity_chi = {}; fugacity_Pcrit = {} with open(self.sourcedb_dir, encoding = codecs) as fid: for i, line in enumerate(fid, 1): previousline = line if (line.strip(' \n*').startswith(("references", 'Pitzer parameters', 'virial coefficients', 'Virial coefficients', 'SIT epsilon coefficients'))): break # elif previousline.startswith('-end-') and line.startswith('*'): # break if not line.startswith((' ','*'),0) | (line.rstrip('\n') == "") | (line[0] == "-") : if not line.split()[0].replace('.','',1).isnumeric(): if not line.startswith(('charge', 'mole', 'formula'), 0): if d[0] < i < d[1]: element.append(line.split()[0]) elif d[1] < i < d[2]: basis.append(line.split()[0]) elif d[2] < i < d[3]: redox.append(line.split()[0]) elif d[3] < i < d[4]: aqueous.append(line.split()[0]) elif d[4] < i < d[5]: if data_fmt == 'oct94': minerals.append(line.split()[0]) elif data_fmt in ['jul17', 'jan19', 'apr20', 'mar21'] : electron.append(line.split()[0]) elif d[5] < i < d[6]: if data_fmt == 'oct94': gases.append(line.split()[0]) elif data_fmt in ['jul17', 'jan19', 'apr20', 'mar21'] : minerals.append(line.split()[0]) elif i > d[6]: if data_fmt == 'oct94': oxides.append(line.split()[0]) elif d[6] < i < d[7]: if data_fmt in ['jul17', 'jan19', 'apr20'] : gases.append(line.split()[0]) elif data_fmt == 'mar21': solidsolutions.append(line.split()[0]) elif i > d[7]: if data_fmt in ['jul17', 'jan19', 'apr20'] : oxides.append(line.split()[0]) elif data_fmt == 'mar21': if d[7] < i < d[8]: gases.append(line.split()[0]) elif i > d[8]: oxides.append(line.split()[0]) if (re.compile(r"charge").search(line) != None) and not line.startswith('*'): charge.append(line) if (re.compile(r"mole wt.=").search(line) != None) and not line.startswith('*'): MW.append(re.sub('[^0123456789\.]', '', line.strip('\n').split('wt.=')[1])) if (re.compile(r"type=").search(line) != None) and not line.startswith('*'): if len(line.split()) <= 2: self.Mineraltype[line.split()[0]] = '' else: self.Mineraltype[line.split()[0]] = line.split()[2] if (re.compile(r"chi=").search(line) != None) and not line.startswith('*'): fugacity_chi[gases[-1]] = line if (re.compile(r"Pcrit=").search(line) != None) and not line.startswith('*'): fugacity_Pcrit[gases[-1]] = line act_list = []; #previousline = '' if activity_model == 'h-m-w': with open(self.sourcedb_dir, encoding = codecs) as fid: for i, line in enumerate(fid, 1): num = d[-1] + d_act if all([i.startswith('*') for i in Rd[d[-1]:d[-1]+30]]) == False else d[-2] + d_act if i > num: #d[-1] + d_act: if not line.startswith((' ', '\n', '-end-', '*')): act_list.append(line) self.act_param = {'activity_model': activity_model, 'act_list': act_list, 'dataset_format' : data_fmt} self.fugacity_info = {'fugacity_model': fugacity_model, 'fugacity_chi': fugacity_chi,'fugacity_Pcrit': fugacity_Pcrit} res = basis + redox + aqueous + electron self.chargedic = {res[i]: charge[i].rstrip('\n') for i in range(len(charge))} res = element + basis + redox + aqueous + electron + minerals + gases + oxides self.MWdic = {res[i]: float(MW[i]) for i in range(len(MW))} if data_fmt != 'mar21': self.specielist = [element, basis, redox, aqueous, electron, minerals, gases, oxides] self.speciecat = ['element', 'basis', 'redox', 'aqueous', 'electron', 'minerals', 'gases', 'oxides'] else: self.specielist = [element, basis, redox, aqueous, electron, minerals, solidsolutions, gases, oxides] self.speciecat = ['element', 'basis', 'redox', 'aqueous', 'electron', 'minerals', 'solidsolutions', 'gases', 'oxides'] return
[docs] def readSourceEQ36db(self): """ This function reads source EQ3/6 thermodynamic database and reaction coefficients of 'eh' and 'e-' has been added at the bottom returns all reaction coefficients and species, group species into basis, auxiliary basis, minerals, gases, liquids and aqueous species Parameters ---------- sourcedb : filename of the source database Returns ---------- sourcedic : dictionary of reaction coefficients and species specielist : list of species segmented into the different categories [element, basis, redox, aqueous, minerals, gases, oxides] speciecat : list of species categories listed in 'specielist' chargedic : dictionary of charges of species and DHazero info MWdic : dictionary of MW of species Sptype : specie types and eq3/6 and revised date info Elemlist : dictionary of elements and coefficients Usage: ---------- [sourcedic, specielist, chargedic, MWdic, block_info, Elemlist, act_param] = readSourceEQ36db(sourcedb) """ codecs = self.sourcedb_codecs with open(self.sourcedb_dir, encoding = codecs) as g: for line in g: if line.startswith(('bdot parameters', 'single-salt parameters', 'ca combinations')): break if line.startswith('bdot parameters'): activity_model = 'debye-huckel' elif line.startswith(('single-salt parameters', 'ca combinations')): activity_model = 'h-m-w' subheading = ['elements', 'basis species', 'auxiliary basis species', 'aqueous species', 'solids', 'liquids', 'gases', 'solid solutions', 'references', 'stop.'] d = [] with open(self.sourcedb_dir, encoding = codecs) as f: for idx, line in enumerate(f, 1): if line.strip().rstrip('\n').lstrip('0123456789.- ') in subheading: x=idx d.append(x-1) if line.startswith(('bdot parameters', 'single-salt parameters', 'ca combinations')): x=idx d.append(x-1) element = []; basis = []; auxiliary = []; aqueous = []; minerals = []; MW = []; #DH = [] gases = []; liquids = []; charge = []; solid_solutions = []; act_params = [] with open(self.sourcedb_dir, encoding = codecs) as f: for i, line in enumerate(f, 1): if (line.rstrip('\n') == "references"): break if not line.startswith('*',0) | line.startswith(' ',0) | (line.rstrip('\n') == "") | (line[0] == "-") : if not line.split()[0].replace('.','',1).isnumeric() and not line.startswith('+',0): if line.strip().rstrip('\n').lstrip('0123456789.- ') not in subheading: line = line.strip().rstrip('\n') if d[1] < i < d[2]: element.append(line.split()[0]) MW.append(float(line.split()[1])) elif d[2] < i < d[3]: basis.append(line.split(' ')[0]) elif d[3] < i < d[4]: if any(re.findall(r'|'.join(('acid', 'high', 'low')), line.split(' ')[0], re.IGNORECASE)): auxiliary.append(line.split(' ')[0].replace(' ', '_')) else: auxiliary.append(line.split(' ')[0]) elif d[4] < i < d[5]: if any(re.findall(r'|'.join(('acid', 'high', 'low')), line.split(' ')[0], re.IGNORECASE)): aqueous.append(line.split(' ')[0].replace(' ', '_')) else: aqueous.append(line.split(' ')[0]) elif d[5] < i < d[6]: if any(re.findall(r'|'.join(('acid', 'high', 'low', 'anhyd', 'hydr')), line.split(' ')[0], re.IGNORECASE)): minerals.append(line.split(' ')[0].replace(' ', '_')) else: minerals.append(line.split(' ')[0]) elif d[6] < i < d[7]: liquids.append(line.split(' ')[0]) elif d[7] < i < d[8]: gases.append(line.split(' ')[0]) elif i > d[8]: solid_solutions.append(line.split(' ')[0]) if re.compile(r"charge").search(line) != None: charge.append(line) if d[0] < i < d[1]: act_params.append(line) self.MWdic = {element[i]: float(MW[i]) for i in range(len(MW))} res = basis + auxiliary + aqueous + minerals + liquids + gases + solid_solutions lstname = []; self.block_info = {}; self.Elemlist = {}; self.sourcedic = {} self.act_param = {'activity_model': activity_model, 'act_list': None, 'alpha_beta' : {}, 'theta':{}, 'lambda':{}, 'psi':{},'zeta':{},'mu':{}, 'beta0' : {}, 'beta1' : {}, 'beta2' : {}, 'alpha1' : {}, 'alpha2' : {}, 'cphi' : {}} keywords = [["+" + "-"*30, "+" + "-"*30] ] for num, k in enumerate(keywords): lst = []; counter = 0 f = open(self.sourcedb_dir, 'r', encoding = codecs) for i in range(d[1] + len(element)):# s1 = f.readline() if (s1.rstrip('\n') == "elements") : break act_list = [act_params[i +1] for i,x in enumerate(act_params) if x.rstrip('\n').startswith(k[0])] act_list = [x for x in act_list if not x.startswith(('*', 'nn', 'nc', 'cc', 'mixture'))] if (activity_model == 'h-m-w') and any(s1.lstrip().rstrip('\n').startswith(x) for x in act_list): lst = []; lst.append(s1) for j in range(50): s = f.readline() if s.lstrip().rstrip('\n').startswith(k[1]): break elif not s.lstrip().startswith(('****', '*', 'single-salt parameters', 'ca combinations')) | (s.rstrip('\n').strip('0123456789.- ') in ["", 'Miscellaneous parameters']+subheading): lst.append(s) if len(lst) > 2: if any(['mu' in x for x in lst]): checker = [x.strip(' \n').split()[-1] for i, x in enumerate(lst) if x.lstrip().startswith('mu')] if re.sub('[^0123456789\.]', '', checker[0]) == '': first_a = [i for i, x in enumerate(lst) if x.lstrip().startswith('a')][0] mu = float(lst[first_a].split()[-1]) else: mu = float(re.sub('[^0123456789\.]', '', checker[0])) self.act_param['mu'][lst[0].rstrip('\n')] = mu if any(['zeta' in x for x in lst]): checker = [x.strip(' \n').split()[-1] for i, x in enumerate(lst) if x.lstrip().startswith('zeta')] if re.sub('[^0123456789\.]', '', checker[0]) == '': first_a = [i for i, x in enumerate(lst) if x.lstrip().startswith('a')][0] zeta = float(lst[first_a].split()[-1]) else: zeta = float(re.sub('[^0123456789\.]', '', checker[0])) self.act_param['zeta'][lst[0].rstrip('\n')] = zeta if any([('alpha' or 'beta') in x for x in lst]): self.act_param['alpha_beta'][lst[0].rstrip('\n')] = lst lster = ['beta0', 'beta1', 'beta2', 'alpha1', 'alpha2', 'cphi'] for pos in lster: checker = [x.strip(' \n') for i, x in enumerate(lst) if pos in re.sub('[()]', '', x).lstrip().lower()] if all([x in checker[0] for x in lster[:3]]): checker = checker[0].replace("=", "").split() par = float(checker[checker.index(pos) + 1]) elif all([x in checker[0] for x in lster[3:5]]): checker = checker[0].replace("=", "").split() par = float(checker[checker.index(pos) + 1]) else: # checker = checker.split()[-1] if re.sub('[^0123456789\.]', '', re.sub('[(012)]', '', checker[0])) == '': first_a = [i for i, x in enumerate(lst) if pos in re.sub('[()]', '', x).lstrip().lower() ][0] + 1 par = float(lst[first_a].split()[-1]) else: par = float(re.sub('[^0123456789\.]', '', re.sub('[()]', '', checker[0]))) self.act_param[pos][lst[0].rstrip('\n')] = par if any(['psi' in x for x in lst]): checker = [x.strip(' \n') for i, x in enumerate(lst) if 'psi' in x.lstrip() ] if all([x in checker[0] for x in [' psi', 'theta']]): checker = checker[0].replace("=", "").split() psi = float(checker[checker.index('psi') + 1]) else: if re.sub('[^0123456789\.]', '', checker[0]) == '': first_a = [i for i, x in enumerate(lst) if x.lstrip().startswith('a')][0] psi = float(lst[first_a].split()[-1]) else: psi = float(re.sub('[^0123456789\.]', '', checker[0])) self.act_param['psi'][lst[0].rstrip('\n')] = psi if any(['theta' in x for x in lst]): checker = [x.strip(' \n') for i, x in enumerate(lst) if 'theta' in x.lstrip() ] if all([x in checker[0] for x in ['theta', ' psi']]): checker = checker[0].replace("=", "").split() theta = float(checker[checker.index('theta') + 1]) else: if re.sub('[^0123456789\.]', '', checker[0]) == '': first_a = [i for i, x in enumerate(lst) if x.lstrip().startswith('a')][0] theta = float(lst[first_a].split()[-1]) else: theta = float(re.sub('[^0123456789\.]', '', checker[0])) self.act_param['theta'][lst[0].rstrip('\n')] = theta if any(['lambda' in x for x in lst]): checker = [x.strip(' \n').split()[-1] for i, x in enumerate(lst) if 'lambda' in x.lstrip()] if re.sub('[^0123456789\.]', '', checker[0]) == '': first_a = [i for i, x in enumerate(lst) if x.lstrip().startswith('a')][0] lambdaa = float(lst[first_a].split()[-1]) else: lambdaa = float(re.sub('[^0123456789\.]', '', checker[0])) self.act_param['lambda'][lst[0].rstrip('\n')] = lambdaa for i in range(d[-1]) : # s = f.readline() if (s.rstrip('\n') == "references") : break s = s.replace(" acid", "_acid").replace(" high", "_high").replace(" low", "_low").replace(" anhyd", "_anhyd").replace(" hydr", "_hydr") if any(s.lstrip().rstrip('\n').startswith(x) for x in res): lst = []; lst.append(s); counter += 1 for j in range(50): s = f.readline() if s.lstrip().rstrip('\n').startswith(k[1]): break elif not s.lstrip().startswith('****'): lst.append(s) lst[0] = lst[0].rstrip('\n') if lst[0] not in subheading: if lst[0].split(' ')[0] in res or lst[0].split()[0].replace(' ', '_') in res: if any(re.findall(r'|'.join(('acid', 'high', 'low', 'anhyd', 'hydr')), lst[0].split(' ')[0], re.IGNORECASE)): specie_name = lst[0].split(' ')[0].replace(' ', '_') else: specie_name = lst[0].split(' ')[0] lstname.append(specie_name) if specie_name not in solid_solutions: indx, elem_numb =[(i,int(re.sub('[^0123456789\.]', '', x))) for i,x in enumerate(lst) if x.strip('0123456789.,-: ').startswith('element(s)')][0] elem_rows = list(range(indx + 1, indx + 2)) if elem_numb <= 3 else list(range(indx + 1, indx + 3)) if elem_numb <= 6 else list(range(indx + 1, indx + 4)) if elem_numb <= 9 else list(range(indx + 1, indx + 5)) reactant = [lst[x].rstrip('\n').split(' ') for x in elem_rows] reactant = [item for sublist in reactant for item in sublist] # convert list of list to list reactant = [y for x in reactant for y in x.split() if x != '' and x != '****'] self.Elemlist[specie_name] = reactant else: elem_rows = [20] if specie_name not in basis[:-1] + solid_solutions and counter > len(basis): indx, species_num =[(i,int(re.sub('[^0123456789\.]', '', x))) for i,x in enumerate(lst) if x.strip('0123456789.,-: ').startswith('species in')][0] spec_rows = list(range(indx + 1, indx + 2)) if species_num < 3 else list(range(indx + 1, indx + 3)) if species_num < 5 else list(range(indx + 1, indx + 4)) if species_num < 7 else list(range(indx + 1, indx + 5)) if species_num < 9 else list(range(indx + 1, indx + 6)) reactants = [lst[x].rstrip('\n').split(' ') for x in spec_rows] reactants = [item for sublist in reactants for item in sublist] # convert list of list to list reactants = [y for x in reactants for y in x.split() if x != '' and x != '****'] reactants = [j.replace(' ', '_') if any(re.findall(r'|'.join(('acid', 'high', 'low', 'anhyd', 'hydr')), j, re.IGNORECASE)) else j for j in reactants] else: spec_rows = [20] if specie_name in solid_solutions: indx, species_num =[(i,int(re.sub('[^0123456789\.]', '', x))) for i,x in enumerate(lst) if x.strip('0123456789.,-: ').startswith(('components', 'end members'))][0] spec_rows = list(range(indx + 1, indx + 2)) if species_num < 3 else list(range(indx + 1, indx + 3)) if species_num < 5 else list(range(indx + 1, indx + 4)) if species_num < 7 else list(range(indx + 1, indx + 5)) if species_num < 9 else list(range(indx + 1, indx + 6)) reactants = [lst[x].rstrip('\n').split(' ') for x in spec_rows] reactants = [item for sublist in reactants for item in sublist] # convert list of list to list reactants = [y for x in reactants for y in x.split() if x != '' and x != '****'] mw = [x for x in lst if x.strip('* ').startswith('mol.wt.')] mw = float(re.sub('[^0123456789\.]', '', mw[0].split()[-2])) if len(mw) != 0 else [] if len(lst[0].split()) <= 1: specie_formula = "" elif len(lst[0].split()) > 2: specie_formula = lst[0].split()[2] else: specie_formula = lst[0].split()[1] if (specie_name == 'O2(g)') and counter <= len(basis): self.block_info['%s_b' % specie_name] = lst[1:min(elem_rows,spec_rows)[0] - 1] elif specie_name in basis[:-1] + auxiliary + aqueous + minerals + liquids + gases: self.block_info[specie_name] = lst[1:min(elem_rows,spec_rows)[0] - 1] elif specie_name in solid_solutions: self.block_info[specie_name] = [lst[1:min(spec_rows) - 1], lst[(max(spec_rows) + 1):]] if specie_name in basis and counter <= len(basis): self.sourcedic[specie_name] = [specie_formula, elem_numb] + reactant elif specie_name == 'O2(g)': self.sourcedic[specie_name] = [specie_formula, species_num] + reactants else: self.sourcedic[specie_name] = [specie_formula, species_num] + reactants self.MWdic[specie_name] = mw self.sourcedic['eh'] = ['eh', 4, '-1.0000', 'eh', '-2.0000', 'H2O', '1.0000', 'O2(g)', '4.0000', 'H+'] self.sourcedic['e-'] = ['e-', 4, '-1.0000', 'e-', '0.50000', 'H2O', '-0.2500', 'O2(g)', '-1.0000', 'H+'] self.specielist = [element, basis, auxiliary, aqueous, minerals, liquids, gases, solid_solutions] self.speciecat = ['element', 'basis', 'redox', 'aqueous', 'minerals', 'liquids', 'gases', 'solid_solutions'] res = basis + auxiliary + aqueous self.chargedic = {res[i]: charge[i].rstrip('\n') for i in range(len(charge))} self.act_param['act_list'] = act_list if activity_model == 'h-m-w' else '' self.act_param['activity_model'] = activity_model f.close() return