Source code for aleimi.processed

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from glob import glob
import os
from rmsd import kabsch_rmsd

[docs] def psi4_out_read(out:str): """ Parameters ---------- out : str psi4 put file name. Returns ------- check_end, check_freq, Electronic energy, Gibbs free energy, exyz, xyz2RMSD_H. """ with open(out, 'rt', encoding='latin-1') as file: lines = file.readlines() if lines[-1].strip() == '*** Psi4 exiting successfully. Buy a developer a beer!': check_end = True else: check_end = False check_freq = False E = 0 G = 0 exyz = [] xyz2RMSD_H =[] for i, _ in enumerate(lines): if 'Final (previous) structure:' in lines[i]: E = float(lines[i-1].split()[-1]) for j in range(i+2,len(lines)): if 'Saving final (previous) structure.' in lines[j]: break split = lines[j].split() exyz.append([split[0], float(split[1]), float(split[2]), float(split[3])]) if 'H' not in lines[j]: xyz2RMSD_H.append([float(split[1]), float(split[2]), float(split[3])]) if 'Freq [cm^-1]' in lines[i]: check_freq = True freqs = lines[i].split()[2:] for f in freqs: # psi4 represent the imaginary number as 5i, # if an error ocurreduring the float conversion, # or it was converted but if less than cero (another error on sqrt) # then check_freq = False try: np.sqrt(float(f)) except: check_freq = False break if 'Total G, Free enthalpy at 298.15 [K]' in lines[i]: G = float(lines[i].split('[K]')[1].split()[0]) exyz = pd.DataFrame(exyz) xyz2RMSD_H = np.array(xyz2RMSD_H, dtype=float) return check_end, check_freq, E, G, exyz, xyz2RMSD_H
[docs] def main(SubDirs:bool = True, engine:str = 'psi4', xyz_out:bool = False) -> dict: """Process the output of the QM calculations generated through :meth:`aleimi.extractor.main`. This function is quiet limited for now, Only useful for psi4 engine. Parameters ---------- SubDirs : bool, optional Should be True if :meth:`aleimi.extractor.main` was used with ``mkdir = True``, by default True engine : str, optional psi4, gaussian or orca. It depends on the engine defined on :meth:`aleimi.extractor.main` was used with ``engine`` keyword, by default 'psi4' xyz_out : bool, optional If True, it will write the xyz coordinates of the conformer with the lowest energy, by default False Returns ------- dict conf_name: 3D_coordinates Raises ------ FileNotFoundError If there is not .out files """ if SubDirs: outs = [out for out in glob('*/*.out') if 'myjob' not in out] first_names = set() for out in outs: first_names.add(os.path.basename(out).split('_Cell')[0]) first_names = list(first_names) else: outs = [out for out in glob('*.out') if 'myjob' not in out] first_names = set() for out in outs: first_names.add(out.split('_Cell')[0]) first_names = list(first_names) if not outs: raise FileNotFoundError('No se pueden encontrar los archivos .out') if engine == 'psi4': reader = psi4_out_read elif engine == 'orca': pass elif engine == 'gaussian': pass else: pass wrong_freq = [] wrong_end = [] coord_lower_energy = {} for first_name in first_names: to_work = [] if SubDirs: for out in outs: if os.path.basename(out).split('_Cell')[0] == first_name: to_work.append(out) else: for out in outs: if out.split('_Cell')[0] == first_name: to_work.append(out) Gibbs_free_energies = [] coords = [] coord2RMSD_Hs =[] good = [] for item in to_work: check_end, check_freq, E_total_energy, Gibbs_free_energy, coord, coord2RMSD_H = reader(item) if check_end: if check_freq==True: good.append(os.path.basename(item)) Gibbs_free_energies.append(Gibbs_free_energy) coords.append(coord) coord2RMSD_Hs.append(coord2RMSD_H) else: wrong_freq.append(item) else: wrong_end.append(item) if good: paired = list(zip(good, Gibbs_free_energies, coords, coord2RMSD_Hs)) ORDERED = sorted(paired, key=lambda x: x[1]) coord_lower_energy[ORDERED[0][0].split('.')[0]] = ORDERED[0][2] to_print = [] for i, _ in enumerate(ORDERED): to_print.append([ORDERED[i][0].split('_Cell_')[-1].split('.')[0], ORDERED[i][1]]) to_print = pd.DataFrame(to_print, columns = ['Cell', 'Gibbs (hartree/partícula)']) print(first_name) print(to_print) print('\n') with open(first_name+'.orden', 'wt') as final: to_print.to_string(final) rmsd_matrix = np.zeros((len(ORDERED),len(ORDERED))) for i, _ in enumerate(rmsd_matrix): for j, _ in enumerate(rmsd_matrix): if j < i: rmsd_matrix[i][j] = rmsd_matrix[j][i] elif j == i: rmsd_matrix[i][j] == 0.0 else: # ============================================================= # Calculando RMSD # ============================================================= P = ORDERED[i][3] Q = ORDERED[j][3] rmsd_matrix[i][j] = kabsch_rmsd(P, Q, translate = True) index = [g.split('_Cell_')[-1].split('.')[0] for g in good] to_print_rmsd_matrix = pd.DataFrame(rmsd_matrix, index = index, columns = index) with open(first_name+'.rmsd', 'wt') as final: to_print_rmsd_matrix.to_string(final) #========================Se exporta el .xyz================== if xyz_out: with open(ORDERED[0][0].split('.')[0]+'.xyz', 'wt') as final: final.write(str(len(ORDERED[0][2]))+'\n\n') ORDERED[0][2].to_string(final, header=False, index=False) #====================================================================== if wrong_freq: print(f'Las cálculos: {wrong_freq} presentaron frecuencias negativas o imaginarias.') if wrong_end: print(f'Las cálculos: {wrong_end} presentaron problemas para un correcta finalizacion. Check the psi4 output!') return coord_lower_energy
if __name__ == '__main__':...