Source code for aleimi.processed

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from glob import glob
import os
from rmsd import kabsch_rmsd


[docs]
def psi4_out_read(out:str):
    """
    Parameters
    ----------
    out : str
        psi4 put file name.
    Returns
    -------
    check_end, check_freq, Electronic energy, Gibbs free energy, exyz, xyz2RMSD_H.
    """

    with open(out, 'rt', encoding='latin-1') as file:
        lines = file.readlines()

    if lines[-1].strip() == '*** Psi4 exiting successfully. Buy a developer a beer!':
        check_end = True
    else:
        check_end = False
    check_freq = False
    E = 0
    G = 0
    exyz = []
    xyz2RMSD_H =[]

    for i, _ in enumerate(lines):
        if 'Final (previous) structure:' in lines[i]:
            E = float(lines[i-1].split()[-1])
            for j in range(i+2,len(lines)):
                if 'Saving final (previous) structure.' in lines[j]: break
                split = lines[j].split()
                exyz.append([split[0], float(split[1]), float(split[2]), float(split[3])])
                if 'H' not in lines[j]:
                    xyz2RMSD_H.append([float(split[1]), float(split[2]), float(split[3])])
        if 'Freq [cm^-1]' in lines[i]:
            check_freq = True
            freqs = lines[i].split()[2:]
            for f in freqs:
                # psi4 represent the imaginary number as 5i,
                # if an error ocurreduring the float conversion, 
                # or it was converted but if less than cero (another error on sqrt) 
                # then check_freq = False
                try:
                    np.sqrt(float(f))
                except:
                    check_freq = False
                    break
        if 'Total G, Free enthalpy at  298.15 [K]' in lines[i]:
            G = float(lines[i].split('[K]')[1].split()[0])
        
    exyz = pd.DataFrame(exyz)
    xyz2RMSD_H = np.array(xyz2RMSD_H, dtype=float)
    return check_end, check_freq, E, G, exyz, xyz2RMSD_H



[docs]
def main(SubDirs:bool = True, engine:str = 'psi4', xyz_out:bool = False) -> dict:
    """Process the output of the QM calculations generated through :meth:`aleimi.extractor.main`.
    This function is quiet limited for now, Only useful for psi4 engine.

    Parameters
    ----------
    SubDirs : bool, optional
        Should be True if :meth:`aleimi.extractor.main` was used with ``mkdir = True``, by default True
    engine : str, optional
        psi4, gaussian or orca. It depends on the engine defined on :meth:`aleimi.extractor.main` was used with ``engine`` keyword, by default 'psi4'
    xyz_out : bool, optional
        If True, it will write the xyz coordinates of the conformer with the lowest energy, by default False

    Returns
    -------
    dict
        conf_name: 3D_coordinates

    Raises
    ------
    FileNotFoundError
        If there is not .out files
    """
    if SubDirs:
        outs = [out for out in glob('*/*.out') if 'myjob' not in out]

        first_names = set()
        for out in outs:
            first_names.add(os.path.basename(out).split('_Cell')[0])
        first_names = list(first_names)
    else:
        outs = [out for out in glob('*.out') if 'myjob' not in out]
        
        first_names = set()
        for out in outs:
            first_names.add(out.split('_Cell')[0])
        first_names = list(first_names)
    if not outs: raise FileNotFoundError('No se pueden encontrar los archivos .out')

    if engine == 'psi4':
        reader = psi4_out_read
    elif engine == 'orca':
        pass
    elif engine == 'gaussian':
        pass
    else:
        pass

    wrong_freq = []
    wrong_end = []

    coord_lower_energy = {}
    for first_name in first_names:
        to_work = []
        if SubDirs:
            for out in outs:
                if os.path.basename(out).split('_Cell')[0] == first_name:
                    to_work.append(out)
        else:
            for out in outs:
                if out.split('_Cell')[0] == first_name:
                    to_work.append(out)

        Gibbs_free_energies = []
        coords = []
        coord2RMSD_Hs =[]
        good = []

        for item in to_work:
            check_end, check_freq, E_total_energy, Gibbs_free_energy, coord, coord2RMSD_H = reader(item)
            if check_end:
                if check_freq==True:
                    good.append(os.path.basename(item))
                    Gibbs_free_energies.append(Gibbs_free_energy)
                    coords.append(coord)
                    coord2RMSD_Hs.append(coord2RMSD_H)
                else:
                    wrong_freq.append(item)
            else:
                wrong_end.append(item)
        if good:
            paired = list(zip(good, Gibbs_free_energies, coords, coord2RMSD_Hs)) 
            ORDERED = sorted(paired, key=lambda x: x[1])
            coord_lower_energy[ORDERED[0][0].split('.')[0]] = ORDERED[0][2]
            to_print = []
            for i, _ in enumerate(ORDERED):
                to_print.append([ORDERED[i][0].split('_Cell_')[-1].split('.')[0], ORDERED[i][1]])
            to_print = pd.DataFrame(to_print, columns = ['Cell', 'Gibbs (hartree/partícula)'])    
            print(first_name)
            print(to_print)
            print('\n')
            with open(first_name+'.orden', 'wt') as final:
                to_print.to_string(final)

        
            rmsd_matrix = np.zeros((len(ORDERED),len(ORDERED)))
            for i, _ in enumerate(rmsd_matrix):
                for j, _ in enumerate(rmsd_matrix):
                    if j < i:
                        rmsd_matrix[i][j] = rmsd_matrix[j][i]
                    elif j == i:
                        rmsd_matrix[i][j] == 0.0
                    else:
                        # =============================================================
                        #     Calculando RMSD
                        # =============================================================
                        P = ORDERED[i][3]
                        Q = ORDERED[j][3]
                        rmsd_matrix[i][j] = kabsch_rmsd(P, Q, translate = True)
            index = [g.split('_Cell_')[-1].split('.')[0] for g in good]
            to_print_rmsd_matrix = pd.DataFrame(rmsd_matrix, index = index, columns = index)
            with open(first_name+'.rmsd', 'wt') as final:
                to_print_rmsd_matrix.to_string(final)
        #========================Se exporta el .xyz==================
            if xyz_out:
                with open(ORDERED[0][0].split('.')[0]+'.xyz', 'wt') as final:
                    final.write(str(len(ORDERED[0][2]))+'\n\n')
                    ORDERED[0][2].to_string(final, header=False, index=False)

        
    #======================================================================
    if wrong_freq:
        print(f'Las cálculos: {wrong_freq} presentaron frecuencias negativas o imaginarias.')
    if wrong_end:
        print(f'Las cálculos: {wrong_end} presentaron problemas para un correcta finalizacion. Check the psi4 output!')

    return coord_lower_energy



if __name__ == '__main__':...