Source code for boutpy.boututils.compare_inp

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Parse config files.

* parse configuration files
* convert a nested dict to a **level1** (flat) dict
* compare BOUT.inp files
* build a DataFrame with a ('Sec', 'Opt') multi-index for a config file

"""

from __future__ import (absolute_import, division,
                        print_function, unicode_literals)

__all__ = ['str_to_num', 'parser_config', 'dict_to_level1',
           'multi_index', 'compare_inp']

__date__ = '01/04/2018'
__version__ = '0.5.1.1'
__author__ = 'J.G. Chen'
__email__ = 'cjgls@pku.edu.cn'

from StringIO import StringIO
import pandas as pd

import numpy as np
from configobj import ConfigObj
from boutpy.boututils.functions import sort_nicely


def str_to_num(x):
    """Convert a text value to ``int`` or ``float`` when possible.

    Parameters
    ----------
    x : object
        value to convert. Only text values are converted; any other
        object is handed back untouched.

    Returns
    -------
    int | float | object
        ``int(x)`` if that succeeds, else ``float(x)`` if that succeeds,
        else ``x`` itself.

    Examples
    --------
    .. code-block:: python

        >>> str_to_num('250')       # 250
        >>> str_to_num('1.e0')      # 1.0
        >>> str_to_num('neumann')   # 'neumann'
    """
    # This module enables ``unicode_literals``, so on Python 2 text may be
    # ``unicode`` rather than ``str``; ``basestring`` covers both there.
    try:
        text_types = basestring
    except NameError:  # Python 3: ``str`` is the only text type
        text_types = str

    if not isinstance(x, text_types):
        return x
    # try the strictest conversion first so '250' stays an integer
    for cast in (int, float):
        try:
            return cast(x)
        except ValueError:
            continue
    return x
def dict_colored(Dict, format_code=[98, 91, 94, 92, 95, 93, 96]):
    """Wrap every value of a flat dict in an ANSI colour escape sequence.

    Values are ranked by how often they occur in ``Dict``: the most common
    value is formatted with ``format_code[0]``, the next one with
    ``format_code[1]`` and so on. Values that occur equally often are
    ranked in ascending order.

    Parameters
    ----------
    Dict : dict
        flat mapping whose values are hashable.
    format_code : list of int, optional
        SGR codes, one per distinct value. If there are fewer codes than
        entries in ``Dict`` the list is padded with ``1``. The list passed
        in is never modified.

    Returns
    -------
    dict
        same keys as ``Dict``; every value is the string
        ``ESC[<code>m<value>ESC[0m``. An empty ``Dict`` gives ``{}``.

    Notes
    -----
    A dict holding a single distinct value is wrapped as well (with
    ``format_code[0]``), so every cell carries the same amount of
    invisible escape characters.
    """
    from collections import Counter

    format_base = '\033[{}m{}\033[0m'
    if not Dict:
        return {}

    codes = list(format_code)
    shortfall = len(Dict) - len(codes)
    if shortfall > 0:
        # integer padding: a float here would render as the invalid 'ESC[1.0m'
        codes.extend([1] * shortfall)

    # Counter keeps the original objects as keys, so a row mixing numbers
    # and strings (e.g. 1 and 'NaN') can still be looked up afterwards.
    tally = Counter(Dict.values())
    try:
        ranked = sorted(tally)
    except TypeError:
        # Python 3 cannot order numbers against strings
        ranked = sorted(tally, key=str)
    # stable sort: ties keep the ascending value order established above
    ranked.sort(key=tally.get, reverse=True)

    code_of = dict(zip(ranked, codes))
    return {key: format_base.format(code_of[value], value)
            for key, value in Dict.items()}


def df_colored(dataframe, format_code=[98, 91, 94, 92, 95, 93, 96],
               columns=[('Sec', 'Opt')], title_color=47):
    """Add colour escape sequences to a DataFrame to highlight differences.

    Every row is coloured independently by :func:`dict_colored`, and an
    extra title row holding the coloured column names is put on top.

    Parameters
    ----------
    dataframe : DataFrame
        table to colour; missing cells are shown as ``'NaN'``.
    format_code : list of int, optional, default: [k, r, b, g, c, y, m]
        SGR colour codes used for the cells, most common value first.
    columns : list of tuple, optional, default: [('Sec', 'Opt')]
        index label given to the title row.
    title_color : int, optional, default: 47
        SGR code used for the title row.

    Returns
    -------
    DataFrame
        string-valued copy of ``dataframe`` with the title row first and
        the columns in their original order.

    Raises
    ------
    TypeError
        if ``dataframe`` is not a ``pandas.DataFrame``.
    """
    if not isinstance(dataframe, pd.DataFrame):
        raise TypeError("DataFrame type required!!")

    rows = dataframe.fillna('NaN').to_dict('index')
    colored = {label: dict_colored(row, format_code=format_code)
               for label, row in rows.items()}
    result = pd.DataFrame.from_dict(colored, orient='index')

    # The escape codes make every cell longer than it looks, so the column
    # names are added as a coloured data row instead of a plain header.
    case_names = sort_nicely(result.columns)
    title = pd.DataFrame(
        ['\033[{}m{}\033[0m'.format(title_color, name)
         for name in case_names],
        columns=columns, index=case_names).T
    # pd.concat replaces DataFrame.append, which newer pandas dropped
    result = pd.concat([title, result])
    result = result.reindex(pd.MultiIndex.from_tuples(result.index))
    # keep original columns' order
    result = result.reindex(columns=dataframe.columns)
    return result
def dict_to_level1(Dict, sep='.'):
    """Flatten a nested dict into a single-level dict.

    Parameters
    ----------
    Dict : dict
        mapping whose values may themselves be dicts, to any depth.
    sep : char
        separator placed between the keys of successive nesting levels.

    Returns
    -------
    result : dict
        mapping without any dict values; nested entries are stored under
        their joined key path. An empty dict is returned when ``Dict`` is
        not a dict.

    Examples
    --------
    .. code-block:: python

        >>> test1 = {'A': {'a': 1,'b': 2}, 'B': 3}
        >>> dict_to_level1(test1)
        # {'A.a': 1, 'A.b': 2, 'B': 3}
        >>> test2 = {'A':{'a': 1, 'b': 2}, 'a': 3}
        >>> dict_to_level1(test2)
        # {'A.a': 1, 'A.b': 2, 'a': 3}

    """
    flat = {}
    if not isinstance(Dict, dict):
        return flat

    for name, value in Dict.items():
        if isinstance(value, dict):
            # push the parent key into the children's names, then let the
            # recursion deal with any deeper nesting
            prefixed = {sep.join([name, child]): grandchild
                        for child, grandchild in value.items()}
            flat.update(dict_to_level1(prefixed, sep=sep))
        else:
            flat[name] = value
    return flat
def parser_config(configfile, level1=False, header=None, dataframe=False):
    r"""Parse a config file (or config text) into a dict.

    Parameters
    ----------
    configfile : string
        path to a config file. If it cannot be opened as a file, the
        string itself is parsed as config text.
    level1 : bool, optional, default: False
        If ``level1=False`` [default], sections stay nested dicts.
        Otherwise the result is flattened by :func:`dict_to_level1`, i.e.
        keyed by 'sec.opt'.
    header : string, optional
        section header put in front of the text, so that options
        preceding the first section end up in that section.
        Default is ``None`` which means no header is added.
    dataframe : bool, optional, default: False
        return the result in pd.DataFrame format if ``dataframe=True``.

    Returns
    -------
    result : dict | pd.DataFrame
        a multi-level dict by default, or
        a dict with 'sec.subsec[.subsubsec...].opt' as key
        if ``level1 = True``.
        With ``dataframe=True`` the dict is turned into a pd.DataFrame
        whose cells went through :func:`str_to_num`.

    Examples
    --------
    .. code-block:: python

        >>> result = parser_config('/path/to/BOUT.inp')
        # {'NOUT': '250', 'TIMESTEP': '1.e0',
        #  ...
        #  'kappa': {'bndry_core': 'neumann', 'bndry_sol': 'neumann'}}
        >>> result = parser_config('[header]\noption1=1', header='main')
        # {'header': {'option1': '1'}}
        >>> result = parser_config('#comments\noptions=1')
        # {'options': '1'}
        >>> result = parser_config('#comments\noptions=1', header='main')
        # {'main': {'options': '1'}}

    """
    # for Python>3.2
    # import configparser
    # configparser.read_string('[main]\noption=1')

    try:
        with open(configfile) as stream:
            text = stream.read()
    except IOError:
        # not a file, try to parse it directly
        text = configfile

    if header is not None:
        text = '[{}]\n{}'.format(header, text)

    parsed = ConfigObj(StringIO(text))
    # The added header section is empty when the text opened with a
    # section of its own; drop it in that case.
    if header in parsed and len(parsed[header]) == 0:
        parsed.pop(header)
    result = dict(parsed)

    if level1:  # WARNING: deprecated
        result = dict_to_level1(result)

    if dataframe:
        # Transpose first, otherwise the DataFrame tries to keep a single
        # dtype per column, e.g.
        #   df = pd.DataFrame([[1, 2], [5.3, 6]])
        #   df:
        #       1.0     2
        #       5.3     6
        result = pd.DataFrame.from_dict(result).T.applymap(str_to_num)

    return result
def multi_index(boutinp, header='global', column='Value'):
    """Return a pandas.DataFrame with ['Sec', 'Opt'] as multi-index.

    Parameters
    ----------
    boutinp : str
        path to BOUT.inp file.
    header : str, optional, default: 'global'
        the header name for the first section
    column : str, optional, default: 'Value'
        column's name

    Returns
    -------
    df : pandas.DataFrame
        single-column DataFrame indexed by (section, option); the cells
        went through :func:`str_to_num`.

    """
    boutinp_dict = parser_config(boutinp, header=header)

    # stack --> obtain multi-index pd.Series indexed by (option, section)
    series = pd.DataFrame.from_dict(boutinp_dict).stack()
    # index: ['Sec', 'Opt']
    series = series.swaplevel()
    # group by section name
    series = series.sort_index()

    df = pd.DataFrame(series)
    # Reindex on the section names as listed by the parsed dict.
    # ``reindex`` is used because ``reindex_axis`` no longer exists in
    # pandas >= 1.0; the keys are copied into a list because pandas
    # expects list-like labels, not a dict view.
    df = df.reindex(index=list(boutinp_dict.keys()), level=0)
    df.index = df.index.set_names(['Sec', 'Opt'])
    # the unnamed Series became column 0
    df = df.rename(columns={0: column})
    return df.applymap(str_to_num)
def compare_inp(*configfiles, **kwargs):
    """Comparison of configfiles, return & print details of differences.

    Parameters
    ----------
    configfiles : list|tuple of strings
        path of BOUT.inps. Paths may be given one by one or as lists or
        tuples; paths inside a list/tuple are ordered with
        ``sort_nicely`` and lose a trailing '/BOUT.inp' in their
        case name.
    grid : bool, optional, default: False
        output grid names.
    quiet : bool, optional, default: False
        print nothing to screen if `quiet` is True.
    case : bool, optional, default: True
        case sensitive if `case` is True
    short : bool, optional, default: True
        Using short name ('case0', 'case1', ...) for cases
    render : bool, optional, default: True
        highlight different values in terminal output.

    Returns
    -------
    tuple(cases, table)
    cases : dict
        maps the short name 'caseN' to the name of each case
    table : pandas.DataFrame
        differences of each case, indexed by (section, option) with one
        column per case. The 'grid' option of the first section is
        always left out. With a single config file all other options
        are kept.

    Notes
    -----
    Calling this function changes the global pandas display options
    (``max_colwidth``, ``max_rows``, ``max_columns``, ``width``).

    Examples
    --------
    .. code-block:: python

        >>> import glob
        >>> cases, table = compare_inp(glob.glob("*/BOUT.inp"), grid=False)

    """
    # header for the beginning section
    header = 'global'
    showgrid = kwargs.get('grid', False)
    # case sensitive or not
    casesensitive = kwargs.get('case', True)
    render = kwargs.get('render', True)
    # using short name or not
    shortname = kwargs.get('short', True)
    quiet = kwargs.get('quiet', False)

    try:
        pd.set_option('display.max_colwidth', None)
    except ValueError:
        # pandas < 1.0 spells "unlimited" as -1 and rejects None
        pd.set_option('display.max_colwidth', -1)
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    # Collect (path, case name) pairs in the order they get numbered.
    entries = []
    for item in configfiles:
        if isinstance(item, (list, tuple)):
            for path in sort_nicely(item):
                if path.endswith('/BOUT.inp'):
                    entries.append((path, path.replace('/BOUT.inp', '')))
                else:
                    entries.append((path, path))
        else:
            entries.append((item, item))

    result_dict = {}
    cases = {}
    for count, (path, name) in enumerate(entries):
        case = 'case{}'.format(count)
        cases[case] = name
        result_dict[case] = multi_index(path, header=header, column=case)

    # secs' name in the first boutinp
    boutinp_secs = result_dict['case0'].index.levels[0]

    table = pd.concat(list(result_dict.values()), axis=1)
    table = table.reindex(index=boutinp_secs, level=0)
    table = table.sort_index(axis=1)

    # options with one and the same value in every case are not differences
    if casesensitive:
        drop_keys = [i for i in table.index
                     if len(table.loc[i].unique()) == 1]
    else:
        drop_keys = [i for i in table.index
                     if len(table.loc[i].astype(str).str.lower().unique())
                     == 1]

    case_names = list(cases.keys())
    df_cases = pd.DataFrame([cases[name] for name in case_names],
                            index=case_names, columns=['Name'])
    df_cases = df_cases.loc[sort_nicely(df_cases.index)]
    if shortname and (not quiet):
        print("-" * 50)
        print("cases:\n", df_cases.to_string(justify='left'))

    # extract grid info due to its value too long
    grid_ind = (header, 'grid')
    # a config file is not required to have a grid option; looking it up
    # or dropping it blindly would raise KeyError
    has_grid = grid_ind in table.index
    if has_grid and grid_ind not in drop_keys:
        drop_keys.append(grid_ind)

    if has_grid and showgrid and (not quiet):
        grid = pd.DataFrame(table.loc[grid_ind])[header]
        print("-" * 50)
        if shortname:
            print("grids:\n", grid.to_string(justify='left'))
        else:
            print("grids:\n",
                  grid.rename(index=cases).to_string(justify='left'))

    if len(cases) > 1:
        table = table.drop(drop_keys)
    elif has_grid:
        # a single case has nothing to compare with: keep all but the grid
        table = table.drop([grid_ind])

    # nicely sort columns
    table = table.reindex(columns=sort_nicely(table.columns))

    if not table.empty:
        if not shortname:
            table = table.rename(columns=cases)
        if not quiet:
            print("-" * 50)
            print("differences:")
            if render:
                print(df_colored(table).to_string(
                    justify='left', header=False))
            else:
                print(table.to_string(justify='left'))
    elif not quiet:
        print(">>>>>> **ALL** options are **SAME** <<<<<<")

    return cases, table
if __name__ == '__main__':
    # execute only if run as a script
    import argparse

    cli = argparse.ArgumentParser(
        description='get differences of configure files',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument("configfiles", nargs='*',
                     help="name of configure files")

    # Every switch is a 'store_false' flag: the option is True unless the
    # switch is given on the command line.
    switches = (
        ("-g", "--grid", "output difference of grid names"),
        ("-c", "--case", "case sensitive about the option value"),
        ("-r", "--render", "render the output in terminal"),
        ("-s", "--short", "use short name for cases name"),
    )
    for short_flag, long_flag, description in switches:
        cli.add_argument(short_flag, long_flag, action='store_false',
                         help=description)

    options = cli.parse_args()
    tmp = compare_inp(options.configfiles,
                      grid=options.grid,
                      case=options.case,
                      short=options.short,
                      render=options.render)