Add R-Factor support.

The Curve comparison is possible through R-Factor analysis. Further comparison like shape analysis, similarity index... will be included later. Data export is now possible through the gui menu or as a method of the Data object.
2020-07-22 18:32:15 +02:00 · 2020-07-22 18:32:15 +02:00 · cd3fb05932
parent 3a20719d19
commit cd3fb05932
7 changed files with 16126 additions and 40 deletions
--- a/src/msspec/calcio.py
+++ b/src/msspec/calcio.py
@ -13,6 +13,7 @@ import numpy as np
 import ase
 from ase import Atom, Atoms
 from ase.data import chemical_symbols
 import os
 from msspec.misc import UREG, LOGGER
 from msspec.utils import get_atom_index
@ -1015,3 +1016,248 @@ class SpecIO(object):
        with open(filename, 'r') as fd:
            content = fd.read()
        return pat.findall(content)
 class CompCurveIO(object):
    def __init__(self, parameters):
        self.parameters = parameters
    def write_input_file(self, filename='comp_curve.dat'):
        def title(t, shift=4, width=78, center=True):
            if center:
                s = ('{}*{:^%ds}*\n' % (width - shift - 2)
                     ).format(' ' * shift, t)
            else:
                s = ('{}*{:%ds}*\n' % (width - shift - 2)
                     ).format(' ' * shift, t)
            return s
        def rule(tabs=(5, 10, 10, 10, 10), symbol='=', shift=4, width=78):
            s = ' ' * shift + '*' + symbol * (width - shift - 2) + '*\n'
            t = np.cumsum(tabs) + shift
            sep = list(s)
            for i in t:
                sep[i] = '+'
            return ''.join(sep)
        def fillstr(a, b, index, justify='left'):
            alist = list(a)
            if justify == 'left':
                offset = -len(b) + 1
            elif justify == 'center':
                offset = (-len(b) + 1) / 2
            elif justify == 'decimal':
                try:
                    offset = -(b.index('.') - 1)
                except ValueError:
                    offset = 0
            else:
                offset = 0
            for i, _ in enumerate(b):
                alist[int(index + offset + i)] = _
            return ''.join(alist)
        def create_line(legend='', index=49, dots=False):
            s = ' ' * 78 + '\n'
            if dots:
                s = fillstr(s, "..", 6, justify='right')
            s = fillstr(s, "*", 4)
            s = fillstr(s, "*", 77)
            s = fillstr(s, legend, index, justify='right')
            return s
        p = self.parameters
        content = rule(tabs=(), symbol='*')
        content += title('R-FACTOR, SIMILARITY INDEX, DISTANCE, GOODNESS OF FIT')
        content += title('KERNEL DISTANCE AND SHAPE ANALYSIS')
        content += rule(tabs=(), symbol='*')
        content += rule(tabs=(5,10,10,10,10))
        content += create_line('CALCULATION PARAMETERS : GENERAL', index=29)
        content += rule(tabs=(5,10,10,10,10))
        line = create_line("N_PAR,NORM,I_SCALE,I_NORM")
        line = fillstr(line, str(p.get_parameter('general_npar')), 9, 'left')
        line = fillstr(line, str(p.get_parameter('general_norm')), 19, 'left')
        line = fillstr(line, str(p.get_parameter('general_iscale')), 29, 'left')
        line = fillstr(line, str(p.get_parameter('general_inorm')), 39, 'left')
        content += line
        line = create_line("I_SYM,SYM,I_POSI")
        line = fillstr(line, str(p.get_parameter('general_isym')), 9, 'left')
        line = fillstr(line, str(p.get_parameter('general_sym')), 19, 'decimal')
        line = fillstr(line, str(p.get_parameter('general_iposi')), 29, 'left')
        content += line
        line = create_line("I_DIST,I_CUR,I_SA,I_PRINT")
        line = fillstr(line, str(p.get_parameter('general_idist')), 9, 'left')
        line = fillstr(line, str(p.get_parameter('general_icur')), 19, 'left')
        line = fillstr(line, str(p.get_parameter('general_isa')), 29, 'left')
        line = fillstr(line, str(p.get_parameter('general_iprint')), 39, 'left')
        content += line
        content += rule(tabs=(5,10,10,10,10))
        content += create_line('CALCULATION PARAMETERS : WEIGHTS', index=29)
        content += rule(tabs=(5,10,10,10,10))
        line = create_line("I_WEIGHT,ALPHA,BETA,SIGMA")
        line = fillstr(line, str(p.get_parameter('weights_iweight')), 9, 'left')
        line = fillstr(line, str(p.get_parameter('weights_alpha')), 19, 'decimal')
        line = fillstr(line, str(p.get_parameter('weights_beta')), 29, 'decimal')
        line = fillstr(line, str(p.get_parameter('weights_sigma')), 39, 'decimal')
        content += line
        line = create_line("I_SHIFT,MAXW")
        line = fillstr(line, str(p.get_parameter('weights_ishift')), 9, 'left')
        line = fillstr(line, str(p.get_parameter('weights_maxw')), 19, 'decimal')
        content += line
        content += rule(tabs=(5,10,10,10,10))
        content += create_line('CALCULATION PARAMETERS : R-FACTORS', index=29)
        content += rule(tabs=(5,10,10,10,10))
        line = create_line("V_I")
        line = fillstr(line, str(p.get_parameter('rfc_vi')), 9, 'decimal')
        content += line
        content += rule(tabs=(5,10,10,10,10))
        content += create_line('CALCULATION PARAMETERS : SIMILARITY INDICES', index=29)
        content += rule(tabs=(5,10,10,10,10))
        line = create_line("ALHPAS,BETAS,N_BINS")
        line = fillstr(line, str(p.get_parameter('sim_alphas')), 9, 'decimal')
        line = fillstr(line, str(p.get_parameter('sim_betas')), 19, 'decimal')
        line = fillstr(line, str(p.get_parameter('sim_nbins')), 29, 'left')
        content += line
        content += rule(tabs=(5,10,10,10,10))
        content += create_line('CALCULATION PARAMETERS : DISTANCES', index=29)
        content += rule(tabs=(5,10,10,10,10))
        line = create_line("ALHPAD,I_BETA,L,SIGMAD")
        line = fillstr(line, str(p.get_parameter('dist_alphad')), 9, 'decimal')
        line = fillstr(line, str(p.get_parameter('dist_ibeta')), 19, 'left')
        line = fillstr(line, str(p.get_parameter('dist_l')), 29, 'left')
        line = fillstr(line, str(p.get_parameter('dist_sigmad')), 39, 'decimal')
        content += line
        content += rule(tabs=(5,10,10,10,10))
        content += create_line('CALCULATION PARAMETERS : GOODNESS OF FIT', index=29)
        content += rule(tabs=(5,10,10,10,10))
        line = create_line("N_BING,ALPHAG")
        line = fillstr(line, str(p.get_parameter('gof_nbing')), 9, 'left')
        line = fillstr(line, str(p.get_parameter('gof_alphag')), 19, 'decimal')
        content += line
        content += rule(tabs=(5,10,10,10,10))
        content += create_line('CALCULATION PARAMETERS : KERNEL DISTANCES', index=29)
        content += rule(tabs=(5,10,10,10,10))
        line = create_line("ALPHAK,L,SIGMAK")
        line = fillstr(line, str(p.get_parameter('kdist_alphak')), 9, 'decimal')
        line = fillstr(line, str(p.get_parameter('kdist_l')), 19, 'left')
        line = fillstr(line, str(p.get_parameter('kdist_sigmak')), 29, 'decimal')
        content += line
        content += rule(tabs=(5,10,10,10,10))
        content += create_line('CALCULATION PARAMETERS : MOMENTS', index=29)
        content += rule(tabs=(5,10,10,10,10))
        line = create_line("N_GRID,N_MOM,BASIS")
        line = fillstr(line, str(p.get_parameter('mom_ngrid')), 9, 'left')
        line = fillstr(line, str(p.get_parameter('mom_nmom')), 19, 'left')
        line = fillstr(line, str(p.get_parameter('mom_basis')), 29, 'left')
        content += line
        line = create_line("I_ALG,MU,NU")
        line = fillstr(line, str(p.get_parameter('mom_ialg')), 9, 'left')
        line = fillstr(line, str(p.get_parameter('mom_mu')), 19, 'decimal')
        line = fillstr(line, str(p.get_parameter('mom_nu')), 29, 'decimal')
        content += line
        content += rule(tabs=(5,10,10,10,10))
        content += create_line('CALCULATION PARAMETERS : CHORDS', index=29)
        content += rule(tabs=(5,10,10,10,10))
        line = create_line("I_CHORD,METHOD,VALUE,N_BINC")
        line = fillstr(line, str(p.get_parameter('chords_ichord')), 9, 'left')
        line = fillstr(line, str(p.get_parameter('chords_method')), 19, 'left')
        line = fillstr(line, str(p.get_parameter('chords_value')), 29, 'left')
        line = fillstr(line, str(p.get_parameter('chords_nbinc')), 39, 'left')
        content += line
        content += rule(tabs=(5,10,10,10,10))
        content += create_line('CALCULATION PARAMETERS : CHAIN CODES', index=29)
        content += rule(tabs=(5,10,10,10,10))
        line = create_line("N_CONNECT,SCALEC")
        line = fillstr(line, str(p.get_parameter('codes_nconnect')), 9, 'left')
        line = fillstr(line, str(p.get_parameter('codes_scalec')), 19, 'decimal')
        content += line
        content += rule(tabs=(5,10,10,10,10))
        content += create_line('CALCULATION PARAMETERS : CONTOUR', index=29)
        content += rule(tabs=(5,10,10,10,10))
        line = create_line("NBIN,N_LEN,SH_AN,I_FOU")
        line = fillstr(line, str(p.get_parameter('cont_nbin')), 9, 'left')
        line = fillstr(line, str(p.get_parameter('cont_nlen')), 19, 'left')
        line = fillstr(line, str(p.get_parameter('cont_shan')), 29, 'left')
        line = fillstr(line, str(p.get_parameter('cont_ifou')), 39, 'left')
        content += line
        line = create_line("I_NORM")
        line = fillstr(line, str(p.get_parameter('cont_ifou')), 9, 'left')
        content += line
        content += rule(tabs=(5,10,10,10,10))
        content += create_line('EXPERIMENTAL INPUT FILE :', index=29)
        content += rule(tabs=(), symbol='-')
        line = create_line('')
        line = fillstr(line, 'NAME', 14, 'right')
        line = fillstr(line, 'TYPE', 58, 'right')
        content += line
        content += rule(tabs=(5,40))
        line = create_line('EXPERIMENT', index=49)
        line = fillstr(line, str(p.get_parameter('exp_filename')), 9, 'right')
        content += line
        content += rule(tabs=(5,40))
        content += create_line('CALCULATED INPUT FILE :', index=29)
        content += rule(tabs=(), symbol='-')
        line = create_line('')
        line = fillstr(line, 'NAME', 14, 'right')
        line = fillstr(line, 'PARAMETER 1', 49, 'right')
        line = fillstr(line, 'PARAMETER 2', 63, 'right')
        content += line
        content += rule(tabs=(5,40,7,7,7))
        calc_fnames = p.get_parameter('calc_filename').value
        for fname in calc_fnames:
            line = create_line('')
            line = fillstr(line, fname, 9, 'right')
            line = fillstr(line, str(p.get_parameter('calc_param1')), 56, 'decimal')
            line = fillstr(line, str(p.get_parameter('calc_param2')), 70, 'decimal')
            content += line
        content += rule(tabs=(5,40,7,7,7))
        content += create_line('END OF DATA FILE', index=31)
        content += rule(tabs=())
        content += rule(tabs=(),symbol='*')
        # Write the content to filename
        try:
            with open(filename, 'r') as fd:
                old_content = fd.read()
        except IOError:
            old_content = ''
        modified = False
        if content != old_content:
            with open(filename, 'w') as fd:
                fd.write(content)
            modified = True
        return modified
    def load_results(self, index=0, prefix='rfc/experiment_rf'):
        data = []
        for i in range(1, 13):
            #data.append(np.loadtxt(prefix + f'{i:02d}' + '.txt')[-1])
            results = np.loadtxt(prefix + f'{i:02d}' + '.txt')
            results = results.reshape((-1, 2))
            data.append(results[index,1])
        suffix = 'ren'
        exp = {'int': None, 'ren': None, 'chi': None, 'cdf': None}
        exp_ren = np.loadtxt(os.path.join('exp', 'div',
                                          f'experiment_{suffix}.txt'))
        calc_ren = np.loadtxt(os.path.join('calc', 'div',
                                           f'calculation{index:d}_{suffix}.txt'))
        return data, exp_ren, calc_ren
--- a/src/msspec/calculator.py
+++ b/src/msspec/calculator.py
@ -42,6 +42,7 @@ For more information on MsSpec, follow this
 """
 import inspect
 import logging
 import os
 import re
 import shutil
@ -63,6 +64,7 @@ from ase.calculators.calculator import Calculator
 from terminaltables.ascii_table import AsciiTable
 from msspec import iodata
 from msspec.calcio import CompCurveIO
 from msspec.calcio import PhagenIO
 from msspec.calcio import SpecIO
 from msspec.data import electron_be
@ -74,6 +76,8 @@ from msspec.misc import set_log_output
 from msspec.misc import UREG
 from msspec.misc import XRaySource
 from msspec.parameters import CalculationParameters
 from msspec.parameters import CompCurveParameters
 from msspec.parameters import CompCurveGeneralParameters
 from msspec.parameters import DetectorParameters
 from msspec.parameters import EIGParameters
 from msspec.parameters import GlobalParameters
@ -91,6 +95,7 @@ from msspec.spec.fortran import _eig_mi
 from msspec.spec.fortran import _eig_pw
 from msspec.spec.fortran import _phd_mi_noso_nosp_nosym
 from msspec.spec.fortran import _phd_se_noso_nosp_nosym
 from msspec.spec.fortran import _comp_curves
 from msspec.utils import get_atom_index
@ -1003,6 +1008,228 @@ def MSSPEC(spectroscopy='PED', **kwargs):
    return cls(**kwargs)
 class RFACTOR(object):
    def __init__(self, folder='./rfc'):
        self.iodata = iodata.Data('R-Factor analysis')
        self.folder = folder
        self._params = CompCurveParameters()
        self.general_parameters = CompCurveGeneralParameters(
            self._params)
        self.io = CompCurveIO(self._params)
        # prepare the working area
        if not os.path.exists(self.folder):
            os.makedirs(self.folder)
            os.makedirs(os.path.join(self.folder, 'rfc'))
            os.makedirs(os.path.join(self.folder, 'exp/div'))
            os.makedirs(os.path.join(self.folder, 'calc/div'))
            os.makedirs(os.path.join(self.folder, 'plot'))
        # store the r-factor analysis results
        # self.variables = {'variable0_name': [value0, value1, ...],
        #                   'variable1_name': [value0, value1, ...],
        #                   ...}
        self.variables = {}
        # self.rfc = [[rf0_0, rf0_1, ... , rf0_11],    <-- run 0
        #             [rf1_0, rf1_1, ... , rf1_11],    <-- run 1
        #             ............................,
        #             [rfn_0, rfn_1, ... , rfn_11]]    <-- run n
        self.rfc = []
        # The x and y array to compare to
        self.xref = self.yref = [0,]
        # The index of the best value
        self.index = 0
        # The best values as a dictionary
        self.best_values = {}
        # The number of calculation files in the stack. This counter is
        # inremented each time calculation is run
        self.stack_count = 0
        # initialize all parameters with defaults values
        self.logger = logging.getLogger("RFACTOR")
        self.logger.info("Set default values =========================================")
        for p in (list(self.general_parameters)):
            p.set(p.default)
        self.logger.info("End of default values ======================================")
        #exit()
    def set_references(self, X, Y):
        self.xref = X
        self.yref = Y
    def run(self, *args, data=None, **kwargs):
        # Get the data object if provided
        #data = kwargs.pop('data', None)
        if data:
            assert isinstance(data, iodata.Data), ("Unsupported type for data"
                                                   "keyword.")
            self.iodata = data
        # Move to the working folder
        cwd = os.getcwd()
        os.chdir(self.folder)
        # Write the reference data
        np.savetxt('exp/experiment.txt', np.transpose([self.xref, self.yref]))
        # Write all the input calculation files
        # Number of input files
        noif = int(len(args)/2)
        for i in range(noif):
            X, Y = args[2*i], args[2*i+1]
            fname = os.path.join('calc',
                                 f'calculation{self.stack_count:d}.txt')
            # And save to the working space
            np.savetxt(fname, np.transpose([X, Y]))
            self.stack_count += 1
        # Update the list of input calculation files
        self._params.calc_filename = []
        for i in range(self.stack_count):
            fname = os.path.join('calc', f'calculation{i:d}.txt')
            self._params.calc_filename.append(fname)
        # Write the input file
        self.io.write_input_file('comp_curves.dat')
        # And finally run
        _comp_curves.run()
        #######################################################################
        # Load the results
        #######################################################################
        self.rfc = []
        for i in range(self.stack_count):
            # Read results files and get the R-Factors, exp data and calc
            # data for file index 'i'
            rfc, exp_data, calc_data = self.io.load_results(index=i)
            # Update the list of R-Factors results
            self.rfc.append(rfc)
        # Update the variables values
        for i in range(noif):
            for k,v in kwargs.items():
                try:
                    vi = v[i]
                except (IndexError, TypeError) as err:
                    vi = v
                try:
                    self.variables[k].append(vi)
                except KeyError as err:
                    self.variables[k] = [vi,]
        #######################################################################
        # Analysis
        #######################################################################
        rfc = np.array(self.rfc)
        # Get the index of the minimum for each R-Factor (each column)
        all_min = np.argmin(rfc, axis=0)
        # Iterate over each run and get the number of R-Factors that are min
        # for this set
        all_counts = np.zeros(self.stack_count, dtype=int)
        for i in range(self.stack_count):
            all_counts[i] = len(np.where(all_min==i)[0])
        # The best set of variables (ie the run index) is the one with the
        # highest number of counts:
        self.index = np.argmax(all_counts)
        # Update the "best values" dict
        self.best_values = {k:self.variables[k][self.index] for k in
                            self.variables.keys()}
        # with np.printoptions(precision=6, linewidth=300, threshold=200):
            # print('rfc: ')
            # print(rfc)
            # print('all_min: ', all_min)
            # print('all_counts: ', all_counts)
            # print('variables: ', self.variables)
        #######################################################################
        # Store values
        #######################################################################
        # Three datasets will be created or existing ones will be reused if
        # any.
        dset_values_title = "CurveComparison Values"
        dset_results_title = "CurveComparison Results"
        dset_rfc_title = "CurveComparison R-Factors"
        # Create (or re-create) the datasets
        dset_values = self.iodata.add_dset(dset_values_title, overwrite=True)
        dset_values.add_columns(x=[], yref=[])
        view_values = dset_values.add_view("Comparison", xlabel="X", ylabel="Y",
                                           autoscale=True, overwrite=True)
        dset_results = self.iodata.add_dset(dset_results_title, overwrite=True)
        dset_results.add_columns(variable_set=list(range(self.stack_count)),
                                 counts=all_counts.copy())
        dset_results.add_columns(**self.variables)
        view_results = dset_results.add_view("R-Factor analysis",
                                             xlabel="variable set number",
                                             ylabel="counts",
                                             title=("Number of R-Factors "
                                                    "minima for each set of "
                                                    "variables"))
        dset_rfc = self.iodata.add_dset(dset_rfc_title, overwrite=True)
        dset_rfc.add_columns(rfactor_number=list(range(12)))
        view_rfc = dset_rfc.add_view("R-Factor results", xlabel="R-Factor #",
                                     ylabel="R-Factor value",
                                     title="", autoscale=True, marker="s")
        for i in range(self.stack_count):
            rfc, exp_data, calc_data = self.io.load_results(index=i)
            # Store the experimental data
            dset_values.x, dset_values.yref = exp_data.T
            # Append the calculated values
            ycalc = calc_data[:,1]
            dset_values.add_columns(**{f"calc{i:d}": ycalc})
            dset_rfc.add_columns(**{f'variable_set{i:d}': rfc})
        # Plot the curves
        view_values.select('x', 'yref', legend='Reference values')
        title = ''
        for k,v in self.best_values.items():
            title += f'{k}={v} '
        view_values.select('x', f"calc{self.index:d}",
                           legend="Best calculated values")
        view_values.set_plot_options(title=title)
        view_results.select('counts')
        for i in range(self.stack_count):
            view_rfc.select('rfactor_number', f'variable_set{i:d}',
                            legend=f"variables set #{i:d}")
        # Save the parameters
        for p in self.get_parameters():
            bundle = {'group': str(p.group),
                      'name': str(p.name),
                      'value': str(p.value),
                      'unit': '' if p.unit is None else str(p.unit)}
            dset_results.add_parameter(**bundle)
        # Move back to the initial folder
        os.chdir(cwd)
        # And return the Data object
        return self.iodata
    def get_parameters(self):
        """Get all the defined parameters in the calculator.
        :return: A list of all parameters objects.
        :rtype: List of :py:class:`parameters.Parameter`
        """
        _ = []
        for section in ('general', ):
            parameters = getattr(self, section + '_parameters')
            for p in parameters:
                _.append(p)
        return _
 if __name__ == "__main__":
    pass
--- a/src/msspec/iodata.py
+++ b/src/msspec/iodata.py
@ -72,6 +72,7 @@ import sys
 from distutils.version import LooseVersion
 from distutils.version import StrictVersion
 from io import StringIO
 from datetime import datetime
 import ase.io
 import h5py
@ -79,6 +80,7 @@ import numpy as np
 import wx.grid
 from lxml import etree
 from matplotlib.backends.backend_wxagg import FigureCanvasWxAgg as FigureCanvas
 from matplotlib.backends.backend_agg import FigureCanvasAgg
 from matplotlib.backends.backend_wxagg import NavigationToolbar2WxAgg
 from matplotlib.figure import Figure
 from terminaltables import AsciiTable
@ -124,6 +126,18 @@ def cols2matrix(x, y, z, nx=88*1+1, ny=360*1+1):
    return ux, uy, zz
 def is_title_valid(title):
    """ Ensure the string does not contain special characters:
        /\:*?"<>|
    """
    special_chars = ('/', '\\', ':', '*', '?', '\"', '<', '>', '|')
    for char in special_chars:
        if title.find(char) > -1:
            return False
    return True
 class _DataPoint(dict):
    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
@ -146,6 +160,12 @@ class DataSet(object):
    """
    def __init__(self, title, notes=""):
        assert is_title_valid(title), '\/:*?"<>| are not allowed in the string'
        #self._col_names = []
        #self._col_arrays = []
        self.__dict__['_col_names'] = []
        self.__dict__['_col_arrays'] = []
        self.title = title
        self.notes = notes
        self._views = []
@ -153,12 +173,16 @@ class DataSet(object):
        self.attributes = {}
        self._col_names = []
        self._col_arrays = []
        self._defaults = {'bool': False, 'str': '', 'int': 0, 'float': 0.,
                          'complex': complex(0)}
        self._formats = {bool: '{:s}', str: '{:s}', int: '{:<20d}',
                         float: '{:<20.10e}', complex: 's'}
        self._formats = ((np.integer, '{:<20d}'),
                         (np.floating, '{:<20.10e}'),
                         (np.complex, '({0.real:<.10e} {0.imag:<.10e}j)'),
                         (np.bool, '{:s}'),
                         (str, '{:s}'))
    def _empty_array(self, val):
        if isinstance(val, str):
@ -273,7 +297,7 @@ class DataSet(object):
        """
        return self._col_names
-    def add_view(self, name, **plotopts):
+    def add_view(self, name, overwrite=False, **plotopts):
        """
        Creates a new view named *name* with specied plot options.
@ -283,6 +307,9 @@ class DataSet(object):
        :return: a view.
        :rtype: :py:class:`iodata._DataSetView`
        """
        if overwrite:
            self.delete_view(name)
        if isinstance(name, str):
            v = _DataSetView(self, name, **plotopts)
        else:
@ -291,6 +318,14 @@ class DataSet(object):
        self._views.append(v)
        return v
    def delete_view(self, name):
        view_titles = [_.title for _ in self._views]
        try:
            i = view_titles.index(name)
            self._views.pop(i)
        except:
            pass
    def views(self):
        """Returns all the defined views in the dataset.
@ -368,6 +403,12 @@ class DataSet(object):
        condition = kwargs.get('where', 'True')
        indices = []
    def export_views(self, folder):
        for view in self.views():
            f = view.get_figure()
            fname = os.path.join(folder, view.title) + '.png'
            f.savefig(fname)
    def export(self, filename="", mode="w"):
        """Export the DataSet to the given *filename*.
@ -379,8 +420,48 @@ class DataSet(object):
            Not yet implemented
        """
        rule = '#' * 80 + '\n'
        def header():
            s = '# PARAMETERS:\n'
            groups = []
            for p in self.parameters():
                g = p['group']
                if g not in groups:
                    groups.append(g)
            parameters = {}
            for group in groups:
                parameters[group] = self.get_parameter(group=group)
            for k, v in parameters.items():
                if k == 'Cluster':
                    continue
                s += f"# {k}:\n"
                if not(isinstance(v, list)):
                    v = [v,]
                for p in v:
                    s += f"#       {p['name']} = {p['value']} {p['unit']}\n"
            return s
        colnames = self.columns()
        with open(filename, mode) as fd:
            # write the date and time of export
            now = datetime.now()
            fd.write(f"# Data exported on {now}\n")
            fd.write(rule)
            # Append notes
            fd.write("# NOTES:\n")
            for line in self.notes.split('\n'):
                fd.write(f"#    {line}\n")
            fd.write(rule)
            # Append parameters
            fd.write(header())
            fd.write(rule)
            # Append the data
            fd.write("# DATA:\n")
            fd.write("# " + ("{:<20s}" * len(colnames)).format(*colnames
                                                               ) + "\n")
            for i in range(len(self)):
@ -389,7 +470,7 @@ class DataSet(object):
                    value = row[key][0]
                    fmt = '{:s}'
                    #print value
-                    for t, f in list(self._formats.items()):
+                    for t, f in self._formats:
                        if isinstance(value, t):
                            fmt = f
                            break
@ -406,7 +487,7 @@ class DataSet(object):
        new._col_names = self.columns()
        for arr in self._col_arrays:
-            new._col_arrays.append(np.array(arr[itemspec]).flatten())
+            new._col_arrays.append(np.asarray(arr)[itemspec].flatten())
        return new
@ -424,6 +505,13 @@ class DataSet(object):
            raise AttributeError("'{}' object has no attribute '{}'".format(
                self.__class__.__name__, name))
    def __setattr__(self, name, value):
        if name in self._col_names:
            i = self._col_names.index(name)
            self._col_arrays[i] = value
        else:
            self.__dict__[name] = value
    def __iter__(self):
        for i in range(len(self)):
            _ = {k: arr[i] for k, arr in zip(self._col_names,
@ -433,7 +521,10 @@ class DataSet(object):
    def __len__(self):
        try:
-            length = len(self._col_arrays[0])
+            #length = len(self._col_arrays[0])
            length = 0
            for array in self._col_arrays:
                length = max(length, len(array))
        except IndexError:
            length = 0
        return length
@ -482,18 +573,27 @@ class Data(object):
    """
    def __init__(self, title=''):
        assert is_title_valid(title), '\/:*?"<>| are not allowed in the string'
        self.title = title
        self._datasets = []
        self._dirty = False
-    def add_dset(self, title):
+    def add_dset(self, title, overwrite=False):
        """Adds a new DataSet in the Data object.
        :param title: The name of the DataSet.
        :type title: str
        :param overwrite: Tells whether to re-create the dataset if it exists.
        :type overwrite: bool
        :return: The newly created DataSet.
        :rtype: :py:class:`iodata.DataSet`
        """
        if overwrite:
            try:
                self.delete_dset(title)
            except Exception as err:
                pass
        titles = [d.title for d in self._datasets]
        if not title in titles:
            dset = DataSet(title)
@ -598,6 +698,16 @@ class Data(object):
        self._dirty = False
        LOGGER.info('Data saved in {}'.format(os.path.abspath(filename)))
    def export(self, folder, overwrite=False):
        os.makedirs(folder, exist_ok=overwrite)
        for dset in self._datasets:
            dset_name = dset.title.replace(' ', '_')
            p = os.path.join(folder, dset_name)
            os.makedirs(p, exist_ok=overwrite)
            fname = os.path.join(p, dset_name) + '.txt'
            dset.export(fname)
            dset.export_views(p)
    @staticmethod
    def load(filename):
        """Loads an HDF5 file from the disc.
@ -688,8 +798,9 @@ class Data(object):
 class _DataSetView(object):
    def __init__(self, dset, name, **plotopts):
-        self.dataset = dset
+        assert is_title_valid(name), '\/:*?"<>| are not allowed in the string'
        self.title = name
        self.dataset = dset
        self._plotopts = dict(
            title='No title',
            xlabel='', ylabel='', grid=True, legend=[], colorbar=False,
@ -706,9 +817,15 @@ class _DataSetView(object):
    def select(self, *args, **kwargs):
        condition = kwargs.get('where', 'True')
        legend = kwargs.get('legend', '')
-        self._selection_conditions.append(condition)
+        index = kwargs.get('index', None)
-        self._selection_tags.append(args)
+        if index is None:
-        self._plotopts['legend'].append(legend)
+            self._selection_conditions.append(condition)
            self._selection_tags.append(args)
            self._plotopts['legend'].append(legend)
        else:
            self._selection_conditions[index] = condition
            self._selection_tags[index] = args
            self._plotopts['legend'][index] = legend
    def tags(self):
        return self._selection_tags
@ -733,6 +850,71 @@ class _DataSetView(object):
            data.append(values)
        return data
    def get_figure(self):
        opts = self._plotopts
        figure = Figure()
        axes = None
        proj = opts['projection']
        scale = opts['scale']
        if proj == 'rectilinear':
            axes = figure.add_subplot(111, projection='rectilinear')
        elif proj in ('polar', 'ortho', 'stereo'):
            axes = figure.add_subplot(111, projection='polar')
        for values, label in zip(self.get_data(), opts['legend']):
            # if we have only one column to plot, select a bar graph
            if np.shape(values)[0] == 1:
                xvalues = list(range(len(values[0])))
                axes.bar(xvalues, values[0], label=label,
                          picker=5)
                axes.set_xticks(xvalues)
            else:
                if proj in ('ortho', 'stereo'):
                    theta, phi, Xsec = cols2matrix(*values)
                    theta_ticks = np.arange(0, 91, 15)
                    if proj == 'ortho':
                        R = np.sin(np.radians(theta))
                        R_ticks = np.sin(np.radians(theta_ticks))
                    elif proj == 'stereo':
                        R = 2 * np.tan(np.radians(theta/2.))
                        R_ticks = 2 * np.tan(np.radians(theta_ticks/2.))
                    #R = np.tan(np.radians(theta/2.))
                    X, Y = np.meshgrid(np.radians(phi), R)
                    im = axes.pcolormesh(X, Y, Xsec)
                    axes.set_yticks(R_ticks)
                    axes.set_yticklabels(theta_ticks)
                    figure.colorbar(im)
                elif proj == 'polar':
                    values[0] = np.radians(values[0])
                    axes.plot(*values, label=label, picker=5,
                              marker=opts['marker'])
                else:
                    if scale == 'semilogx':
                        pltcmd = axes.semilogx
                    elif scale == 'semilogy':
                        pltcmd = axes.semilogy
                    elif scale == 'log':
                        pltcmd = axes.loglog
                    else:
                        pltcmd = axes.plot
                    pltcmd(*values, label=label, picker=5,
                           marker=opts['marker'])
            axes.grid(opts['grid'])
            axes.set_title(opts['title'])
            axes.set_xlabel(opts['xlabel'])
            axes.set_ylabel(opts['ylabel'])
            axes.set_xlim(*opts['xlim'])
            axes.set_ylim(*opts['ylim'])
            if label:
                axes.legend()
        axes.autoscale(enable=opts['autoscale'])
        canvas = FigureCanvasAgg(figure)
        return figure
    def serialize(self):
        data = {
            'name': self.title,
@ -930,6 +1112,7 @@ class _DataWindow(wx.Frame):
        self.Bind(wx.EVT_MENU, self.on_open, id=110)
        self.Bind(wx.EVT_MENU, self.on_save, id=120)
        self.Bind(wx.EVT_MENU, self.on_saveas, id=130)
        self.Bind(wx.EVT_MENU, self.on_export, id=140)
        self.Bind(wx.EVT_MENU, self.on_close, id=199)
@ -1004,6 +1187,28 @@ class _DataWindow(wx.Frame):
        dlg.Destroy()
        self.update_title()
    def on_export(self, event):
        overwrite = True
        dlg = wx.DirDialog(
            self, message="Export data...", defaultPath=os.getcwd(),
            style=wx.DD_DEFAULT_STYLE)
        if dlg.ShowModal() == wx.ID_OK:
            path = dlg.GetPath()
            if os.listdir(path):
                mbx = wx.MessageDialog(self,
                                       ('This folder is not empty. '
                                        'Exporting tour data here may '
                                        'overwrite its content. Do you wish '
                                        'to continue ?'),
                                       'Warning: Folder is not empty',
                                       wx.YES_NO | wx.ICON_WARNING)
                if mbx.ShowModal() == wx.ID_NO:
                    overwrite = False
                mbx.Destroy()
            self.data.export(path, overwrite)
        dlg.Destroy()
    def on_viewdata(self, event):
        dset = self.data[self._current_dset]
        frame = _GridWindow(dset, parent=self)
@ -1018,7 +1223,8 @@ class _DataWindow(wx.Frame):
        s.write(dset.get_parameter(group='Cluster', name='cluster')['value'])
        atoms = ase.io.read(s, format='xyz')
        cluster_viewer.set_atoms(atoms, rescale=True, center=True)
-        cluster_viewer.rotate_atoms(45., 45.)
+        cluster_viewer.rotate_atoms(0., 180.)
        cluster_viewer.rotate_atoms(-45., -45.)
        #cluster_viewer.show_emitter(True)
        win.Show()
@ -1054,6 +1260,36 @@ class _DataWindow(wx.Frame):
        self._current_dset = name
    def create_page(self, nb, view):
        # Get the matplotlib figure
        figure = view.get_figure()
        # Create a panel
        p = wx.Panel(nb, -1)
        # Create a matplotlib canvas for the figure
        canvas = FigureCanvas(p, -1, figure)
        sizer = wx.BoxSizer(wx.VERTICAL)
        toolbar = NavigationToolbar2WxAgg(canvas)
        toolbar.Realize()
        sizer.Add(toolbar, 0, wx.ALL|wx.EXPAND)
        toolbar.update()
        sizer.Add(canvas, 5, wx.ALL|wx.EXPAND)
        p.SetSizer(sizer)
        p.Fit()
        p.Show()
        # MPL events
        figure.canvas.mpl_connect('motion_notify_event', self.on_mpl_motion)
        figure.canvas.mpl_connect('pick_event', self.on_mpl_pick)
        nb.AddPage(p, view.title)
    def OLDcreate_page(self, nb, view):
        opts = view._plotopts
        p = wx.Panel(nb, -1)
@ -1084,36 +1320,45 @@ class _DataWindow(wx.Frame):
        for values, label in zip(view.get_data(), opts['legend']):
-            if proj in ('ortho', 'stereo'):
+            # if we have only one column to plot, select a bar graph
-                theta, phi, Xsec = cols2matrix(*values)
+            if np.shape(values)[0] == 1:
-                theta_ticks = np.arange(0, 91, 15)
+                xvalues = list(range(len(values[0])))
-                if proj == 'ortho':
+                axes.bar(xvalues, values[0], label=label,
-                    R = np.sin(np.radians(theta))
+                          picker=5)
-                    R_ticks = np.sin(np.radians(theta_ticks))
+                axes.set_xticks(xvalues)
                elif proj == 'stereo':
                    R = 2 * np.tan(np.radians(theta/2.))
                    R_ticks = 2 * np.tan(np.radians(theta_ticks/2.))
                #R = np.tan(np.radians(theta/2.))
                X, Y = np.meshgrid(np.radians(phi), R)
                im = axes.pcolormesh(X, Y, Xsec)
                axes.set_yticks(R_ticks)
                axes.set_yticklabels(theta_ticks)
                figure.colorbar(im)
            elif proj == 'polar':
                values[0] = np.radians(values[0])
                axes.plot(*values, label=label, picker=5, marker=opts['marker'])
            else:
-                if scale == 'semilogx':
+                if proj in ('ortho', 'stereo'):
-                    pltcmd = axes.semilogx
+                    theta, phi, Xsec = cols2matrix(*values)
-                elif scale == 'semilogy':
+                    theta_ticks = np.arange(0, 91, 15)
-                    pltcmd = axes.semilogy
+                    if proj == 'ortho':
-                elif scale == 'log':
+                        R = np.sin(np.radians(theta))
-                    pltcmd = axes.loglog
+                        R_ticks = np.sin(np.radians(theta_ticks))
                    elif proj == 'stereo':
                        R = 2 * np.tan(np.radians(theta/2.))
                        R_ticks = 2 * np.tan(np.radians(theta_ticks/2.))
                    #R = np.tan(np.radians(theta/2.))
                    X, Y = np.meshgrid(np.radians(phi), R)
                    im = axes.pcolormesh(X, Y, Xsec)
                    axes.set_yticks(R_ticks)
                    axes.set_yticklabels(theta_ticks)
                    figure.colorbar(im)
                elif proj == 'polar':
                    values[0] = np.radians(values[0])
                    axes.plot(*values, label=label, picker=5,
                              marker=opts['marker'])
                else:
-                    pltcmd = axes.plot
+                    if scale == 'semilogx':
-                pltcmd(*values, label=label, picker=5, marker=opts['marker'])
+                        pltcmd = axes.semilogx
                    elif scale == 'semilogy':
                        pltcmd = axes.semilogy
                    elif scale == 'log':
                        pltcmd = axes.loglog
                    else:
                        pltcmd = axes.plot
                    pltcmd(*values, label=label, picker=5,
                           marker=opts['marker'])
            axes.grid(opts['grid'])
            axes.set_title(opts['title'])
            axes.set_xlabel(opts['xlabel'])
--- a/src/msspec/parameters.py
+++ b/src/msspec/parameters.py
@ -1874,3 +1874,159 @@ class EIGParameters(BaseParameters):
    def bind_kernel_matrix_spectrum(self, p):
        value = int(p.value)
        self.spec_parameters.eigval_ispectrum_ne = value
 class CompCurveParameters(BaseParameters):
    def __init__(self):
        parameters = (
            Parameter('general_npar', types=int, default=1, limits=[1, 2],
                      fmt='d'),
            Parameter('general_norm', types=int, default=0, limits=[0, 5],
                      fmt='d'),
            Parameter('general_iscale', types=int, default=0, limits=[0, 1],
                      fmt='d'),
            Parameter('general_inorm', types=int, default=1, limits=[-2, 2],
                      fmt='d'),
            Parameter('general_isym', types=int, default=0, limits=[0, 2],
                      fmt='d'),
            Parameter('general_sym', types=(int, float), default=180.,
                      limits=[0., 360.], fmt='.2f'),
            Parameter('general_iposi', types=int, default=0, limits=[0, 1],
                      fmt='d'),
            Parameter('general_idist', types=int, default=0, limits=[0, 4],
                      fmt='d'),
            Parameter('general_icur', types=int, default=0, limits=[0, 4],
                      fmt='d'),
            Parameter('general_isa', types=int, default=0, limits=[0, 4],
                      fmt='d'),
            Parameter('general_iprint', types=int, default=1, limits=[0, 1],
                      fmt='d'),
            Parameter('weights_iweight', types=int, default=0, limits=[0, 8],
                      fmt='d'),
            Parameter('weights_alpha', types=(int, float), default=1.,
                      fmt='.2f'),
            Parameter('weights_beta', types=(int, float), default=0.33,
                      fmt='.2f'),
            Parameter('weights_sigma', types=(int, float), default=0.5,
                      fmt='.2f'),
            Parameter('weights_ishift', types=int, default=0, limits=[0, 1],
                      fmt='d'),
            Parameter('weights_maxw', types=(int, float), default=30,
                      fmt='.2f'),
            Parameter('rfc_vi', types=(int, float), default=12, fmt='.2f'),
            Parameter('sim_alphas', types=(int, float), default=1., fmt='.2f'),
            Parameter('sim_betas', types=(int, float), default=1., fmt='.2f'),
            Parameter('sim_nbins', types=int, default=30, limits=[1, None],
                      fmt='d'),
            Parameter('dist_alphad', types=(int, float), default=.5,
                      fmt='.2f'),
            Parameter('dist_ibeta', types=int, default=2, fmt='d'),
            Parameter('dist_l', types=int, default=2, fmt='d'),
            Parameter('dist_sigmad', types=(int, float), default=1, fmt='.2f'),
            Parameter('gof_nbing', types=int, default=30, limits=[1, None],
                      fmt='d'),
            Parameter('gof_alphag', types=(int, float), default=1.5, fmt='.2f'),
            Parameter('kdist_alphak', types=(int, float), default=.5,
                      fmt='.2f'),
            Parameter('kdist_l', types=int, default=2, fmt='d'),
            Parameter('kdist_sigmak', types=(int, float), default=5.5,
                      fmt='.2f'),
            Parameter('mom_ngrid', types=int, default=75, fmt='d'),
            Parameter('mom_nmom', types=int, default=75, fmt='d'),
            Parameter('mom_basis', types=str,
                      allowed_values=['GEOM', 'LEGE', 'CHEB', 'KRAW', 'HAHN',
                                      'MEIX', 'CHAR', 'SHMA'],
                      default='KRAW', fmt='s'),
            Parameter('mom_ialg', types=int, limits=[1, 3], default=1, fmt='d'),
            Parameter('mom_mu', types=(int, float), default=.5, fmt='.2f'),
            Parameter('mom_nu', types=(int, float), default=.5, fmt='.2f'),
            Parameter('chords_ichord', types=int, default=3, limits=[1, 3],
                      fmt='d'),
            Parameter('chords_method', types=str,
                      allowed_values=['SIN', 'HIS', 'SUM'], default='SUM',
                      fmt='s'),
            Parameter('chords_value', types=int, limits=[1, 3], default=1,
                      fmt='d'),
            Parameter('chords_nbinc', types=int, default=30, limits=[1, None],
                      fmt='d'),
            Parameter('codes_nconnect', types=int, allowed_values=[3, 5, 9],
                      default=9, fmt='d'),
            Parameter('codes_scalec', types=(int, float), default=1,
                      fmt='.2f'),
            Parameter('cont_nbin', types=int, limits=[1, None], default=66,
                      fmt='d'),
            Parameter('cont_nlen', types=int, limits=[1, None], default=4,
                      fmt='d'),
            Parameter('cont_shan', types=str,
                      allowed_values=['CDIS', 'TANG', 'CURV', 'TRAR', 'BEAS',
                                      '8CCH', 'CLEN', 'CANG', 'ACDI', 'FOUR'],
                      default='TANG',
                      fmt='s'),
            Parameter('cont_ifou', types=int, limits=[1, 4], default=1,
                      fmt='d'),
            Parameter('cont_inorm', types=int, limits=[1, 4], default=2,
                      fmt='d'),
            Parameter('exp_filename', types=str, default='exp/experiment.txt',
                      fmt='s'),
            Parameter('calc_filename', types=(list,),
                      default=['calc/calculation.txt',],
                      fmt='s'),
            Parameter('calc_param1', types=(float, int), default=0.,
                      fmt='.2f'),
            Parameter('calc_param2', types=(float, int), default=0.,
                      fmt='.2f'),
        )
        BaseParameters.__init__(self)
        self.add_parameters(*parameters)
        self.freeze()
 class CompCurveGeneralParameters(BaseParameters):
    def __init__(self, compcurve_parameters):
        parameters = (
            Parameter('normalization', types=(type(None), str),
                      allowed_values=(None, "variance", "area", "max",
                                      "decimal_scaling", "zero_one"),
                      default="max"),
            Parameter('rescale', types=bool, default=True),
            Parameter('function', types=str, allowed_values=(
                "coordinates", "chi", "cdf", "curvature", "signature"),
                default="chi"),
        )
        BaseParameters.__init__(self)
        self.add_parameters(*parameters)
        self.compcurve_parameters = compcurve_parameters
        self.freeze()
    def bind_normalization(self, p):
        value = p.allowed_values.index(p.value)
        self.compcurve_parameters.general_norm = value
        LOGGER.info("Curve Comparison: Normalization mode set to "
                    f"\"{p.value}\"")
    def bind_rescale(self, p):
        self.compcurve_parameters.general_iscale = int(p.value)
        state = "deactivated"
        if p.value:
            state = "activated"
        LOGGER.info(f"Curve Comparison: Rescaling of data {state}")
    def bind_function(self, p):
        value = p.allowed_values.index(p.value)
        self.compcurve_parameters.general_icur = value
        LOGGER.info("Curve Comparison: Type of data used for comparison "
                    f"set to \"{p.value}\"")
--- a/src/msspec/spec/fortran/SConstruct
+++ b/src/msspec/spec/fortran/SConstruct
@ -18,6 +18,7 @@ phd_mi_noso_nosp_nosym = env_spec.FilteredGlob('phd_mi_noso_nosp_nosym/*.f', omi
 eig_common             = Glob('eig/common/*.f')
 eig_mi                 = env_spec.FilteredGlob('eig/mi/*.f', omit=['main.f'])
 eig_pw                 = env_spec.FilteredGlob('eig/pw/*.f', omit=['main.f'])
 comp_curves            = ['treatment/comp_curves.f']
 conf = Configure(env_spec, log_file='./config.log')
 if conf.CheckLib('lapack'):
@ -38,6 +39,7 @@ phd_mi_noso_nosp_nosym_obj = env_spec.Object(phd_mi_noso_nosp_nosym)
 eig_common_obj             = env_spec.Object(eig_common)
 eig_pw_obj                 = env_spec.Object(eig_pw)
 eig_mi_obj                 = env_spec.Object(eig_mi)
 comp_curves_obj            = env_spec.Object(comp_curves)
 Requires(memalloc_obj, dim_mod_obj)
@ -60,6 +62,10 @@ deps = common_deps + renormalization_obj + eig_common_obj + eig_pw_obj
 eig_pw_mod = env_spec.F2py('_eig_pw.so', ['eig/pw/main.f'] + deps)
 env_spec.InstallModule(eig_pw_mod)
 deps = comp_curves_obj
 comp_curve_mod = env_spec.F2py('_comp_curves.so', ['treatment/main.f'] + deps)
 env_spec.InstallModule(comp_curve_mod)
 # Alias
 env_spec.Alias('spec', [phd_se_mod, phd_mi_mod, eig_pw_mod, eig_mi_mod])
--- a/src/msspec/spec/fortran/treatment/comp_curves.f
+++ b/src/msspec/spec/fortran/treatment/comp_curves.f
--- a/src/msspec/spec/fortran/treatment/main.f
+++ b/src/msspec/spec/fortran/treatment/main.f
@ -0,0 +1,3 @@
      SUBROUTINE RUN()
          CALL COMP_CURVES()
      END SUBROUTINE RUN