import pandas as pd
from .utils import *
from .stream import SeismicStream
from .curves import CombineCurves
from .qtapps import *
#TODO: filter based on radon domain
#TODO: remove raw from db?
[docs]
class BaseManager:
def __init__(self, prjdir, path2raw=None, path2geom=None, settings=None,
             database='swa.db', overwrite=False, **kwargs):
    """
    Base manager class for surface wave analysis

    Parameters
    ----------
    prjdir : str, project directory
    path2raw : str, optional, path to the raw seismic data
    path2geom : str, optional, path to the geometry file
    settings : DataFrame, settings for the processing and visualisation
    database : str, name of the database
    overwrite : bool, optional, overwrite database
    """
    self.logger = create_logging(name='Manager')
    # check input files
    # NOTE(review): assumes assert_exists tolerates None paths — confirm
    assert_exists(path2raw, os.path.isdir, "Raw data folder")
    assert_exists(path2geom, os.path.isfile, "Geometry file")
    # ensure a single QApplication instance shared across all managers
    self.app = QApplication.instance()
    if self.app is None:
        self.app = QApplication([])
    self.prjdir = prjdir
    self.path2raw = path2raw
    self.path2geom = path2geom
    self.settings = settings
    self.database = database
    self._init_params()
    self.path2db = os.path.join(self.prjdir, self.database)
    # overwrite database if it exists to create new project.
    # Bug fix: previously os.remove ran unconditionally, so overwrite=True
    # on a fresh project (no database yet) raised FileNotFoundError.
    if overwrite and os.path.isfile(self.path2db):
        try:
            os.remove(self.path2db)
        except OSError as e:
            raise OSError(f"Could not delete database: {e}") from e
        else:
            self.logger.info(f"SQL database {self.path2db} deleted.")
    # create/load project
    if os.path.isfile(self.path2db):
        self._load_project(**kwargs)
    else:
        self._create_project(**kwargs)
def _create_project(self, **kwargs):
    """Create the project directory and database.

    Copies (and optionally renames) the raw data and the geometry file into
    the project layout, opens the data preview GUI, initialises the SQL
    database with geometry/settings and reads all seismic shot files.

    Parameters
    ----------
    kwargs : may contain ``rename`` (bool, default False) to rename raw
        files while copying them into the project directory.
    """
    print('Creating project:')
    # set up project directory
    create_projectdir(self.prjdir)
    # check if data exists in project directory
    if (len(os.listdir(os.path.join(self.prjdir, '01_data/raw'))) == 0):
        if (self.path2raw is not None):
            rename = kwargs.pop('rename', False)
            if rename:
                print('Copying and renaming raw data into project directory ..... ', end="")
            else:
                print('Copying raw data into project directory ..... ', end="")
            starttime = time.time()
            _, self.ext = get_fileList(self.path2raw)
            rename_files(self.path2raw, extension=self.ext, prjdir=self.prjdir, rename=rename)
            endtime = time.time()
            print(f'{np.round(endtime - starttime, 2)} s')
        else:
            raise FileNotFoundError('Raw data not found.')
    # from here on, always work with the copy inside the project directory
    self.path2raw = os.path.join(self.prjdir, '01_data/raw')
    self.fileList, self.ext = get_fileList(self.path2raw)
    # check if geometry exists in project directory
    geometry_path = os.path.join(self.prjdir, '02_geom', 'geometry.csv')
    # Check if geometry.csv exists in project directory
    if not os.path.isfile(geometry_path):
        if self.path2geom is not None and os.path.isfile(self.path2geom):
            print('Copying geometry into project directory ..... ', end="")
            starttime = time.time()
            shutil.copy(self.path2geom, geometry_path)
            endtime = time.time()
            print(f'{np.round(endtime - starttime, 2)} s')
        else:
            # fall back to deriving the geometry from the shot files
            print('geometry.csv file not provided or found. Trying to create from data ..... ', end="")
            starttime = time.time()
            try:
                create_geometry(self.fileList, geometry_path)
                endtime = time.time()
                print(f'{np.round(endtime - starttime, 2)} s')
            except Exception:
                endtime = time.time()
                print(f'{np.round(endtime - starttime, 2)} s')
                self.logger.error('No geometry file found or created.')
    print('Creating figures for data preview mode ..... ', end="")
    starttime = time.time()
    figures = self.get_figures()
    endtime = time.time()
    print(f'{np.round(endtime - starttime, 2)} s')
    # data preview
    self.data_preview(figures)
    self.path2geom = os.path.join(self.prjdir, '02_geom/geometry.csv')
    self._sql = SQL(database=self.path2db)
    # survey geometry
    self._sql.read_geometry(self.path2geom)
    # processing settings
    if (self.settings is None) or (not isinstance(self.settings, pd.core.frame.DataFrame)):
        self.logger.info('No settings provided or provided settings not of type dict.'
                         '\nUsing default settings instead.')
        self.settings = create_settings()
    self._sql.read_setting(self.settings)
    # seismic data
    self.create = True
    self._read_data()
    print('\nExisting procsets:', *self._sql.get_proc_labels())
    # set procset
    self.set_new_procset(procset='proc1')
    print('')
def _load_project(self, **kwargs):
    """Load an existing project from disk.

    Verifies the expected project layout, re-reads the raw seismic data,
    removes leftover temporary ('tmp') procset rows and selects (and
    optionally loads) the most recent procset.

    Parameters
    ----------
    kwargs : may contain ``load`` (bool, default True) to also load the
        processed data of the latest procset.
    """
    print('Loading project:')
    self.path2raw = os.path.join(self.prjdir, '01_data/raw')
    self.path2geom = os.path.join(self.prjdir, '02_geom/geometry.csv')
    path2proc = os.path.join(self.prjdir, '03_proc')
    # check project directory
    assert_exists(self.path2raw, os.path.isdir, "Raw data folder")
    assert_exists(self.path2geom, os.path.isfile, "Geometry file")
    assert_exists(path2proc, os.path.isdir, "Proc data folder")
    self.fileList, self.ext = get_fileList(self.path2raw)
    if not self.fileList:
        raise FileNotFoundError(f'No raw data files found in "{self.path2raw}".')
    self._sql = SQL(database=self.path2db)
    # load settings
    self.settings = self._sql.get_table('settings')
    self.create = False
    self._read_data()
    # drop leftover temporary rows (e.g. from interrupted GUI sessions)
    for table in self._sql.get_tables():
        self._sql.delete_data(table, {'procset': "'%s'" % 'tmp'})
    # set/load procset label
    # NOTE(review): the len > 1 checks below assume the first label is a
    # placeholder — confirm against SQL.get_proc_labels()
    prc_sets = self._sql.get_proc_labels()
    if len(prc_sets) > 1:
        self.set_new_procset(procset=prc_sets[-1])
    else:
        self.set_new_procset(procset='proc1')
    print('\nExisting procsets:', *prc_sets)
    if kwargs.pop('load', True):
        # load processed data
        if len(prc_sets) > 1:
            self.load_procset(prc_sets[-1])
    print('')
def _init_params(self):
"""initialize some parameters"""
# dictionary storing stream stream data
self.data = {}
# procset label
self._procset = 'proc1' # procset to store new process
self._loadset = 'proc1' # procset to load process from
# current stream
self.current_stream = None
self.selected_ids = (1, 1)
[docs]
def set_new_procset(self, procset = None, verbose = True):
"""set the new active processing name"""
if (procset != 'raw') & (isinstance(procset,str)):
self._procset = procset
if verbose:
print('Active procset label: %s' % procset)
else:
self._procset = 'proc1'
if verbose:
self.logger.warning('Active procset label should be different from "raw". '
'Setting to default label "proc1".')
[docs]
def set_procset_label(self, procset = None, verbose = True):
"""set the new active processing name"""
self.set_new_procset(procset , verbose)
[docs]
def set_loadset(self, procset = None):
"""set the loadset label, i.e. which dataset to load"""
if isinstance(procset,str):
self._loadset = procset
else:
self._loadset = 'proc1'
[docs]
def load_procset(self, procset = None, **kwargs):
"""load processed data from database"""
if not isinstance(procset, str):
self.logger.warning('Procset label needs to be a string.')
elif not procset in self._sql.get_proc_labels():
self.logger.warning(f'Procset {procset} does not exist.')
else:
self._loadset = procset
print('Loading amplitude data from "%s" ..... ' % self._loadset, end="")
starttime = time.time()
shots = self._sql.get_table('shots')
for i in range(len(shots)):
sin = shots.loc[i, 'sin']
rep = shots.loc[i, 'rep']
data = self.data
if sin in data.keys():
if rep in data[sin].keys():
self._set_data(self.data[sin][rep], sin, rep, procset=self._loadset)
self._set_FV(self.data[sin][rep], sin, rep, procset=self._loadset, **kwargs)
endtime = time.time()
print(f'{np.round(endtime - starttime, 2)} s')
def _get_filepath(self, sin, rep=1):
"""Return the file path corresponding to the indices sin and rep as string"""
if self.fileList is not None:
sn = self._sql.get_shotfile(sin, rep).shots.item()
for i, fp in enumerate(self.fileList):
path, fn = os.path.split(fp)
sfn = re.findall(r'\d+', fn.replace(self.ext, ''))[0]
if int(sn) == int(sfn):
return os.path.join(path,fn)
return None
def _read_data(self):
    """Initialize stream objects for all seismic shot files in the geometry file and apply the survey geometry"""
    shts = self._sql.get_table('shots')
    self.data = {}
    print('Reading seismic shot files .....', end="")
    starttime = time.time()
    nfiles = 0
    for i in shts.index:
        sin = shts.loc[i, 'sin']
        rep = shts.loc[i, 'rep']
        fn = self._get_filepath(sin, rep)
        if fn is not None:
            sht_geom, rec_geom = self._sql.get_geometry(sin, rep)
            st = SeismicStream(fn, settings=self.settings)
            st.apply_geometry(sht_geom, rec_geom)
            if sin not in self.data.keys():
                self.data[sin] = {}
            self.data[sin][rep] = st
            nfiles += 1
            if sin in self.data.keys():
                if rep in self.data[sin].keys():
                    # keep an immutable 'raw' copy in the database; on
                    # project creation also seed the default 'proc1' procset
                    self._write_data(self.data[sin][rep], sin, rep, 'raw')
                    if self.create:
                        self._write_data(self.data[sin][rep], sin, rep, 'proc1')
        else:
            # fn is None here, so this logs 'Shot file None not found.'
            self.logger.error(f'Shot file {fn} not found.')
    endtime = time.time()
    print(f' {np.round(endtime - starttime, 2)} s')
    print(f'Read {nfiles} files and applied geometry and settings.')
[docs]
def print_stats(self, which = 'stream'):
"""print stream information"""
stream = self.current_stream
stream.print_stats(which)
def _write_data(self, data, sin, rep, procset, wid=-1):
"""write processed data to database"""
self._sql.write_data(data, sin, rep, procset, wid)
def _write_FV(self, data, sin, rep, procset, wid=-1):
"""write FV to database"""
self._sql.write_FV(data, sin, rep, procset, wid)
def _get_data(self, sin, rep, procset, wid=-1):
"""get processed data from database"""
par, amps, recs, sht = self._sql.read_data(sin, rep, procset=procset, wid=wid)
return par, amps, recs, sht
def _get_FV(self, sin, rep, procset, wid=-1, method='phaseshift'):
"""get FV data from database"""
vel, kw, freq, FV = self._sql.read_FV(sin, rep, procset=procset, wid=wid, method=method)
return vel, kw, freq, FV
def _set_data(self, data, sin, rep, procset, wid=-1):
"""set processed data from database to current stream"""
par, amps, recs, sht = self._get_data(sin, rep, procset=procset, wid=wid)
if amps is not None:
amps_ari = amps.transpose()
data.update_pst(amps_ari, sht, recs, par)
def _set_FV(self, data, sin, rep, procset, wid=-1, method='phaseshift'):
"""set FV data from database to current stream"""
vel, kw, freq, FV = self._get_FV(sin, rep, procset=procset, wid=wid, method=method)
if FV is not None:
data.update_FV(method, vel, kw, freq, FV)
return True
else:
return False
# %% Processing
[docs]
def select_data(self, sin=1, rep=1, inplace=True, verbose = True):
"""
Select one stream object based on source location and shot repetition indices
Parameters
----------
sin : int, shot index number
rep : int, repeated shot index number
inplace : bool, default True, whether to modify the stream object rather than creating a new one
verbose : print current selection to console
Returns
-------
stream object
"""
data = self.data
if sin in data.keys():
if rep in data[sin].keys():
selection = data[sin][rep]
self.selected_ids = (sin, rep)
if inplace:
self.current_stream = selection
if verbose:
print(f'Currently selected data: (SIN,REP) = ({self.selected_ids[0]}, {self.selected_ids[1]})')
return selection
else:
raise KeyError(f'The key rep = {rep} does not exist for sin = {sin}.')
else:
raise KeyError(f'The key sin = {sin} does not exist.')
[docs]
def delete(self, procset = None, table = 'amps', params = None):
if params is None:
params = {}
procset = procset or self._procset
params["procset"] = f"'{procset}'"
self._sql.delete_data(table, params)
def _preprocess(self, type, procset=None, use_windows=True, **kwargs):
    """apply preprocessing steps to current selection or all data sets

    Runs the stream method named by ``type`` on a deep copy of the currently
    selected stream (once per window id when windows exist) and writes the
    result back to the database under ``procset``.
    """
    if procset is None:
        procset = self._procset
    sin, rep = self.selected_ids
    stream = self.current_stream
    wids = self._sql.get_wids(sin, rep, procset)
    # fall back to the pseudo window id -1 (whole record) when no windows
    # exist or window processing is disabled
    wids = wids if (wids and use_windows) else [-1]
    for wid in wids:
        tmp = copy.deepcopy(stream)
        self._set_data(tmp, sin, rep, self._loadset, wid)
        # dispatch to the stream's processing method by name
        func = getattr(tmp, type)
        func(**kwargs)
        if wid == -1:
            # whole-record result also replaces the in-memory stream
            self.data[sin][rep] = tmp
        self._write_data(tmp, sin, rep, procset, wid)
[docs]
def preprocess(self, type, procset=None, use_windows = True, **kwargs):
"""
apply preprocessing steps to current selection or all data sets
Parameters
----------
type : str, which processing method to call
procset : str, identifier to set on which dataset the processing should be applied to
use_windows : bool, default True, whether to apply the processing to windows
kwargs : arguments for the processing
"""
self._preprocess(type, procset, use_windows, **kwargs)
[docs]
def save_filter(self, fname = 'filter.csv', ftype='FK', procset=None):
"""
Save filter to file in tabular format
Parameters
----------
fname: str, filename to save data to disk
procset : str, identifier to set on which dataset the processing should be applied to
ftype : string, default 'FK', filter type
"""
procset = procset or self._procset
params = {
'procset': f"'{procset}'",
'type': f"'{ftype}'",
}
try:
df = self._sql.read_filter(params)
df.sort_values(['sin', 'rep','wid','key'], ascending=[True, True,True,True], inplace=True)
df.to_csv(fname, index=False)
except:
self.logger.error('Filter could not be saved to file.')
def import_filter(self, fname):
    """
    Read filter from file in tabular format

    Parameters
    ----------
    fname: str, filename to import
    """
    df = pd.read_csv(fname)
    # all rows are assumed to share a single procset and filter type
    procset = df['procset'].unique()[0]
    filter_type = df['type'].unique()[0]
    params = {
        "procset": f"'{procset}'",
        "type": f"'{filter_type}'",
    }
    # NOTE(review): deleting only when check_data() is falsy looks inverted —
    # if matching rows already exist the append below would duplicate them;
    # confirm SQL.check_data semantics before changing
    if not self._sql.check_data('filter', params):
        self._sql.delete_data('filter', params)
    self._sql.to_sql(df, name='filter', if_exists='append', index=False)
def read_filter(self, ftype='FK', procset=None, use_windows=True, uniq_per_rep=True, uniq_per_wid=True):
    """
    Read manual filter from database.

    Parameters
    ----------
    ftype : string, default 'FK', filter type
    procset : str, identifier to set on which dataset the processing should be applied to
    use_windows : bool, default True, whether to apply the processing to windows
    uniq_per_rep : bool, default True, whether a unique filter exists for each rep
    uniq_per_wid : bool, default True, whether a unique filter exists for each wid

    Returns
    -------
    points_top (list), points_bot (list)
        one entry per window id (a single entry for the whole record)
    """
    procset = procset or self._procset
    sin, rep = self.selected_ids
    params_template = {
        "procset": f"'{procset}'",
        "sin": sin,
        "type": f"'{ftype}'",
    }
    # when the filter is shared across repetitions, do not constrain 'rep'
    if uniq_per_rep:
        params_template['rep'] = rep
    points_top = []
    points_bot = []
    wids = self._sql.get_wids(sin, rep, procset)
    wids = wids if (wids and use_windows) else [-1]
    for wid in wids:
        if uniq_per_wid:
            params = {**params_template, "wid": wid}
        else:
            # shared filter: query without the window id
            params = params_template
        df = self._sql.read_filter(params)
        pt, pb = filter_df2dict(df)
        points_top.append(pt)
        points_bot.append(pb)
    return points_top, points_bot
def _apply_filter(self, ftype='FK', points_top=None, points_bot=None, procset=None, use_windows=True, **kwargs):
    """
    Apply manual filter from database.

    Parameters
    ----------
    ftype : string, default 'FK', filter type (only 'FK' is implemented)
    points_top : list, top filter, one point set per window
    points_bot : list, bottom filter, one point set per window
    procset : str, identifier to set on which dataset the processing should be applied to
    use_windows : bool, default True, whether to apply the processing to windows
    kwargs : forwarded to the stream's FK filter implementation

    Raises
    ------
    NotImplementedError : for any ftype other than 'FK'
    """
    procset = procset or self._procset
    sin, rep = self.selected_ids
    stream = self.current_stream
    wids = self._sql.get_wids(sin, rep, procset)
    wids = wids if (wids and use_windows) else [-1]
    for i, wid in enumerate(wids):
        tmp_stream = copy.deepcopy(stream)
        self._set_data(tmp_stream, sin, rep, self._loadset, wid)
        if ftype == "FK":
            # Validate list lengths
            try:
                pt = points_top[i]
                pb = points_bot[i]
            except IndexError:
                self.logger.error(
                    f"Mismatch between number of windows ({len(wids)}) "
                    f"and provided filter point sets."
                )
                continue
            tmp_stream.apply_fk_filter([pt, pb], ["t", "b"], **kwargs)
        else:
            raise NotImplementedError
        # NOTE(review): flags the amplitudes as tapered — confirm the
        # semantics of tapered_amps in SeismicStream
        tmp_stream.tapered_amps = 1
        self._write_data(tmp_stream, sin, rep, procset, wid)
[docs]
def apply_filter(self,ftype='FK', points_top = None, points_bot = None,procset=None, use_windows=True, **kwargs):
"""
Apply manual filter from database.
Parameters
----------
ftype : string, default 'FK', filter type
points_top : list, top filter
points_bot : list, bottom filter
procset : str, identifier to set on which dataset the processing should be applied to
use_windows : bool, default True, whether to apply the processing to windows
kwargs
"""
self._apply_filter(ftype, points_top, points_bot, procset=procset, use_windows=use_windows, **kwargs)
[docs]
def apply_filter_2D(self, procset=None, apply_to='all', use_windows=True, ftype = 'FK',
uniq_per_rep = True, uniq_per_wid = True, **kwargs):
"""
Apply filter to current selection or all data sets
Parameters
----------
procset : str, identifier to set on which dataset the processing should be applied to
apply_to : str, default 'all', whether to apply function to all streams or just the current selection
use_windows : bool, default True, whether to apply the processing to windows
ftype : string, default 'FK', filter type
uniq_per_rep : bool, default True, whether a unique filter exists for each rep
uniq_per_wid : bool, default True, whether a unique filter exists for each wid
"""
if procset is None:
procset = self._procset
# Apply process to current selection only
if apply_to == 'cur':
if self.current_stream is None:
self.select_data(inplace=True, verbose=False)
starttime = time.time()
print(f'Applying {ftype} filter to (SIN,REP) = ({self.selected_ids[0]}, {self.selected_ids[1]}) ..... ', end='')
points_top, points_bot = self.read_filter(ftype =ftype, procset = procset, use_windows = use_windows,
uniq_per_rep = uniq_per_rep, uniq_per_wid = uniq_per_wid)
self._apply_filter(ftype, points_top, points_bot, procset=procset, use_windows=use_windows, **kwargs)
endtime = time.time()
print(f'{np.round(endtime - starttime, 2)} s')
# Apply process to all data sets
elif apply_to == 'all':
starttime = time.time()
for sin in self.data.keys():
for rep in self.data[sin].keys():
self.select_data(sin, rep, inplace=True, verbose=False)
sys.stdout.write(f'\rApplying FK filter to (SIN,REP) = ({sin}, {rep}) ..... ')
sys.stdout.flush()
points_top, points_bot = self.read_filter('FK', procset, use_windows,
uniq_per_rep = uniq_per_rep, uniq_per_wid = uniq_per_wid)
self._apply_filter('FK', points_top, points_bot, procset, use_windows, **kwargs)
endtime = time.time()
print(f'{np.round(endtime - starttime, 2)} s')
self.set_loadset(procset)
def _transform(self, method='phaseshift', procset=None, use_windows=True, **kwargs):
    """apply wavefield transformation to current selection or all data sets

    Transforms a deep copy of the current stream (per window id when windows
    exist) and writes the resulting dispersion image to the database.
    """
    if procset is None:
        procset = self._procset
    sin, rep = self.selected_ids
    stream = self.current_stream
    wids = self._sql.get_wids(sin, rep, procset)
    # fall back to the whole record (wid -1) when no windows are defined
    wids = wids if (wids and use_windows) else [-1]
    for wid in wids:
        tmp = copy.deepcopy(stream)
        self._set_data(tmp, sin, rep, self._loadset, wid)
        tmp.transform(method=method, **kwargs)
        if wid == -1:
            # whole-record result also replaces the in-memory stream
            self.data[sin][rep] = tmp
        self._write_FV(tmp, sin, rep, procset, wid)
def _extract_dc(self, stream, sin, rep, procset, wid, method, **kwargs):
"""extract a dispersion curve and write to database"""
pck_mode = 'auto'
try:
if method == 'MOPA':
auto_method = method
else:
auto_method = 'max'
stream.dcpicking(pck_mode=pck_mode, auto_method = auto_method, **kwargs)
except:
warnings.warn('No dispersion image generated prior to dispersion curve extraction'
'Dispersion curve extraction not possible.')
pass
#method = stream.trafo_type
xmid = stream.midpoint
for pck_mode in stream.picks.keys():
for dc_mode in stream.picks[pck_mode].keys():
dc = {
'xmid': xmid,
'method': stream.extraction_method,
'dc_mode': dc_mode,
'frequency': stream.picks[pck_mode][dc_mode]['frequency'],
'velocity': stream.picks[pck_mode][dc_mode]['velocity'],
'error': np.zeros(len(stream.picks[pck_mode][dc_mode]['velocity']))}
self._sql.write_curve(dc, sin, rep, procset, wid, xmid)
def _extract(self, procset=None, use_windows=True, method=None, **kwargs):
    """apply dispersion curve picking to a stream

    For each window (or the whole record), attaches the latest dispersion
    image when one exists and then extracts the dispersion curve(s).
    """
    if procset is None:
        procset = self._procset
    sin, rep = self.selected_ids
    if self.current_stream is None:
        self.select_data(inplace=True, verbose=False)
    stream = self.current_stream
    wids = self._sql.get_wids(sin, rep, procset)
    wids = wids if (wids and use_windows) else [-1]
    for wid in wids:
        tmp = copy.deepcopy(stream)
        self._set_data(tmp, sin, rep, procset, wid)
        # whole-record (-1) and windowed transforms are labelled separately
        if wid == -1:
            trafo_labels = self._sql.get_trafo_labels(procset, use_windows=False)
        else:
            trafo_labels = self._sql.get_trafo_labels(procset, use_windows=True)
        # if transformation exists set dispersive energy (MOPA picks do not
        # need a precomputed dispersion image)
        if (len(trafo_labels) > 0) and (method != 'MOPA'):
            self._set_FV(tmp, sin, rep, procset, wid, method=trafo_labels[-1])
        self._extract_dc(tmp, sin=sin, rep=rep, wid=wid, procset=procset,
                         method=method, **kwargs)
def _process_curve(self, type, params, method=None,
                   procset=None, **kwargs):
    """apply a process to the dispersion curve data

    Reads the curve rows matching ``params``, runs the DispersionCurve
    method named by ``type`` on them and writes the result back.
    """
    if procset is None:
        procset = self._procset
    curve_data = self._sql.read_curve(params)
    if curve_data.empty:
        return
    # Initialize and process the dispersion curve
    dc_obj = DispersionCurve()
    dc_obj.init_data(curve_data['frequency'], curve_data['velocity'], curve_data['error'])
    func = getattr(dc_obj, type)
    dc_obj = func(**kwargs)  # processed DispersionCurve
    # Get unique xmid values
    # NOTE(review): all matching rows are processed as one curve and the
    # same result is stored for every xmid — confirm this is intended
    xmids = curve_data['xmid'].unique()
    for xmid in xmids:
        dc_record = {
            'xmid': xmid,
            'method': method,
            'dc_mode': params['dc_mode'],
            'frequency': dc_obj.frequency,
            'velocity': dc_obj.velocity,
            'error': dc_obj.error
        }
        self._sql.write_curve(
            dc_record,
            params['sin'],
            params['rep'],
            procset,
            params['wid'],
            xmid
        )
def _get_stream_kwargs(self, stream):
"""return some stream properties"""
receiver = stream.receiver
source = stream.source
offsets = receiver - source
stream_kwargs = {
'offsets': offsets,
'nchannels': len(receiver),
'dx': abs(receiver[0] - receiver[1])
}
return stream_kwargs
[docs]
def process_curves(self, type, procset=None, method = None,
dc_mode=0, use_windows=True, **kwargs):
self.process_curve(type, procset, method, dc_mode, use_windows, **kwargs)
def process_curve(self, type, procset=None, method=None,
                  dc_mode=0, use_windows=True, **kwargs):
    """
    apply a process to a dispersion curve

    Parameters
    ----------
    type : str, which processing method to call
    procset : str, identifier to set on which dataset the processing should be applied to
    method : str, method used to obtain dispersion curve
    dc_mode : int, default 0, mode of propagation
    use_windows : bool, default True, whether to apply the processing to windows
    kwargs : arguments for the processing
    """
    if procset is None:
        procset = self._procset
    sin, rep = self.selected_ids
    stream = self.data[sin][rep]
    # Common params template; curves are read from the current loadset
    params_template = {
        'procset': f"'{self._loadset}'",
        'method': f"'{method}'",
        'dc_mode': dc_mode,
        'sin': sin,
        'rep': rep
    }
    wids = self._sql.get_wids(sin, rep, procset)
    wids = wids if (wids and use_windows) else [-1]
    for wid in wids:
        params = params_template.copy()
        params['wid'] = wid
        tmp_stream = copy.deepcopy(stream)
        self._set_data(tmp_stream, sin, rep, self._loadset, wid)
        if type == 'estimate_error':
            # error estimation needs the acquisition layout of the stream
            stream_kwargs = self._get_stream_kwargs(tmp_stream)
            kwargs.update(stream_kwargs)
        self._process_curve(type, params, method=method, procset=procset, **kwargs)
def _save_dc(self, path2dc, name, params, format='csv', **kwargs):
    """Write one dispersion curve to disk; return its xmid or None.

    Returns None when no curve rows match ``params``.
    """
    curve_data = self._sql.read_curve(params)
    if curve_data.empty:
        # nothing matched the query parameters
        return None
    curve = DispersionCurve()
    curve.init_data(curve_data['frequency'], curve_data['velocity'],
                    curve_data['error'])
    curve.save(path2dc, name, format=format, **kwargs)
    return curve_data['xmid'].unique()[0]
def _save(self, procset=None, method=None,
          dc_mode=0, use_windows=True, **kwargs):
    """save a dispersion curve for a defined source location, repetition and window id

    Writes one file per window under 03_proc/<procset>_<method>/Mode<dc_mode>/
    and returns the list of midpoints (xmid) that were actually saved.
    """
    if procset is None:
        procset = self._procset
    sin, rep = self.selected_ids
    # Directory to save processed curves
    dir_path = os.path.join(self.prjdir, f'03_proc/{procset}_{method}/Mode{int(dc_mode)}')
    safe_makedirs(dir_path)
    # Base params template
    params_template = {
        'procset': f"'{procset}'",
        'method': f"'{method}'",
        'dc_mode': dc_mode,
        'sin': sin,
        'rep': rep
    }
    # Determine wids to iterate
    wids = self._sql.get_wids(sin, rep, procset)
    wids = wids if (wids and use_windows) else [-1]
    xmids = [
        self._save_dc(
            dir_path,
            f"dc_sin{sin}-rep{rep}-wid{wid}" if wid != -1 else f"dc_sin{sin}-rep{rep}",
            {**params_template, 'wid': wid},
            **kwargs
        )
        for wid in wids
    ]
    # Filter out None values (windows without stored curves)
    xmids = [xmid for xmid in xmids if xmid is not None]
    return xmids
[docs]
def save(self, procset=None, method = None,dc_mode=0, use_windows=True, **kwargs):
"""
save a dispersion curve for a defined source location, repetition and window id
Parameters
----------
procset : str, identifier to set on which dataset the processing should be applied to
method : str, method used to obtain dispersion curve
dc_mode : int, default 0, mode of propagation
use_windows : bool, default True, whether to apply the processing to windows
"""
self._save(procset, method,dc_mode, use_windows, **kwargs)
def save_stream(self, procset=None):
    """save stream in .mseed file format"""
    if procset is None:
        procset = self._procset
    sin, rep = self.selected_ids
    stream = self.data[sin][rep]
    # output folder: 03_proc/<procset>/streams/
    out_dir = os.path.join(self.prjdir, f'03_proc/{procset}/streams/')
    safe_makedirs(out_dir)
    stream.save_stream(out_dir, stream.pre)
def save_streams(self, procset=None, apply_to='all'):
    """save streams in .mseed file format

    Parameters
    ----------
    procset : str, procset subfolder the streams are saved into
    apply_to : str, 'cur' saves only the current selection, 'all' saves every stream
    """
    if procset is None:
        procset = self._procset
    dir_path = os.path.join(self.prjdir, f'03_proc/{procset}/streams/')
    # Apply process to current selection only
    if apply_to == 'cur':
        if self.current_stream is None:
            self.select_data(inplace=True, verbose=False)
        starttime = time.time()
        print(f'Save seismic stream corresponding to (SIN,REP) = ({self.selected_ids[0]}, {self.selected_ids[1]})'
              f' to "{dir_path}" ..... ', end='')
        self.save_stream(procset=procset)
        endtime = time.time()
        print(f'{np.round(endtime - starttime, 2)} s')
    # Apply process to all data sets
    elif apply_to == 'all':
        starttime = time.time()
        for sin in self.data.keys():
            for rep in self.data[sin].keys():
                self.select_data(sin, rep, inplace=True, verbose=False)
                sys.stdout.write(f'\rSave seismic stream corresponding to (SIN,REP) = '
                                 f'({sin}, {rep}) to "{dir_path}" ..... ')
                sys.stdout.flush()
                self.save_stream(procset=procset)
        endtime = time.time()
        print(f'{np.round(endtime - starttime, 2)} s')
def _plot(self, type='seismogram', procset=None, use_windows=True, **kwargs):
    """plot portions of the stream data

    Returns
    -------
    (window_axes, wids) : list of figures (one per window) and the window ids
    """
    if procset is None:
        procset = self._procset
    sin, rep = self.selected_ids
    stream = self.current_stream
    # work on a copy so plotting never mutates the managed stream
    stream = copy.deepcopy(stream)
    method = kwargs.pop('method', 'phaseshift')
    if type in ['FV', 'FVComposite']:
        # dispersion-image plots need the FV data attached first
        self._set_FV(stream, sin, rep, procset=procset, method=method)
    wids = self._sql.get_wids(sin, rep, procset)
    wids = wids if (wids and use_windows) else [-1]
    window_axes = []
    for wid in wids:
        tmp = copy.deepcopy(stream)
        self._set_data(tmp, sin, rep, procset, wid)
        if type in ['FV', 'FVComposite']:
            self._set_FV(tmp, sin, rep, procset=procset, method=method, wid=wid)
        fig = tmp.plot(type, **kwargs)
        window_axes.append(fig)
    return window_axes, wids
def plot_curve(self, procset=None,
               use_windows=True, method='phaseshift', **kwargs):
    """
    plot a dispersion curve

    Parameters
    ----------
    procset : str, identifier to set on which dataset the processing should be applied to
    method : str, method used to obtain dispersion curve
    use_windows : bool, default True, whether to apply the processing to windows
    """
    if procset is None:
        procset = self._procset
    sin, rep = self.selected_ids
    params_template = {'procset': "'%s'" % procset,
                       'method': "'%s'" % method,
                       'sin': sin,
                       'rep': rep}
    # grey-scale palette indexed by mode number (10 shades, wraps via modulo)
    cmap = getattr(plt.cm, 'Greys')
    color_map = cmap(np.linspace(0, 1, 10))
    wids = self._sql.get_wids(sin, rep, procset)
    wids = wids if (wids and use_windows) else [-1]
    for wid in wids:
        params = params_template.copy()
        params['wid'] = wid
        curve_data = self._sql.read_curve(params)
        if curve_data.empty:
            continue
        # handle dc_mode as array; ensure colors array indexing works
        dc_modes = curve_data['dc_mode'].astype(int)
        if 'color' in kwargs:
            # an explicit color overrides the per-mode palette
            colors = kwargs.pop('color', 'dodgerblue')
        else:
            colors = color_map[dc_modes % len(color_map)]
        dc = DispersionCurve()
        dc.init_data(curve_data['frequency'], curve_data['velocity'], curve_data['error'])
        dc.plot(color=colors, edgecolor='k', **kwargs)
def plot_pseudosection(self, procset=None, **kwargs):
    """
    plot the Rayleigh wave phase velocity pseudosection

    Parameters
    ----------
    procset : str, identifier to set on which dataset the processing should be applied to
    kwargs : method, dc_mode, cmap, show, axes, vmin, vmax, title, outfile,
        width, fmin, fmax; remaining kwargs go to DispersionCurve.plotColumn

    Returns
    -------
    None when no curves exist or the figure is shown, otherwise the axes
    """
    if procset is None:
        procset = self._procset
    # Default kwargs
    method = kwargs.pop('method', 'phaseshift')
    dc_mode = kwargs.pop('dc_mode', 0)
    cmap = kwargs.pop('cmap', 'viridis')
    show = kwargs.pop('show', True)
    axes = kwargs.pop('axes', None)
    vmin = kwargs.pop('vmin', 200)
    vmax = kwargs.pop('vmax', 500)
    title = kwargs.pop('title', '')
    outfile = kwargs.pop('outfile', None)
    _, recs_all = self._sql.get_geometry(sin='*')
    params = {'procset': f"'{procset}'", 'method': f"'{method}'", 'dc_mode': f"{dc_mode}"}
    curves = self._sql.read_curve(params)
    # default column width: median receiver spacing
    dx = kwargs.pop('width', np.median(abs(np.diff(recs_all.rx))))
    if curves.empty:
        self.logger.warning('No dispersion curves in database. Pseudosection not visualized.')
        return None
    fmin = kwargs.pop('fmin', curves['frequency'].min())
    fmax = kwargs.pop('fmax', curves['frequency'].max())
    if axes is None:
        fig, ax = plt.subplots(figsize=(6, 4))
    else:
        ax = axes
        fig = ax.figure
    # one coloured column per midpoint
    for xmid, sub in curves.groupby('xmid'):
        dc = DispersionCurve()
        dc.init_data(sub['frequency'], sub['velocity'], sub['error'])
        dc.plotColumn(
            axes=ax,
            xmid=xmid,
            vmin=vmin,
            vmax=vmax,
            cmap=cmap,
            y_value='frequency',
            width=dx,
            **kwargs
        )
    plot_colorBar(ax, vmin, vmax, cmap=cmap, orientation='vertical')
    ax.set_xlim([recs_all['rx'].min(), recs_all['rx'].max()])
    ax.set_ylim([fmin, fmax])
    ax.set_title(title)
    if outfile:
        fig.savefig(outfile)
    if show:
        plt.tight_layout()
        plt.show()
        # NOTE(review): returns None when shown interactively, the axes
        # otherwise — callers relying on the axes must pass show=False
        return None
    return ax
[docs]
def plot(self, type='seismogram', procset = None, use_windows=True, **kwargs):
"""
plot portions of the stream data
Parameters
----------
type : str, plot type
procset : str, identifier to set on which dataset the processing should be applied to
use_windows : bool, default True, whether to apply the processing to windows
"""
if procset is None:
procset = self._procset
if type == 'pseudosection':
ax = self.plot_pseudosection(procset, **kwargs)
return ax
elif type == 'curve':
self.plot_curve(procset,use_windows, **kwargs)
else:
self._plot(type,procset,use_windows,**kwargs)
return None
def data_preview(self, figures):
    """data preview modus

    Opens the figure switcher with the preview figures and blocks until the
    window is closed.

    Parameters
    ----------
    figures : figures shown in the preview switcher
    """
    window = FigureSwitcher(figures)
    window.resize(800, 600)
    window.show()
    # NOTE(review): sys.exit() terminates the interpreter when the preview
    # window closes — confirm this is intended during project creation
    sys.exit(self.app.exec())
def _get_windows(self,procset):
labels = []
for sin in self.data.keys():
for rep in self.data[sin].keys():
wids = self._sql.get_wids(sin, rep, procset)
labels.append([[sin, rep, x] for x in wids])
return labels
def gui_interact(self, type='pick', domain='FV', procset=None, use_windows=True, **kwargs):
    """
    interactive figure switcher

    Parameters
    ----------
    type : str, 'pick' for dispersion curve picking or 'filter' for manual filtering
    domain : str, data domain the interaction runs in ('FV', 'FK' or 'TX')
    procset : str, identifier to set on which dataset the processing should be applied to
    use_windows : bool, default True, whether to apply the processing to windows
    """
    if procset is None:
        procset = self._procset
    window_title = 'SWA - Interactive Figure Viewer'
    if type == 'pick':
        # picking always happens in the dispersion (FV) domain
        domain = 'FV'
        DataSwitcher = DataSwitcherPick
    elif type == 'filter':
        if domain == 'FK':
            DataSwitcher = DataSwitcherFilterFK
        elif domain == 'TX':
            DataSwitcher = DataSwitcherFilterTX
        else:
            self.logger.error(f'Filtering is not possible in domain "{domain}"')
            return
    else:
        self.logger.error(f'Interactive figure switcher does not exist for plot type "{type}"')
        return
    print('Loading the GUI ..... ', end="")
    starttime = time.time()
    wids = self._sql.wids_exist(self._loadset)
    # the dual switcher adds per-window navigation when windows exist
    if wids and use_windows:
        window = DualDataSwitcher(self.data, self.path2db, plot=domain,
                                  DataSwitcher=DataSwitcher,
                                  procset=procset,
                                  procsets=self._sql.get_proc_labels(),
                                  window_title=window_title, select_plot=False, **kwargs)
    else:
        window = DataSwitcher(self.data, self.path2db, plot=domain,
                              procset=procset,
                              procsets=self._sql.get_proc_labels(),
                              window_title=window_title, select_plot=False, **kwargs)
    window.resize(800, 600)
    endtime = time.time()
    print(f'{np.round(endtime - starttime, 2)} s')
    window.show()

    def handle_about_to_quit():
        # release GUI resources before the application object goes away
        window.clean()
    self.app.aboutToQuit.connect(handle_about_to_quit)
    self.app.exec()
    plt.close('all')
    # results produced in the GUI become the active loadset
    self.set_loadset(procset)
[docs]
def gui_view(self, type='', procset = None, use_windows=True,**kwargs):
    """
    Figure switcher (read-only viewer).

    Parameters
    ----------
    type : str, plot type; one of 'seismogram'/''/'TX', 'spectrogram'/'FX',
        'spectra', 'FK', 'SFR', 'dispersionImage'/'FV', 'curve'/'DC'
    procset : str, identifier to set on which dataset the processing should be applied to
    use_windows : bool, default True, whether to apply the processing to windows
    kwargs : forwarded to the DataSwitcher widget
    """
    if type not in ['seismogram','','TX','spectrogram','FX','spectra',
                    'FK','SFR','dispersionImage','FV', 'curve', 'DC']:
        self.logger.error(f'AttributeError: {type} does not exist.')
        # FIX: previously fell through and ran the GUI with an invalid
        # plot type; bail out like gui_interact does
        return
    # normalise plot-type aliases to their canonical short names
    if type == '' or type == 'seismogram':
        type = 'TX'
    if type == 'spectrogram':
        type = 'FX'
    if type == 'dispersionImage':
        type = 'FV'
    if type == 'curve':
        type = 'DC'
    window_title = 'SWA - Figure Viewer'
    DataSwitcher = DataSwitcherBase
    print('Loading the GUI ..... ', end="")
    starttime = time.time()
    # dual viewer when windows exist for the current load set
    wids = self._sql.wids_exist(self._loadset)
    if wids and use_windows:
        window = DualDataSwitcher(self.data, self.path2db, plot=type, DataSwitcher=DataSwitcher,
                                  procset=procset,
                                  procsets=self._sql.get_proc_labels(),
                                  window_title=window_title,**kwargs)
    else:
        window = DataSwitcher(self.data, self.path2db, plot=type,
                              procset = procset,
                              procsets = self._sql.get_proc_labels(),
                              window_title=window_title,**kwargs)
    window.resize(800, 600)
    endtime = time.time()
    print(f'{np.round(endtime - starttime, 2)} s')
    window.show()
    # release widget resources when the Qt application quits
    def handle_about_to_quit():
        window.clean()
    self.app.aboutToQuit.connect(handle_about_to_quit)
    self.app.exec()
    plt.close('all')
[docs]
class MASW2DManager(BaseManager):
    """Manager driving a 2D MASW surface-wave analysis workflow."""

    def __init__(self, prjdir, path2raw=None, path2geom=None, settings = None, database = 'swa.db',**kwargs):
        """
        MASW 2D manager class for surface wave analysis
        Parameters
        ----------
        prjdir : str, project directory
        path2raw : str, optional, path to the raw seismic data
        path2geom : str, optional, path to the geometry file
        settings : DataFrame, settings for the processing and visualisation
        database : str, name of the database
        """
        super().__init__(prjdir, path2raw, path2geom, settings, database,**kwargs)
        # dispersion-curve bookkeeping; dcs_per_location is filled by combine()
        self.dc_locations = None
        self.dcs_per_location = None

    def plot_streams(self, type='seismogram', procset = None, apply_to = 'all', use_windows=True, **kwargs):
        """
        Plot the stream data
        Parameters
        ----------
        type : str, plot type
        procset : str, identifier to set on which dataset the processing should be applied to
        apply_to : str, default 'all', whether to apply function to all streams or just the current selection
        use_windows : bool, default True, whether to apply the processing to windows
        """
        if procset is None:
            procset = self._procset
        # Apply process to current selection only
        if apply_to == 'cur':
            if self.current_stream is None:
                self.select_data(inplace = True, verbose=False)
            self._plot(type,procset=procset, use_windows=use_windows, **kwargs)
        # Apply process to all data sets
        elif apply_to == 'all':
            for sin in self.data.keys():
                for rep in self.data[sin].keys():
                    self.select_data(sin, rep, inplace=True, verbose=False)
                    self._plot(type, procset=procset, use_windows=use_windows,**kwargs)

    def plot(self, type='seismogram', procset = None, apply_to = 'all', use_windows=True, **kwargs):
        """
        Plot the stream data
        Parameters
        ----------
        type : str, plot type
        procset : str, identifier to set on which dataset the processing should be applied to
        apply_to : str, default 'all', whether to apply function to all streams or just the current selection
        use_windows : bool, default True, whether to apply the processing to windows
        """
        if procset is None:
            procset = self._procset
        # pseudosections are plotted once over all data; everything else
        # is delegated per-stream
        if type == 'pseudosection':
            ax = self.plot_pseudosection(procset, **kwargs)
            return ax
        else:
            self.plot_streams(type,procset,apply_to,use_windows,**kwargs)

    def preprocess_streams(self, type='trim', procset = None, apply_to = 'all', use_windows=True, **kwargs):
        """
        Apply preprocessing steps to current selection or all data sets
        Parameters
        ----------
        type : str, processing type
        procset : str, identifier to set on which dataset the processing should be applied to
        apply_to : str, default 'all', whether to apply function to all streams or just the current selection
        use_windows : bool, default True, whether to apply the processing to windows
        """
        # # check if process exists
        # if type not in ['filter', 'trim']:
        #     raise AttributeError(f'Attribute can be "filter" or "trim" not {type}.')
        if procset is None:
            procset = self._procset
        # writing into a new procset requires registering it first
        if procset != self._procset:
            self.set_new_procset(procset)
        # Apply process to current selection only
        if apply_to == 'cur':
            if self.current_stream is None:
                self.select_data(inplace = True, verbose=False)
            starttime = time.time()
            print(f'Applying {type} to (SIN,REP) = ({self.selected_ids[0]}, {self.selected_ids[1]}) ..... ', end='')
            self._preprocess(type,procset=procset, use_windows=use_windows, **kwargs)
            endtime = time.time()
            print(f'{np.round(endtime - starttime, 2)} s')
        # Apply process to all data sets
        elif apply_to == 'all':
            starttime = time.time()
            for sin in self.data.keys():
                for rep in self.data[sin].keys():
                    self.select_data(sin, rep, inplace=True, verbose=False)
                    # \r rewrites the progress line in place
                    sys.stdout.write(f'\rApplying {type} to (SIN,REP) = ({sin}, {rep}) ..... ')
                    sys.stdout.flush()
                    self._preprocess(type,procset=procset, use_windows=use_windows, **kwargs)
            endtime = time.time()
            print(f'{np.round(endtime - starttime, 2)} s')
        self.set_loadset(procset)

    def preprocess(self, type='trim', procset = None, apply_to = 'all', use_windows=True, **kwargs):
        """
        Apply preprocessing steps to current selection or all data sets
        (thin alias for :meth:`preprocess_streams`)
        Parameters
        ----------
        type : str, processing type
        procset : str, identifier to set on which dataset the processing should be applied to
        apply_to : str, default 'all', whether to apply function to all streams or just the current selection
        use_windows : bool, default True, whether to apply the processing to windows
        """
        self.preprocess_streams(type, procset, apply_to, use_windows, **kwargs)

    def apply_filter(self, procset=None, apply_to='all', use_windows=True, ftype = 'FK',
                     uniq_per_rep = True, uniq_per_wid = True, **kwargs):
        """
        Apply filter to current selection or all data sets
        (delegates to the shared 2D filter implementation)
        Parameters
        ----------
        procset : str, identifier to set on which dataset the processing should be applied to
        apply_to : str, default 'all', whether to apply function to all streams or just the current selection
        use_windows : bool, default True, whether to apply the processing to windows
        ftype : string, default 'FK', filter type
        uniq_per_rep : bool, default True, whether a unique filter exists for each rep
        uniq_per_wid : bool, default True, whether a unique filter exists for each wid
        """
        self.apply_filter_2D(procset, apply_to, use_windows, ftype,
                             uniq_per_rep, uniq_per_wid, **kwargs)

    def process_curves(self, type='smooth', procset = None, method = None,
                       dc_mode = 0, apply_to = 'all', use_windows=True, **kwargs):
        """
        Apply a process to a dispersion curve
        Parameters
        ----------
        type : str, which processing method to call
        procset : str, identifier to set on which dataset the processing should be applied to
        method : str, method used to obtain dispersion curve
        dc_mode : int, default 0, mode of propagation
        apply_to : str, default 'all', whether to apply function to all streams or just the current selection
        use_windows : bool, default True, whether to apply the processing to windows
        kwargs : arguments for the processing
        """
        # NOTE(review): unlike preprocess_streams, procset is not defaulted
        # to self._procset here — confirm process_curve handles procset=None
        # Apply process to current selection only
        if apply_to == 'cur':
            if self.current_stream is None:
                self.select_data(inplace = True, verbose=False)
            starttime = time.time()
            print(f'Applying {type} to dispersion curve of (SIN,REP) = ({self.selected_ids[0]}, {self.selected_ids[1]})'
                  f' ..... ', end = '')
            self.process_curve(type=type, procset=procset, method = method,
                               dc_mode=dc_mode, use_windows = use_windows, **kwargs)
            endtime = time.time()
            print(f'{np.round(endtime - starttime, 2)} s')
        # Apply process to all data sets
        elif apply_to == 'all':
            starttime = time.time()
            for sin in self.data.keys():
                for rep in self.data[sin].keys():
                    self.select_data(sin, rep, inplace=True, verbose=False)
                    sys.stdout.write(f'\rApplying {type} to dispersion curve of (SIN,REP) = ({sin}, {rep}) ..... ')
                    sys.stdout.flush()
                    self.process_curve(type=type, procset=procset, method = method,
                                       dc_mode=dc_mode, use_windows=use_windows, **kwargs)
            endtime = time.time()
            print(f'{np.round(endtime - starttime, 2)} s')

    def plot_curves(self, procset = None, method = None,
                    dc_mode = 0, apply_to = 'all', use_windows=True,**kwargs):
        """
        Plot a dispersion curve
        Parameters
        ----------
        procset : str, identifier to set on which dataset the processing should be applied to
        method : str, method used to obtain dispersion curve
        dc_mode : int, default 0, mode of propagation
        apply_to : str, default 'all', whether to apply function to all streams or just the current selection
        use_windows : bool, default True, whether to apply the processing to windows
        """
        # Apply process to current selection only
        if apply_to == 'cur':
            if self.current_stream is None:
                self.select_data(inplace = True, verbose=False)
            self.plot_curve(procset=procset, method = method,dc_mode=dc_mode, use_windows=use_windows,**kwargs)
        # Apply process to all data sets
        elif apply_to == 'all':
            for sin in self.data.keys():
                for rep in self.data[sin].keys():
                    self.select_data(sin, rep, inplace=True, verbose=False)
                    self.plot_curve(procset=procset, method = method,
                                    dc_mode=dc_mode, use_windows=use_windows,**kwargs)

    def save_curves(self, procset = None, method = None,
                    dc_mode = 0, apply_to = 'all', use_windows=True, **kwargs):
        """
        Save the dispersion curves based on receiver spread midpoint
        Parameters
        ----------
        procset : str, identifier to set on which dataset the processing should be applied to
        method : str, method used to obtain dispersion curve
        dc_mode : int, default 0, mode of propagation
        apply_to : str, default 'all', whether to apply function to all streams or just the current selection
        use_windows : bool, default True, whether to apply the processing to windows
        """
        if procset is None:
            procset = self._procset
        # NOTE(review): 'dir' shadows the builtin of the same name
        dir = os.path.join(self.prjdir, f'03_proc/{procset}_{method}/Mode{int(dc_mode)}')
        path2geom = os.path.join(dir, '02_geom')
        safe_makedirs(path2geom)
        # midpoints accumulated across all saved curves, written at the end
        xmids = []
        # Apply process to current selection only
        if apply_to == 'cur':
            if self.current_stream is None:
                self.select_data(inplace = True, verbose=False)
            starttime = time.time()
            print(f'Save dispersion curves corresponding to (SIN,REP) = ({self.selected_ids[0]}, {self.selected_ids[1]})'
                  f' to "{dir}" ..... ', end = '')
            xmid = self._save(procset=procset, method = method,dc_mode=dc_mode,
                              use_windows=use_windows, **kwargs)
            xmids += xmid
            endtime = time.time()
            print(f'{np.round(endtime - starttime, 2)} s')
        # Apply process to all data sets
        elif apply_to == 'all':
            starttime = time.time()
            for sin in self.data.keys():
                for rep in self.data[sin].keys():
                    self.select_data(sin, rep, inplace=True, verbose=False)
                    sys.stdout.write(f'\rSave dispersion curves corresponding to (SIN,REP) = '
                                     f'({sin}, {rep}) to "{dir}" ..... ')
                    sys.stdout.flush()
                    xmid = self._save(procset=procset, method = method,dc_mode=dc_mode,
                                      use_windows=use_windows, **kwargs)
                    xmids += xmid
            endtime = time.time()
            print(f'{np.round(endtime - starttime, 2)} s')
        # save the xmids to file
        np.savetxt(os.path.join(path2geom, 'xmid.txt'), xmids)

    def save(self, procset = None, method = None,
             dc_mode = 0, apply_to = 'all', use_windows=True, **kwargs):
        """
        Save the dispersion curves based on receiver spread midpoint
        (thin alias for :meth:`save_curves`)
        Parameters
        ----------
        procset : str, identifier to set on which dataset the processing should be applied to
        method : str, method used to obtain dispersion curve
        dc_mode : int, default 0, mode of propagation
        apply_to : str, default 'all', whether to apply function to all streams or just the current selection
        use_windows : bool, default True, whether to apply the processing to windows
        """
        self.save_curves(procset, method,dc_mode, apply_to, use_windows, **kwargs)

    # %% WINDOWING
    def moving_window(self, procset = None, apply_to = 'all', **kwargs):
        """
        Apply windowing to current selection or all data sets
        Parameters
        ----------
        procset : str, identifier to set on which dataset the processing should be applied to
        apply_to : str, default 'all', whether to apply function to all streams or just the current selection
        kwargs : window settings
        """
        if procset is None:
            procset = self._procset
        if procset != self._procset:
            self.set_new_procset(procset)
        # Apply process to current selection only
        if apply_to == 'cur':
            if self.current_stream is None:
                self.select_data(inplace = True, verbose=False)
            starttime = time.time()
            print(f'Moving window along (SIN,REP) = ({self.selected_ids[0]}, {self.selected_ids[1]}) ..... ', end='')
            windows = self.current_stream.moving_window(**kwargs)
            # each window gets persisted under its own window id (wid)
            for wid in windows.keys():
                self._write_data(windows[wid], self.selected_ids[0],self.selected_ids[1], procset, wid)
            endtime = time.time()
            print(f'{np.round(endtime - starttime, 2)} s')
        # Apply process to all data sets
        elif apply_to == 'all':
            starttime = time.time()
            for sin in self.data.keys():
                for rep in self.data[sin].keys():
                    sys.stdout.write(f'\rMoving window along (SIN,REP) = ({sin}, {rep}) ..... ')
                    sys.stdout.flush()
                    current_stream = self.select_data(sin=sin, rep=rep, inplace=False, verbose=False)
                    windows = current_stream.moving_window(**kwargs)
                    for wid in windows.keys():
                        self._write_data(windows[wid], sin, rep, procset, wid)
            endtime = time.time()
            print(f'{np.round(endtime - starttime, 2)} s')
        self.set_loadset(procset)

    # %% curve combination
    def combine(self, procset = None, method = None, filter = True, dc_mode = 0, use_windows=True, **kwargs):
        """
        Combine dispersion curves with same receiver spread location
        Parameters
        ----------
        procset : str, identifier to set on which dataset the processing should be applied to
        method : str, method used to obtain dispersion curves
        filter:, bool, manually filter curves if True
        dc_mode : int, default 0, mode of propagation
        use_windows : bool, default True, whether to apply the processing to windows
        """
        if procset is None:
            procset = self._procset
        # string values are pre-quoted for interpolation into the SQL query
        params = {'procset': "'%s'" % procset, 'dc_mode': dc_mode}
        if method:
            params['method'] = "'%s'" % method
        if not use_windows:
            params['wid'] = -1
        # distinct dispersion curve locations
        dcs = self._sql.read_curve_cc(params)
        self.dcs_per_location = {k: v for k, v in dcs.groupby('xmid')}
        # optional manual curve rejection via the CurveFilter GUI
        if filter:
            self.filter_CC()
        starttime = time.time()
        print(f'Combining dispersion curves ..... ', end='')
        for key in self.dcs_per_location.keys():
            data = self.dcs_per_location[key]
            CC = CombineCurves(data)
            combined_curve = CC.combination(**kwargs)
            # combined curves are stored with sin=rep=wid=-1 as sentinel ids
            data = {
                'xmid': key,
                'method': method,
                'dc_mode': dc_mode,
                'frequency': combined_curve.frequency,
                'velocity': combined_curve.velocity,
                'error': combined_curve.error}
            self._sql.write_curve(data, -1, -1, procset=procset, wid=-1, xmid=key)
        endtime = time.time()
        print(f'{np.round(endtime - starttime, 2)} s')

    def filter_CC(self):
        """Manually filter multiple dispersion curves (blocking GUI)."""
        data = self.dcs_per_location
        window = CurveFilter(data)
        window.setWindowTitle("SWA - Dispersion Curve Filtering")
        window.resize(800, 600)
        window.show()
        self.app.exec()
        # the GUI hands back only the curves the user kept
        self.dcs_per_location = window.data_filtered
        plt.close('all')

    def process_CC(self, type='smooth', procset=None, method = None, dc_mode=0, **kwargs):
        """
        Process the combined dispersion curves
        Parameters
        ----------
        type : str, which processing method to call
        procset : str, identifier to set on which dataset the processing should be applied to
        method : str, method used to obtain dispersion curve
        dc_mode : int, default 0, mode of propagation
        kwargs : arguments for the processing
        """
        if procset is None:
            procset = self._procset
        starttime = time.time()
        print(f'Applying {type} to dispersion curves ..... ', end='')
        params = {'sin': -1, 'rep': -1, 'wid': -1, 'procset': "'%s'" % procset,
                  'method': "'%s'" % method, 'dc_mode': dc_mode}
        curves = self._sql.read_curve(params)
        xmids = curves['xmid'].unique()
        for i, xmid in enumerate(xmids):
            params = {'sin': -1, 'rep': -1, 'wid': -1, 'procset': "'%s'" % procset,
                      'method': "'%s'" % method, 'dc_mode': dc_mode, 'xmid': xmid}
            self._process_curve(type, params, method, procset=procset, **kwargs)
            # NOTE(review): dc below is built but never used or returned —
            # looks like dead code left over from debugging
            curve_data = self._sql.read_curve(params)
            dc = DispersionCurve()
            dc.init_data(curve_data['frequency'], curve_data['velocity'], curve_data['error'])
        endtime = time.time()
        print(f'{np.round(endtime - starttime, 2)} s')

    def plot_CC(self, procset=None, method = None, dc_mode = 0, pseudosection = True, **kwargs):
        """
        Plot the combined dispersion curves
        Parameters
        ----------
        procset : str, identifier to set on which dataset the processing should be applied to
        method : str, method used to obtain dispersion curve
        dc_mode : int, default 0, mode of propagation
        pseudosection : bool, plot as pseudosection
        kwargs : arguments for the processing
        """
        if procset is None:
            procset = self._procset
        params = {'sin': -1, 'rep': -1, 'wid': -1, 'procset': "'%s'" % procset,
                  'method': "'%s'" % method, 'dc_mode': dc_mode}
        _, recs_all = self._sql.get_geometry(sin='*')
        curves = self._sql.read_curve(params)
        # column width for the pseudosection = median receiver spacing
        dx = np.median(abs(np.diff(recs_all.rx)))
        if not curves.empty:
            xmids = curves['xmid'].unique()
            # plot dispersion curves individually
            if not pseudosection:
                for i, xmid in enumerate(xmids):
                    curve_data = curves[curves['xmid'] == xmid]
                    dc = DispersionCurve()
                    dc.init_data(curve_data['frequency'], curve_data['velocity'], curve_data['error'])
                    dc.plot(**kwargs)
            # plot pseudosection
            else:
                cmap = kwargs.pop('cmap', 'viridis')
                show = kwargs.pop('show', True)
                axes = kwargs.pop('axes', None)
                vmin = kwargs.pop('vmin', 200)
                vmax = kwargs.pop('vmax', 500)
                fmin = kwargs.pop('fmin', curves['frequency'].min())
                fmax = kwargs.pop('fmax', curves['frequency'].max())
                title = kwargs.pop('title', '')
                outfile = kwargs.pop('outfile', None)
                if axes is None:
                    fig, ax = plt.subplots(figsize = (6,4))
                else:
                    ax = axes
                    fig = ax.figure
                # each curve becomes one velocity-coloured column at its xmid
                for xmid in xmids:
                    curve_data = curves[curves['xmid'] == xmid]
                    dc = DispersionCurve()
                    dc.init_data(curve_data['frequency'], curve_data['velocity'], curve_data['error'])
                    dc.plotColumn(axes=ax,
                                  xmid=xmid,
                                  vmin=vmin, vmax=vmax,
                                  cmap=cmap,
                                  y_value='frequency',
                                  width = dx, **kwargs)
                plot_colorBar(ax, vmin, vmax, cmap=cmap, orientation='vertical')
                ax.set_xlim([recs_all['rx'].min(), recs_all['rx'].max()])
                ax.set_ylim([fmin, fmax])
                ax.set_title(title)
                if outfile:
                    fig.savefig(outfile)
                if show:
                    plt.tight_layout()
                    plt.show()
                return ax

    def save_CC(self, procset=None, method = None, dc_mode=0, format='csv', **kwargs):
        """
        Save the dispersion curves based on receiver spread midpoint
        Parameters
        ----------
        procset : str, identifier to set on which dataset the processing should be applied to
        method : str, method used to obtain dispersion curve
        dc_mode : int, default 0, mode of propagation
        format : str, default 'csv', in which format the dispersion curve should be saved (dinver and ParkSeis support)
        """
        if procset is None:
            procset = self._procset
        params = {'sin': -1, 'rep': -1, 'wid': -1, 'procset': "'%s'" % procset,
                  'method': "'%s'" % method, 'dc_mode': dc_mode}
        curves = self._sql.read_curve(params)
        xmids = np.sort(curves['xmid'].unique())
        # save the receiver spread midpoints
        # NOTE(review): geometry folder is '1_geom' here but '02_geom' in
        # save_curves — confirm which naming is intended
        path2dc = os.path.join(self.prjdir, f'03_proc/{procset}_{method}_CC/Mode{int(dc_mode)}')
        path2geom = os.path.join(path2dc, '1_geom')
        starttime = time.time()
        # NOTE(review): 'disperion' typo in the user-facing message below
        print(f'Saving disperion curves to "{path2dc}" ..... ', end='')
        safe_makedirs(path2geom)
        np.savetxt(os.path.join(path2geom, 'xmid.txt'), xmids)
        # save the dispersion curves
        for i, xmid in enumerate(xmids):
            params = {'sin': -1, 'rep': -1, 'wid': -1, 'procset': "'%s'" % procset,
                      'method': "'%s'" % method, 'dc_mode': dc_mode, 'xmid': xmid}
            self._save_dc(path2dc, 'dc%d' % i, params, format=format, **kwargs)
        endtime = time.time()
        print(f'{np.round(endtime - starttime, 2)} s')
[docs]
class Tomo2DManager(BaseManager):
def __init__(self, prjdir, path2raw=None, path2geom=None, settings=None, database='swa.db', **kwargs):
    """
    Manager implementing the tomographic-like approach of Barone et al. (2019).

    Parameters
    ----------
    prjdir : str, project directory
    path2raw : str, optional, path to the raw seismic data
    path2geom : str, optional, path to the geometry file
    settings : DataFrame, settings for the processing and visualisation
    database : str, name of the database
    """
    super().__init__(prjdir, path2raw=path2raw, path2geom=path2geom,
                     settings=settings, database=database, **kwargs)
[docs]
def plot_streams(self, type='seismogram', procset=None, apply_to='all', use_windows=True, **kwargs):
    """
    Plot the stream data for the current selection or for every dataset.

    Parameters
    ----------
    type : str, plot type
    procset : str, identifier to set on which dataset the processing should be applied to
    apply_to : str, default 'all', 'cur' plots only the current selection,
        'all' iterates over every (sin, rep) pair
    use_windows : bool, default True, whether to apply the processing to windows
    """
    procset = self._procset if procset is None else procset
    if apply_to == 'cur':
        # lazily select a dataset if nothing is selected yet
        if self.current_stream is None:
            self.select_data(inplace=True, verbose=False)
        self._plot(type, procset=procset, use_windows=use_windows, **kwargs)
    elif apply_to == 'all':
        # select and plot every dataset in turn
        for sin in self.data:
            for rep in self.data[sin]:
                self.select_data(sin, rep, inplace=True, verbose=False)
                self._plot(type, procset=procset, use_windows=use_windows, **kwargs)
[docs]
def plot(self, type='seismogram', procset = None, apply_to = 'all', use_windows=True, **kwargs):
    """
    Plot the stream data
    Parameters
    ----------
    type : str, plot type
    procset : str, identifier to set on which dataset the processing should be applied to
    apply_to : str, default 'all', whether to apply function to all streams or just the current selection
    use_windows : bool, default True, whether to apply the processing to windows
    """
    # FIX: default procset before dispatching, consistent with
    # MASW2DManager.plot — previously plot_pseudosection received None
    if procset is None:
        procset = self._procset
    if type == 'pseudosection':
        ax = self.plot_pseudosection(procset, **kwargs)
        return ax
    else:
        self.plot_streams(type,procset,apply_to,use_windows,**kwargs)
[docs]
def prepare_streams(self, min_offset=-np.inf, max_offset=np.inf, min_rec = 6, max_rec = None, procset = None):
    """
    Retrieve subsets of the data based on forward and reverse offset shots.

    Parameters
    ----------
    min_offset : float, minimum offset (in m) to consider for trimming stream
    max_offset : float, maximum offset (in m) to consider for trimming stream
    min_rec : int, mininum number of receivers to keep for trimming down data
    max_rec : int, maximum number of receivers to keep for trimming down data
    procset : str, identifier to set on which dataset the processing should be applied to
    """
    if procset is None:
        procset = self._procset
    if procset != self._procset:
        self.set_new_procset(procset)
    info = False
    starttime = time.time()
    for sin in self.data.keys():
        for rep in self.data[sin].keys():
            info_fw = False
            info_rw = False
            # wid counts the windows written for this (sin, rep): 0 and/or 1
            wid = -1
            sys.stdout.write(f'\rTrimming data based on offset (SIN,REP) = ({sin}, {rep}) ..... ')
            sys.stdout.flush()
            current_stream = self.select_data(sin=sin, rep=rep, inplace=False, verbose=False)
            # forward shots
            stream_fw = copy.deepcopy(current_stream)
            oids = stream_fw.trim_by_offset(min_offset, max_offset, nrec= max_rec)
            if len(oids) >= min_rec:
                wid += 1
                self._write_data(stream_fw, sin, rep, procset, wid)
            else:
                info_fw = True
            # reverse shots (negative offset range)
            stream_rw = copy.deepcopy(current_stream)
            oids = stream_rw.trim_by_offset( -max_offset, -min_offset, nrec= max_rec)
            if len(oids) >= min_rec:
                wid += 1
                self._write_data(stream_rw, sin, rep, procset, wid)
            else:
                info_rw = True
            # FIX: accumulate across shots instead of overwriting — the
            # warning should fire if ANY shot failed in both directions
            info = info or (info_fw and info_rw)
    endtime = time.time()
    print(f'{np.round(endtime - starttime, 2)} s')
    if info:
        # FIX: Logger.warn is a deprecated alias of Logger.warning
        self.logger.warning("Some shot files didn't fulfill the user-defined near-/far-offset definition."
                            "\n No trimming was applied to those.")
    self.set_loadset(procset)
[docs]
def preprocess_streams(self, type='trim', procset=None, apply_to='all', use_windows=True, **kwargs):
    """
    Apply preprocessing steps to current selection or all data sets
    Parameters
    ----------
    type : str, processing type
    procset : str, identifier to set on which dataset the processing should be applied to
    apply_to : str, default 'all', whether to apply function to all streams or just the current selection
    use_windows : bool, default True, whether to apply the processing to windows
    """
    # # check if process exists
    # if type not in ['filter', 'trim']:
    #     raise AttributeError(f'Attribute can be "filter" or "trim" not {type}.')
    if procset is None:
        procset = self._procset
    # writing into a new procset requires registering it first
    if procset != self._procset:
        self.set_new_procset(procset)
    # Apply process to current selection only
    if apply_to == 'cur':
        if self.current_stream is None:
            self.select_data(inplace=True, verbose=False)
        starttime = time.time()
        print(f'Applying {type} to (SIN,REP) = ({self.selected_ids[0]}, {self.selected_ids[1]}) ..... ', end='')
        self._preprocess(type, procset=procset, use_windows=use_windows, **kwargs)
        endtime = time.time()
        print(f'{np.round(endtime - starttime, 2)} s')
    # Apply process to all data sets
    elif apply_to == 'all':
        starttime = time.time()
        for sin in self.data.keys():
            for rep in self.data[sin].keys():
                self.select_data(sin, rep, inplace=True, verbose=False)
                # \r rewrites the progress line in place
                sys.stdout.write(f'\rApplying {type} to (SIN,REP) = ({sin}, {rep}) ..... ')
                sys.stdout.flush()
                self._preprocess(type, procset=procset, use_windows=use_windows, **kwargs)
        endtime = time.time()
        print(f'{np.round(endtime - starttime, 2)} s')
    self.set_loadset(procset)
[docs]
def preprocess(self, type='trim', procset=None, apply_to='all', use_windows=True, **kwargs):
    """
    Apply preprocessing steps to current selection or all data sets.

    Thin alias for :meth:`preprocess_streams`; see there for details.

    Parameters
    ----------
    type : str, processing type
    procset : str, identifier to set on which dataset the processing should be applied to
    apply_to : str, default 'all', whether to apply function to all streams or just the current selection
    use_windows : bool, default True, whether to apply the processing to windows
    """
    self.preprocess_streams(type=type, procset=procset, apply_to=apply_to,
                            use_windows=use_windows, **kwargs)
[docs]
def apply_filter(self, procset=None, apply_to='all', use_windows=True, ftype='FK',
                 uniq_per_rep=True, uniq_per_wid=True, **kwargs):
    """
    Apply a filter to the current selection or to all data sets.

    Delegates to the shared 2D implementation ``apply_filter_2D``.

    Parameters
    ----------
    procset : str, identifier to set on which dataset the processing should be applied to
    apply_to : str, default 'all', whether to apply function to all streams or just the current selection
    use_windows : bool, default True, whether to apply the processing to windows
    ftype : string, default 'FK', filter type
    uniq_per_rep : bool, default True, whether a unique filter exists for each rep
    uniq_per_wid : bool, default True, whether a unique filter exists for each wid
    """
    self.apply_filter_2D(procset, apply_to, use_windows,
                         ftype, uniq_per_rep, uniq_per_wid, **kwargs)
[docs]
def compute_phasediff(self, procset = None):
    """
    Compute phase differences and store in database
    Parameters
    ----------
    procset : str, identifier to set on which dataset the processing should be applied to
    """
    if procset is None:
        procset = self._procset
    if procset != self._procset:
        self.set_new_procset(procset)
    # global receiver layout: every stream's phase differences are embedded
    # into a full-width array so all (sin, rep) rows share the same columns
    _, recs_all = self._sql.get_geometry(sin='*')
    all_receivers = recs_all.rx.values
    receiver_index = {rec: i for i, rec in enumerate(all_receivers)}
    # compute the phase differences
    starttime = time.time()
    for sin in self.data.keys():
        for rep in self.data[sin].keys():
            sys.stdout.write(f'\rCalculating phase differences for (SIN,REP) = '
                             f'({sin},{rep}) ..... ')
            sys.stdout.flush()
            current_stream = self.select_data(sin=sin, rep=rep, inplace=False, verbose=False)
            # wid = -1 is the sentinel for "whole record, no windows"
            if not self._sql.wids_exist(procset):
                wids = [-1]
            else:
                wids = self._sql.get_wids(sin, rep, procset)
            for wid in wids:
                # deep copy so window-specific data never leaks between wids
                tmp = copy.deepcopy(current_stream)
                self._set_data(tmp, sin, rep, procset, wid)
                cur_pd, cur_fids, cur_freq = tmp.compute_phasediffs()
                cur_rec = tmp.receiver
                # columns of this stream's receivers in the global layout
                # (phase differences live between adjacent receivers,
                # hence the [:-1] and the "- 1" width)
                rin = np.array([receiver_index[r] for r in cur_rec[:-1]], dtype=int)
                pd_full = np.zeros((len(cur_freq), len(all_receivers) - 1), dtype=cur_pd.dtype)
                pd_full[:, rin] = cur_pd
                self._sql.write_pd(cur_fids, cur_freq, pd_full, sin, rep, procset)
    endtime = time.time()
    print(f'{np.round(endtime - starttime, 2)} s')
    # compute mean and std of phase differences for repeated shots
    starttime = time.time()
    fprint = False
    for sin in self.data.keys():
        # aggregation only makes sense when a shot has repeats
        if len(self.data[sin]) > 1:
            fprint = True
            sys.stdout.write(f'\rCalculating mean and std of phase differences for (SIN) = '
                             f'({sin}) ..... ')
            sys.stdout.flush()
            self._sql.group_pd(sin, procset, by = 'AVG')
            self._sql.group_pd(sin, procset, by = 'STDEV')
    if fprint:
        endtime = time.time()
        print(f'{np.round(endtime - starttime, 2)} s')
    self.set_loadset(procset)
[docs]
def run(self, min_offset=3, max_offset=1e6, lam = 1, abs_err = None, rel_err = None, procset = None,
        opt_lam = False, scale = 0.6, lam_max = 200, **kwargs):
    """
    Run the tomographic like approach
    Parameters
    ----------
    min_offset : float, minimum offset to consider for trimming stream
    max_offset : float, maximum offset to consider for trimming stream
    lam : int, regularization strength
    abs_err : float or None, default None, absolute error used as variance in
        the error-weighted least-squares approach (rel_err takes precedence)
    rel_err : float or None, default None, error to consider in the error-weighted least-squares approach
    procset : str, identifier to set on which dataset the processing should be applied to
    opt_lam: bool, use constant lambda or find optimum lambda per iteration
    scale: float, percentage value to decrease lambda
    lam_max: int, maximum value of lambda
    """
    if procset is None:
        procset = self._procset
    np.set_printoptions(threshold=sys.maxsize)
    starttime = time.time()
    print(f'Running tomographic-like approach')
    # ensure phasediff table exists; compute it on demand
    try:
        freq = self._sql.read_f_from_pd(procset, calc='NONE')
    except ValueError:
        self.compute_phasediff(procset)
        freq = self._sql.read_f_from_pd(procset, calc='NONE')
    # geometry
    _, recs_all = self._sql.get_geometry(sin='*')
    dx = np.mean(np.diff(recs_all['rx'].iloc[:-1].values))
    nrec = len(recs_all)
    nshot = len(self.data)
    # one phase-velocity profile column per frequency
    phi_vel_all = np.zeros((nrec - 1, len(freq)))
    # MAIN FREQUENCY LOOP
    for jj, f in enumerate(freq):
        # allocate generously (forward + reverse row per shot/segment); shrink later
        maxrows = 2 * nshot * (nrec - 1)
        A = np.zeros((maxrows, nrec - 1))
        dphi = np.zeros((maxrows,))
        variance = np.zeros((maxrows,))
        iii = 0  # row counter
        # loop over shots
        for sin in self.data.keys():
            # NOTE(review): rep=1 is hard-coded for the shot geometry read —
            # confirm that is always the reference repetition
            _, _, _, sht = self._sql.read_data(sin, 1, procset='raw')
            _, recs = self._sql.get_geometry(sin=sin)
            shot_x = sht['sx'].item()
            rx_all = recs_all['rx'].iloc[:-1].values
            offset = rx_all - shot_x
            # repeated shots: use the precomputed mean/std aggregates
            if len(self.data[sin]) > 1:
                pd_mean = self._sql.read_pd(sin, procset, calc='AVG')
                pd_std = self._sql.read_pd(sin, procset, calc='STDEV')
            else:
                pd_mean = self._sql.read_pd(sin, procset, calc='NONE')
                pd_std = None
            # if no data exits continue with next iteration
            if pd_mean is None:
                continue
            # FORWARD (offset > 0): segments within this shot's spread and
            # the user-defined offset band, with negative phase difference
            fwd_mask = (
                (offset >= min_offset) &
                (offset <= max_offset) &
                (rx_all >= recs['rx'].iloc[:-1].min()) &
                (rx_all <= recs['rx'].iloc[:-1].max()) &
                (pd_mean[jj, :] < 0)
            )
            fwd_idx = np.where(fwd_mask)[0]
            for ii in fwd_idx:
                A[iii, ii] = dx
                dphi[iii] = pd_mean[jj, ii]
                # variance priority: rel_err > abs_err > repeat STD > 1.0
                if rel_err is not None:
                    variance[iii] = (rel_err * abs(dphi[iii]))
                elif abs_err is not None:
                    variance[iii] = abs_err
                elif pd_std is not None:
                    variance[iii] = pd_std[jj, ii] ** 2
                else:
                    variance[iii] = 1.0
                iii += 1
            # REVERSE (offset < 0): mirrored offset band, positive phase
            # difference, negative path-length sign in A
            rev_mask = (
                (offset <= -min_offset) &
                (offset >= -max_offset) &
                (rx_all >= recs['rx'].iloc[:-1].min()) &
                (rx_all <= recs['rx'].iloc[:-1].max()) &
                (pd_mean[jj, :] > 0)
            )
            rev_idx = np.where(rev_mask)[0]
            for ii in rev_idx:
                A[iii, ii] = -dx
                dphi[iii] = pd_mean[jj, ii]
                if rel_err is not None:
                    variance[iii] = (rel_err * abs(dphi[iii]))
                elif abs_err is not None:
                    variance[iii] = abs_err
                elif pd_std is not None:
                    variance[iii] = pd_std[jj, ii] ** 2
                else:
                    variance[iii] = 1.0
                iii += 1
        # trim system to actual rows (drop the all-zero padding rows)
        empty_rows = np.all(A == 0, axis=1)  # FIX: was 'id', shadowing the builtin
        A = A[~empty_rows]
        dphi = dphi[~empty_rows.reshape((-1,))]
        variance = variance[~empty_rows.reshape((-1,))]
        # Weight matrix (inverse-variance weighting)
        weights = 1.0 / variance
        w = np.diag(weights)
        if opt_lam:
            # FIX: removed 'starttime = time.time()' here — it was never
            # read and clobbered the overall timer used in the final print
            lam = lambda_search(f=f, A=A, dphi=dphi, w=w, lam_max=lam_max,scale=scale)
        # Solve system
        phi_vel, dphi_resp = tomo2D_phasediff(lam=lam, f=f, A=A, dphi=dphi, w=w)
        phi_vel_all[:, jj] = phi_vel
        # NOTE(review): chi2 is computed but unused — confirm whether it
        # should be reported or stored
        _,_, chi2 = compute_chi2(dphi,dphi_resp, variance)
        if opt_lam:
            print(f"{f:6.2f} Hz | λ = {lam} | Cost = {np.linalg.norm(dphi_resp - dphi):.3f}")
        else:
            print(f"{f:6.2f} Hz | Cost = {np.linalg.norm(dphi_resp - dphi):.3f}")
        # plot results (setdefault keeps the flag available on later iterations)
        if kwargs.setdefault('showResults', False):
            recs_plot = np.asarray(recs_all['rx'].iloc[:-1])
            axes = kwargs.pop('axes', None)
            outfile = kwargs.pop('outfile', None)
            if outfile:
                parent = os.path.dirname(outfile)
                safe_makedirs(parent)
            plot_tomo2D(dphi, dphi_resp, recs_plot, phi_vel, f, axes, outfile)
    # Store curves: one dispersion curve per inter-receiver midpoint
    xmids = recs_all['rx'].iloc[:-1].values + dx / 2
    for i in range(len(phi_vel_all)):
        data = {
            'xmid': xmids[i],
            'method': 'tomo2D',
            'dc_mode': 0,
            'frequency': freq,
            'velocity': phi_vel_all[i, :],
            'error': np.zeros_like(phi_vel_all[i, :])
        }
        self._sql.write_curve(data, -1, -1, wid=-1,
                              procset=procset, xmid=data['xmid'])
    print(f'Finished tomographic-like approach ..... {np.round(time.time() - starttime, 2)} s')
[docs]
def process_curves(self, type='smooth', procset = None, method = 'tomo2D',dc_mode = 0, **kwargs):
    """
    Apply a process to a dispersion curve

    Parameters
    ----------
    type : str, which processing method to call
    procset : str, identifier to set on which dataset the processing should be applied to
    method : str, method used to obtain dispersion curve
    dc_mode : int, default 0, mode of propagation
    kwargs : arguments for the processing
    """
    if procset is None:
        procset = self._procset
    starttime = time.time()
    print(f'Applying {type} to dispersion curves ..... ', end = '')
    # base query shared by the xmid lookup and the per-curve queries;
    # procset/method are quoted because read_curve builds raw SQL from them
    base = {'sin': -1, 'rep': -1, 'wid': -1, 'procset': "'%s'" % procset,
            'method': "'%s'" % method, 'dc_mode': dc_mode}
    curves = self._sql.read_curve(base)
    for xmid in curves['xmid'].unique():
        params = dict(base, xmid=xmid)
        # BUGFIX: forward the requested `method` instead of hardcoding 'tomo2D',
        # which made the method argument silently ineffective downstream
        self._process_curve(type, params, method=method, procset=procset, **kwargs)
    endtime = time.time()
    print(f'{np.round(endtime - starttime, 2)} s')
[docs]
def plot_curves(self, procset = None, method = 'tomo2D',dc_mode = 0, **kwargs):
    """
    Plot every dispersion curve found for the given processing set.

    One ``DispersionCurve`` is built and plotted per unique receiver-spread
    midpoint (``xmid``) returned by the database query.

    Parameters
    ----------
    procset : str, identifier to set on which dataset the processing should be applied to
    method : str, method used to obtain dispersion curve
    dc_mode : int, default 0, mode of propagation
    kwargs : forwarded to ``DispersionCurve.plot``
    """
    if procset is None:
        procset = self._procset
    # base query; procset/method are quoted for the raw-SQL filter
    base_query = {'sin': -1, 'rep': -1, 'wid': -1, 'procset': "'%s'" % procset,
                  'method': "'%s'" % method, 'dc_mode': dc_mode}
    midpoints = self._sql.read_curve(base_query)['xmid'].unique()
    for midpoint in midpoints:
        curve_data = self._sql.read_curve(dict(base_query, xmid=midpoint))
        curve = DispersionCurve()
        curve.init_data(curve_data['frequency'], curve_data['velocity'], curve_data['error'])
        curve.plot(**kwargs)
[docs]
def save_curves(self, procset=None, method='tomo2D', dc_mode=0, format = 'csv', **kwargs):
    """
    Save the dispersion curves based on receiver spread midpoint

    Writes the midpoint coordinates to ``<path2dc>/02_geom/xmid.txt`` and one
    curve file ``dc<i>`` per midpoint under
    ``03_proc/<procset>_<method>/Mode<dc_mode>``.

    Parameters
    ----------
    procset : str, identifier to set on which dataset the processing should be applied to
    method : str, method used to obtain dispersion curve
    dc_mode : int, default 0, mode of propagation
    format : str, default 'csv', in which format the dispersion curve should be saved (dinver and ParkSeis support)
    kwargs : forwarded to ``_save_dc``
    """
    if procset is None:
        procset = self._procset
    # base query shared by the xmid lookup and per-curve queries
    base = {'sin': -1, 'rep': -1, 'wid': -1, 'procset': "'%s'" % procset,
            'method': "'%s'" % method, 'dc_mode': dc_mode}
    curves = self._sql.read_curve(base)
    xmids = curves['xmid'].unique()
    # save the receiver spread midpoints
    path2dc = os.path.join(self.prjdir, f'03_proc/{procset}_{method}/Mode{int(dc_mode)}')
    path2geom = os.path.join(path2dc, '02_geom')
    starttime = time.time()
    # BUGFIX: corrected typo in user-facing message ("disperion" -> "dispersion")
    print(f'Saving dispersion curves to "{path2dc}" ..... ', end='')
    safe_makedirs(path2geom)
    np.savetxt(os.path.join(path2geom, 'xmid.txt'), xmids)
    # save the dispersion curves, one file per midpoint
    for i, xmid in enumerate(xmids):
        self._save_dc(path2dc, 'dc%d' % i, dict(base, xmid=xmid), format=format, **kwargs)
    endtime = time.time()
    print(f'{np.round(endtime - starttime, 2)} s')
[docs]
def save(self, procset=None, method='tomo2D', dc_mode=0, format = 'csv', **kwargs):
    """
    Convenience alias for :meth:`save_curves`.

    Saves the dispersion curves based on receiver spread midpoint; all
    arguments are forwarded unchanged.

    Parameters
    ----------
    procset : str, identifier to set on which dataset the processing should be applied to
    method : str, method used to obtain dispersion curve
    dc_mode : int, default 0, mode of propagation
    format : str, default 'csv', in which format the dispersion curve should be saved (dinver and ParkSeis support)
    kwargs : forwarded to ``save_curves``
    """
    # pure delegation; kept for API symmetry with other managers
    self.save_curves(procset, method, dc_mode, format, **kwargs)