From d571c4435fd02ef957000fe219ee8ee11441ccd9 Mon Sep 17 00:00:00 2001 From: Marco Cammarata Date: Thu, 9 Feb 2017 16:57:37 +0100 Subject: [PATCH] new module with filtering functions --- xray/filters.py | 85 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 xray/filters.py diff --git a/xray/filters.py b/xray/filters.py new file mode 100644 index 0000000..fadabd1 --- /dev/null +++ b/xray/filters.py @@ -0,0 +1,85 @@ +""" + module that contains filters and outliers removal procedures + most of them return the data array and a dictionary with additional info + (parameters, statistics, etc) +""" +from __future__ import print_function,division +from . import utils +import logging +import statsmodels.robust +log = logging.getLogger(__name__) # __name__ is "foo.bar" here + +import numpy as np +np.seterr(all='ignore') + +def removeZingers(curves,errs=None,norm='auto',threshold=10,useDerivative=False): + """ curves will be normalized internally + if errs is None, calculate mad based noise + useDerivative for data with trends .. + """ + # normalize + if norm == 'auto': + norm = np.nanmean(curves,axis=1) + norm = utils.reshapeToBroadcast(norm,curves) + + if useDerivative: + data = np.gradient(curves/norn,axis=0) + else: + data = curves/norm + + median = np.median(data,axis=0) + + # calculate or normalize error + if errs is None: + errs = statsmodels.robust.mad(data,axis=0) + else: + errs = errs/norm + + diff = np.abs(data-median)/errs + idx = diff > threshold + log.debug("Removed %d zingers from %d curves"%(idx.sum(),len(curves))) + print("Removed %d zingers from %d curves"%(idx.sum(),len(curves))) + if idx.sum()>0: + curves[idx]=np.nan + #curves = np.ma.MaskedArray(data=curves,mask=idx) + return curves + +def filterOutlier(curves,errs=None,norm=None,threshold=10): + # normalize + if norm == 'auto': + norm = np.nanmean(curves,axis=1) + norm = utils.reshapeToBroadcast(n,curves) + elif norm is None: + norm = 1 + + curves = curves/norm + if errs is None: + errs = statsmodels.robust.mad(curves,axis=0) + else: + errs = errs/norm + + median = np.median(curves) + diff = np.abs(curves-median)/errs + chi2 = np.sum(diff**2)/len(curves) + idx = chi2 < threshold + return curves[idx] + +def chi2Filter(diffs,threshold=10): + """ Contrary to removeZingers, this removes entire curves """ + idx_mask = [] + for iscan in range(len(diffs.diffsInScanPoint)): + idx = diffs.chi2_0[iscan] > threshold + # expand along other axis (q ...) + #idx = utils.reshapeToBroadcast(idx,data.diffsInScanPoint[iscan]) + idx_mask.append(idx) + log.debug("Chi2 mask, scanpoint: %s, curves filtereout out %d/%d (%.2f%%)"%\ + (data.scan[iscan],idx.sum(),len(idx),idx.sum()/len(idx)*100) ) + print("Chi2 mask, scanpoint: %s, curves filtereout out %d/%d (%.2f%%)"%\ + (data.scan[iscan],idx.sum(),len(idx),idx.sum()/len(idx)*100) ) + + if "masks" not in data: data['masks'] = dict() + if "masks_pars" not in data: data['masks_pars'] = dict() + data['masks']['chi2'] = idx_mask + data['masks_pars']['chi2_threshold'] = threshold + return data +