bug fix in index for reference + removed filtering

This commit is contained in:
Marco Cammarata 2017-02-09 17:14:35 +01:00
parent 02186d220f
commit c9ef0f42a6
1 changed files with 45 additions and 86 deletions

View File

@ -3,7 +3,6 @@ from __future__ import print_function,division
import logging import logging
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
import numpy as np import numpy as np
np.seterr(all='ignore') np.seterr(all='ignore')
from . import utils from . import utils
@ -19,36 +18,55 @@ def subtractReferences(i,idx_ref, useRatio = False):
iref=np.empty_like(i) iref=np.empty_like(i)
idx_ref = np.squeeze(idx_ref) idx_ref = np.squeeze(idx_ref)
idx_ref = np.atleast_1d(idx_ref) idx_ref = np.atleast_1d(idx_ref)
# sometime there is just one reference (e.g. sample scans)
if idx_ref.shape[0] == 1: if idx_ref.shape[0] == 1:
return i-i[idx_ref] if useRatio:
return i/i[idx_ref]
else:
return i-i[idx_ref]
# references before first ref are "first ref" # references before first ref are "first ref"
iref[:idx_ref[0]] = i[idx_ref[0]] iref[:idx_ref[0]] = i[idx_ref[0]]
# references after last ref are "last ref" # references after last ref are "last ref"
iref[idx_ref[-1]:] = i[idx_ref[-1]] iref[idx_ref[-1]:] = i[idx_ref[-1]]
_ref = 0 _ref = 0
for _i in range(idx_ref[0],idx_ref[-1]): for _i in range(idx_ref[0],idx_ref[-1]):
if _i in idx_ref: continue
idx_ref_before = idx_ref[_ref] idx_ref_before = idx_ref[_ref]
idx_ref_after = idx_ref[_ref+1] idx_ref_after = idx_ref[_ref+1]
ref_before = i[idx_ref_before] ref_before = i[idx_ref_before]
ref_after = i[idx_ref_after] ref_after = i[idx_ref_after]
weight_before = float(_i-idx_ref_before)/(idx_ref_after-idx_ref_before) weight_before = float(_i-idx_ref_before)/(idx_ref_after-idx_ref_before)
weight_after = 1-weight_before weight_after = 1-weight_before
if weight_after == 1: # normal reference for an on chi, the weighted average
iref[_i] = ref_before iref[_i] = weight_before*ref_before + weight_after*ref_after
elif weight_before == 1: if _i>=idx_ref_after-1: _ref += 1
iref[_i] = ref_after log.debug("For image %d : %d-%d"%(_i,idx_ref_before,idx_ref_after))
else: # take care of the reference for the references ...
if len(idx_ref) > 2:
iref[idx_ref[0]] = i[idx_ref[1]]
iref[idx_ref[-1]] = i[idx_ref[-2]]
for _i in range(1,len(idx_ref)-1):
idx_ref_before = idx_ref[_i-1]
idx_ref_after = idx_ref[_i+1]
ref_before = i[idx_ref_before]
ref_after = i[idx_ref_after]
weight_before = float(idx_ref[_i]-idx_ref_before)/(idx_ref_after-idx_ref_before)
weight_after = 1-weight_before
# normal reference for an on chi, the weighted average # normal reference for an on chi, the weighted average
iref[_i] = weight_before*ref_before + weight_after*ref_after iref[idx_ref[_i]] = weight_before*ref_before + weight_after*ref_after
if _i>=idx_ref_after: _ref += 1 log.debug("For reference image %d : %d-%d"%(idx_ref[_i],idx_ref_before,idx_ref_after))
log.debug("SubtractRederence For image %d : %d-%d"%(_i,idx_ref_before,idx_ref_after)) else:
#print(idx_ref)
#print(iref[idx_ref])
iref[idx_ref]=i[idx_ref[0]]
#print(iref[idx_ref])
if useRatio: if useRatio:
i /= iref i /= iref
else: else:
i -= iref i -= iref
return i return i
def averageScanPoints(scan,data,isRef=None,lpower=None,useRatio=False,\ def averageScanPoints(scan,data,errAbs=None,isRef=None,lpower=None,useRatio=False,\
funcForAveraging=np.nanmean): funcForAveraging=np.nanmean):
""" given scanpoints in 'scan' and corresponding data in 'data' """ given scanpoints in 'scan' and corresponding data in 'data'
average all data corresponding the exactly the same scanpoint. average all data corresponding the exactly the same scanpoint.
@ -98,15 +116,21 @@ def averageScanPoints(scan,data,isRef=None,lpower=None,useRatio=False,\
# select data for the scan point # select data for the scan point
diff_for_scan = diff[shot_idx] diff_for_scan = diff[shot_idx]
#if errAbs is not None:
# noise = np.nanmean(errAbs[shot_idx],axis = 0)
#else:
noise = np.nanstd(diff_for_scan, axis = 0)
# if it is the reference take only every second ...
if np.all( shot_idx == isRef ):
diff_for_scan = diff_for_scan[::2]
diffsInScanPoint.append( diff_for_scan ) diffsInScanPoint.append( diff_for_scan )
# calculate average # calculate average
ret[i] = funcForAveraging(diff_for_scan,axis=0) ret[i] = funcForAveraging(diff_for_scan,axis=0)
data_abs[i] = funcForAveraging(data[shot_idx],axis=0) data_abs[i] = funcForAveraging(data[shot_idx],axis=0)
# calculate std
noise = np.nanstd(diff[shot_idx], axis = 0)
# calculate chi2 of different repetitions # calculate chi2 of different repetitions
chi2 = np.power( (diff_for_scan - ret[i])/noise,2) chi2 = np.power( (diff_for_scan - ret[i])/noise,2)
# sum over all axis but first # sum over all axis but first
@ -120,13 +144,13 @@ def averageScanPoints(scan,data,isRef=None,lpower=None,useRatio=False,\
err[i] = noise/np.sqrt(shot_idx.sum()) err[i] = noise/np.sqrt(shot_idx.sum())
ret = dict(scan=scan_pos,data=ret,dataUnmasked=ret.copy(),err=err, ret = dict(scan=scan_pos,data=ret,dataUnmasked=ret.copy(),err=err,
errUnmasked=err.copy(),chi2_0=chi2_0,diffsInScanPoint=diffsInScanPoint, errUnmasked=err.copy(),chi2_0=chi2_0,diffsInScanPoint=diffsInScanPoint,
dataAbsAvNeg = avNeg, dataAsAbs=ret+avNeg, dataAbsAvNeg = avNeg, dataAsAbs=ret+avNeg,errAbs=errAbs,
dataAbsAvAll=avData,dataAbsAvScanPoint=data_abs,dataAbs=data.copy()) dataAbsAvAll=avData,dataAbsAvScanPoint=data_abs,dataAbs=data.copy())
ret = storage.DataStorage(ret) ret = storage.DataStorage(ret)
return ret return ret
def calcTimeResolvedSignal(scan,data,reference="min",monitor=None,q=None, def calcTimeResolvedSignal(scan,data,err=None,reference="min",monitor=None,q=None,
saveTxt=True,folder="./",**kw): saveTxt=True,folder="./",**kw):
""" """
reference: can be 'min', 'max', a float|integer or an array of booleans reference: can be 'min', 'max', a float|integer or an array of booleans
@ -153,83 +177,18 @@ def calcTimeResolvedSignal(scan,data,reference="min",monitor=None,q=None,
monitor = np.nanmedian(data[:,idx],axis=1) monitor = np.nanmedian(data[:,idx],axis=1)
monitor = utils.reshapeToBroadcast(monitor,data) monitor = utils.reshapeToBroadcast(monitor,data)
data = data/monitor data = data/monitor
ret = averageScanPoints(scan,data,isRef=isRef,**kw) if err is not None: err = err/monitor
ret = averageScanPoints(scan,data,errAbs=err,isRef=isRef,**kw)
if q is not None: ret["q"] = q if q is not None: ret["q"] = q
return ret return ret
def errorMask(data,threshold=5):
""" Q-by-Q mask !
Very simple but effective mask for zinger like noise
The noise is in general lower when using nanmean instead than
nanmedian but nanmean does not mask out 'spikes'.
This mask mitigate this effect by using nanmedian for the q points
that have an higher than usual error (proxy for spikes ...)
tested with mi1245/dec2016/tiox/tiox1/run3
"""
assert data.data.ndim == 2
idx_mask = []
for iscan in range(len(data.diffsInScanPoint)):
temp = data.diffsInScanPoint[iscan]
# sqrt(len(temp)) = sqrt(numOfDiffs); it is needed to estimate error of single Diff
idx = np.abs(temp-np.median(temp,axis=0)) > threshold*data.err[iscan]*np.sqrt(len(temp))
idx_mask.append( idx )
log.debug("errorMask mask, scanpoint: %s, fraction of q points filtered out (average) %.4e [max %.4e])"%\
(data.scan[iscan],idx.sum()/idx.size,max(np.sum(idx,axis=1)/idx.shape[1])) )
if "masks" not in data: data['masks'] = dict()
if "masks_pars" not in data: data['masks_pars'] = dict()
data['masks']['error'] = idx_mask
data['masks_pars']['errormask_threshold'] = threshold
return data
def chi2Mask(data,threshold=2):
"""
The noise is in general lower when using nanmean instead than
nanmedian but nanmean does not mask out 'spikes'.
This mask mitigate this effect by using nanmedian for the q points
that have an higher than usual error (proxy for spikes ...)
tested with mi1245/dec2016/tiox/tiox1/run3
"""
idx_mask = []
for iscan in range(len(data.diffsInScanPoint)):
idx = data.chi2_0[iscan] > threshold
# expand along other axis (q ...)
idx = utils.reshapeToBroadcast(idx,data.diffsInScanPoint[iscan])
idx_mask.append(idx)
log.debug("Chi2 mask, scanpoint: %s, curves filtereout out %d/%d (%.2f%%)"%\
(data.scan[iscan],idx.sum(),len(idx),idx.sum()/len(idx)*100) )
if "masks" not in data: data['masks'] = dict()
if "masks_pars" not in data: data['masks_pars'] = dict()
data['masks']['chi2'] = idx_mask
data['masks_pars']['chi2_threshold'] = threshold
return data
def applyMasks(data,which='all',funcForAveraging=np.nanmean):
# don't do anything if no mask is defined
if 'masks' not in data: return data
if which == 'all': which = list(data['masks'].keys())
totmask = []
for iscan in range(len(data.diffsInScanPoint)):
mask = data['masks'][which[0]][iscan]
for w in which[1:]:
mask = np.logical_and(mask,data['masks'][w][iscan])
mask = np.squeeze(mask)
totmask.append(mask); # store for later
# check is a q-by-q mask
if mask.shape == data.diffsInScanPoint[iscan].shape:
temp = np.ma.MaskedArray(data=data.diffsInScanPoint[iscan],mask=mask)
data.data[iscan] = funcForAveraging( temp,axis=0 )
else:
data.data[iscan] = funcForAveraging( data.diffsInScanPoint[iscan][~mask],axis=0)
data['mask'] = totmask
return data
def saveTxt(folder,data,delayToStr=True,basename='auto',info="",**kw): def saveTxt(folder,data,delayToStr=True,basename='auto',info="",**kw):
""" data must be a DataStorage instance """ """ data must be a DataStorage instance """
# folder ends usually with sample/run so use the last two subfolders # folder ends usually with sample/run so use the last two subfolders
# the abspath is needed in case we analyze the "./"
folder = os.path.abspath(folder);
if basename == 'auto': if basename == 'auto':
basename = "_".join(folder.rstrip("/").split("/")[-2:]) + "_" basename = "_".join(folder.rstrip("/").split("/")[-2:]) + "_"
q = data.q if "q" in data else np.arange(data.data.shape[-1]) q = data.q if "q" in data else np.arange(data.data.shape[-1])
# save one file with all average diffs # save one file with all average diffs
fname = "%s/%sdiff_av_matrix.txt" % (folder,basename) fname = "%s/%sdiff_av_matrix.txt" % (folder,basename)