From 57a45da3c14167d8ec49ac72700f2066e9228956 Mon Sep 17 00:00:00 2001
From: Marco Cammarata
Date: Fri, 6 Jan 2017 15:40:26 +0100
Subject: [PATCH] more cleanup and improvements; storage can be chosen between
 npz and h5, data reduction is kind of tested

---
 id9.py                |  41 ++++++-----
 mcutils.py            |  22 +++---
 xray/azav.py          | 157 +++++++++++++++++++++---------------------
 xray/dataReduction.py | 101 +++++++++++++++++++++++----
 xray/storage.py       |  18 ++++-
 xray/utils.py         | 135 ++++++++++++++++++++++++++++++++++++
 6 files changed, 344 insertions(+), 130 deletions(-)
 create mode 100644 xray/utils.py

diff --git a/id9.py b/id9.py
index 5a95c17..ba21b02 100644
--- a/id9.py
+++ b/id9.py
@@ -5,6 +5,10 @@ import os
 import collections
 import numpy as np
 from .xray import azav
+from .xray import dataReduction
+from .xray import utils
+
+storage_extension = ".npz"
 
 def _conv(x):
   try:
@@ -13,20 +17,6 @@ def _conv(x):
     x = np.nan
   return x
 
-def getFiles(folder,basename="*.edf*"):
-  files = glob.glob(folder + "/" + basename)
-  files.sort()
-  return files
-
-def getEdfFiles(folder):
-  return getFiles(folder,basename="*.edf*")
-
-def removeExt(fname):
-  """ special remove extension meant to work with compressed files.edf and .edf.gz files """
-  if fname[-3:] == ".gz": fname = fname[-3:]
-  return os.path.splitext(fname)[0]
-
-
 def readDelayFromDiagnostic(fname):
   """ return an ordered dict dictionary of filename; for each key a rounded
       value of delay is associated """
@@ -34,7 +24,7 @@
   data = np.genfromtxt(fname,usecols=(2,3),\
          dtype=None,converters={3: lambda x: _conv(x)},
          names = ['fname','delay'])
-  files = data['fname'].astype(np.str); # to avoid encoding problems
+  files = data['fname'].astype(str)
   delays = data['delay']
   # skip lines that cannot be interpreted as float (like done, etc)
   idx_ok = np.isfinite( delays )
@@ -45,15 +35,24 @@
 
 def doFolder_azav(folder,nQ=1500,force=False,mask=None,saveChi=True,
-    poni='auto',h5File='auto'):
+    poni='auto',storageFile='auto'):
   """ very small wrapper around azav.doFolder, essentially just reading
       the diagnostics.log """
 
-  if h5File == 'auto': n5File = folder + "/" + "pyfai_1d.h5"
-
   diag = dict( delays = readDelayFromDiagnostic(folder) )
-
+  if storageFile == 'auto' : storageFile = folder + "/" + "pyfai_1d" + storage_extension
   return azav.doFolder(folder,files="*.edf*",nQ=nQ,force=force,mask=mask,
-    saveChi=saveChi,poni=poni,h5File=h5File,diagnostic=diag)
-
+    saveChi=saveChi,poni=poni,storageFile=storageFile,diagnostic=diag)
+
+def doFolder_dataRed(folder,monitor=None,funcForEveraging=np.nanmean,errFilter=True):
+  data = utils.data_storage(folder)
+  scan = data.delays
+  q    = data.q
+  i    = data.data
+  diffs = dataReduction.calcTimeResolvedSignal(scan,i,q=q,reference="min",
+          monitor=monitor,funcForEveraging=funcForEveraging)
+  if funcForEveraging == np.nanmean and errFilter:
+    diffs = dataReduction.errorFilter(diffs)
+  dataReduction.saveTxt(folder,diffs,info=data.pyfai_info)
+  diffs.save(folder + "/" + "diffs" + storage_extension)
+  return diffs
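
For orientation, a minimal sketch of the intended top-level workflow after this change (the folder name is illustrative; it assumes a run directory containing *.edf images, a diagnostics.log and a poni file, and that the package root is importable):

    import id9

    folder = "run1"                       # hypothetical run folder
    data  = id9.doFolder_azav(folder)     # azimuthal average -> run1/pyfai_1d.npz
    diffs = id9.doFolder_dataRed(folder)  # time-resolved differences + diff_av_* text files
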
""" - v = float(s[:-2]) - u = s[-2:] - if (u=="ps"): - v*=1e-12 - elif (u=="ns"): - v*=1e-9 - elif (u=="us"): - v*=1e-6 +_time_regex = re.compile("(-?\d+\.?\d*)((?:s|fs|ms|ns|ps|us)?)") +def strToTime(delay) : + _time2value = dict( fs = 1e-15, ps = 1e-12, ns = 1e-9, us = 1e-6, ms = 1e-3, s = 1) + + match = _time_regex.search(delay) + if match: + n,t = float(match.group(1)),match.group(2) + value = _time2value.get(t,1) + return n*value else: - pass - return v + return None def memAvailable(asHuman=True): import psutil diff --git a/xray/azav.py b/xray/azav.py index 6022d0a..699aef4 100644 --- a/xray/azav.py +++ b/xray/azav.py @@ -9,7 +9,8 @@ import os import collections import glob import pathlib -from . import storage as storage +from . import storage +from . import utils import pyFAI try: @@ -17,14 +18,6 @@ try: except ImportError: log.warn("Can't import matplotlib !") -def removeExt(fname): - """ special remove extension meant to work with compressed files.edf and .edf.gz files """ - if fname[-3:] == ".gz": fname = fname[-3:] - return os.path.splitext(fname)[0] - -def getBasename(fname): - return removeExt(os.path.basename(fname)) - def pyFAIread(fname): """ read data from file using fabio """ import fabio @@ -43,6 +36,10 @@ def pyFAI_dict(ai): ret["detector"] = ai.detector.get_name() return ret +def pyFAI_info(ai): + """ ai is a pyFAI azimuthal intagrator""" + return "#" + str(ai).replace("\n","\n#") + def pyFAI1d(ai, imgs, mask = None, npt_radial = 600, method = 'csr',safe=True,dark=10., polCorr = 1): """ ai is a pyFAI azimuthal intagrator it can be defined with pyFAI.load(ponifile) @@ -75,56 +72,6 @@ def pyFAI2d(ai, imgs, mask = None, npt_radial = 600, npt_azim=360,method = 'csr' out[_i] = i2d return q,azTheta,np.squeeze(out) - -def pyFAI_saveChi(fname,q,i,e=None,ai=None,overwrite=False): - if os.path.exists(fname) and not overwrite: - log.warn("File %s exists, returning",fname) - return - if ai is not None: - if not isinstance(ai,dict): ai = pyFAI_dict(ai) - header = [ "# %s : %s" %(k,v) for (k,v) in zip(ai.keys(),ai.values()) ] - header = "\n".join(header)[1:]; # skip first #, will be added by np - else: - header = "" - x = np.stack( (q,i,e) ) if e is not None else np.stack( (q,i) ) - np.savetxt(fname,x.T,fmt="%+10.5e",header=header) - -class pyFAI_storage(dict): - """ Storage for pyfai integrated info """ - def __init__(self,fileOrDict): - if isinstance(fileOrDict,dict): - self.filename = None - d = fileOrDict - else: - assert isinstance(fileOrDict,str) - self.filename = fileOrDict - d = storage.read(fileOrDict) - - # allow accessing with .data, .delays, etc. 
diff --git a/xray/azav.py b/xray/azav.py
index 6022d0a..699aef4 100644
--- a/xray/azav.py
+++ b/xray/azav.py
@@ -9,7 +9,8 @@ import os
 import collections
 import glob
 import pathlib
-from . import storage as storage
+from . import storage
+from . import utils
 import pyFAI
 
 try:
@@ -17,14 +18,6 @@
 except ImportError:
   log.warn("Can't import matplotlib !")
 
-def removeExt(fname):
-  """ special remove extension meant to work with compressed files.edf and .edf.gz files """
-  if fname[-3:] == ".gz": fname = fname[-3:]
-  return os.path.splitext(fname)[0]
-
-def getBasename(fname):
-  return removeExt(os.path.basename(fname))
-
 def pyFAIread(fname):
   """ read data from file using fabio """
   import fabio
@@ -43,6 +36,10 @@ def pyFAI_dict(ai):
   ret["detector"] = ai.detector.get_name()
   return ret
 
+def pyFAI_info(ai):
+  """ ai is a pyFAI azimuthal integrator """
+  return "#" + str(ai).replace("\n","\n#")
+
 def pyFAI1d(ai, imgs, mask = None, npt_radial = 600, method = 'csr',safe=True,dark=10., polCorr = 1):
   """ ai is a pyFAI azimuthal intagrator
       it can be defined with pyFAI.load(ponifile)
@@ -75,56 +72,6 @@ def pyFAI2d(ai, imgs, mask = None, npt_radial = 600, npt_azim=360,method = 'csr'
     out[_i] = i2d
   return q,azTheta,np.squeeze(out)
 
-
-def pyFAI_saveChi(fname,q,i,e=None,ai=None,overwrite=False):
-  if os.path.exists(fname) and not overwrite:
-    log.warn("File %s exists, returning",fname)
-    return
-  if ai is not None:
-    if not isinstance(ai,dict): ai = pyFAI_dict(ai)
-    header = [ "# %s : %s" %(k,v) for (k,v) in zip(ai.keys(),ai.values()) ]
-    header = "\n".join(header)[1:]; # skip first #, will be added by np
-  else:
-    header = ""
-  x = np.stack( (q,i,e) ) if e is not None else np.stack( (q,i) )
-  np.savetxt(fname,x.T,fmt="%+10.5e",header=header)
-
-class pyFAI_storage(dict):
-  """ Storage for pyfai integrated info """
-  def __init__(self,fileOrDict):
-    if isinstance(fileOrDict,dict):
-      self.filename = None
-      d = fileOrDict
-    else:
-      assert isinstance(fileOrDict,str)
-      self.filename = fileOrDict
-      d = storage.read(fileOrDict)
-
-    # allow accessing with .data, .delays, etc.
-    for k,v in d.items(): setattr(self,k,v)
-
-    # allow accessing as proper dict
-    self.update( **dict(d) )
-
-  def __setitem__(self, key, value):
-    setattr(self,key,value)
-    super().__setitem__(key, value)
-
-  def __delitem__(self, key):
-    delattr(self,key)
-    super().__delitem__(key)
-
-  def save(self,fname=None):
-    if fname is None: fname = self.filename
-    assert fname is not None
-    storage.save(fname,dict(self))
-
-  #def asdict(self): return dict(self)
-
-def readNpzFile(h5File):
-  if os.path.isdir(h5File): h5File = "%s/pyfai_1d.h5" % h5File
-  return pyFAI_storage(h5File)
-
 def _getAI(poni,folder):
   if isinstance(poni,pyFAI.azimuthalIntegrator.AzimuthalIntegrator):
     ai = poni
@@ -150,7 +97,7 @@ def _getAI(poni,folder):
 
 def doFolder(folder,files='*.edf*',nQ = 1500,force=False,mask=None,
-    saveChi=True,poni='auto',h5File='auto',diagnostic=None):
+    saveChi=True,poni='auto',storageFile='auto',diagnostic=None):
   """ calc 1D curves from files in folder, returning a dictionary of stuff
       nQ    : number of Q-points (equispaced)
       force : if True, redo from beginning even if previous data are found
      n-1 in pwd
      n   in homefolder
  """
+  if storageFile == 'auto': storageFile = folder + "/" + "pyfai_1d.h5"
 
-  if h5File == 'auto': h5File = folder + "/" + "pyfai_1d.h5"
-
-  if os.path.exists(h5File) and not force:
-    print("Loading")
-    saved = readNpzFile(h5File)
-    print("done")
+  if os.path.exists(storageFile) and not force:
+    saved = utils.data_storage(storageFile)
   else:
     saved = None
@@ -187,7 +131,7 @@
   files = glob.glob("%s/%s"%(folder,files))
   files.sort()
   if saved is not None:
-    files = [f for f in files if getBasename(f) not in saved["files"]]
+    files = [f for f in files if utils.getBasename(f) not in saved["files"]]
 
   if len(files) > 0:
     # work out mask to use
@@ -204,27 +148,27 @@
       data[ifname] = i
      err[ifname]  = e
       if saveChi:
-        chi_fname = removeExt(fname) + ".chi"
-        pyFAI_saveChi(chi_fname,q,i,e,ai=ai,overwrite=True)
+        chi_fname = utils.removeExt(fname) + ".chi"
+        utils.saveTxt(chi_fname,q,i,e,info=pyFAI_dict(ai),overwrite=True)
 
-    files = [ getBasename(f) for f in files ]
+    files = [ utils.getBasename(f) for f in files ]
     files = np.asarray(files)
     if saved is not None:
       files = np.concatenate( (saved["files"] ,files ) )
       data  = np.concatenate( (saved["data"] ,data ) )
       err   = np.concatenate( (saved["err"] ,err ) )
     ret = dict(q=q,folder=folder,files=files,data=data,err=err,
-          pyfai=pyFAI_dict(ai),mask=mask)
+          pyfai=pyFAI_dict(ai),pyfai_info=pyFAI_info(ai),mask=mask)
 
     # add info from diagnostic if provided
     if diagnostic is not None:
       for k in diagnostic:
         ret[k] = np.asarray( [diagnostic[k][f] for f in ret['files']] )
-
-    if h5File is not None: np.savez(h5File,**ret)
+    ret = utils.data_storage(ret)
+    if storageFile is not None: ret.save(storageFile)
   else:
     ret = saved
-  return pyFAI_storage(ret)
+  return ret
 
 
 def _calc_R(x,y, xc, yc):
@@ -279,6 +223,56 @@ def pyFAI_find_center(img,psize=100e-6,dist=0.1,wavelength=0.8e-10,**kwargs):
   print("Final values: (in pixels) %.3f %.3f"%(xc,yc))
   return ai
 
+def plotdata(q,data,plot=True,showTrend=True,title=None,clim='auto'):
+  if not (plot or showTrend): return
+  if clim == 'auto': clim = np.nanpercentile(data,(1.5,98.5))
+  one_plot = showTrend or plot
+  two_plot = showTrend and plot
+  if one_plot and not two_plot:
+    fig,ax = plt.subplots(1,1)
+  if two_plot:
+    fig,ax = plt.subplots(1,2,sharey=True)
+  ax = np.atleast_1d(ax)
+  if showTrend:
+    plt.sca(ax[0])
+    plt.pcolormesh(np.arange(data.shape[0]),q,data.T)
+    plt.xlabel("image number, 0 being older")
+    plt.ylabel(r"q ($\AA^{-1}$)")
+    plt.clim( *clim )
+  if plot:
+    if showTrend:
+      ax[1].plot(data.mean(axis=0),q)
+    else:
+      ax[0].plot(q,data.mean(axis=0))
+  if (plot or showTrend) and title is not None:
+    plt.title(title)
+
+def average(fileOrFolder,delays=slice(None),scale=1,norm=None,returnAll=False,plot=False,
+  showTrend=False):
+  data = utils.data_storage(fileOrFolder)
+  if isinstance(delays,slice):
+    idx = np.arange(data.delays.shape[0])[delays]
+  elif isinstance(delays,(int,float)):
+    idx = data.delays == float(delays)
+  else:
+    idx = data.delays < 0
+  if idx.sum() == 0:
+    print("No data with the current filter")
+    return None
+  i = data.data[idx]
+  q = data.q
+  if isinstance(norm,(tuple,list)):
+    idx  = ( q>norm[0] ) & (q<norm[1])
+    norm = np.nanmean(i[:,idx],axis=1)
+    i = i/utils.reshapeToBroadcast(norm,i)
+  plotdata(q,i,plot=plot,showTrend=showTrend,title=fileOrFolder)
+  ret = q,np.nanmean(i,axis=0)*scale
+  if returnAll: ret = ret + (i,)
+  return ret
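
Typical use of the integration entry point together with the new average helper, as a minimal sketch (folder name and q-normalization range are illustrative):

    from xray import azav   # assumes the package root is on sys.path

    data  = azav.doFolder("run1", nQ=1500, poni='auto')   # integrate every image
    q, av = azav.average("run1/pyfai_1d.h5", norm=(1.5, 2.5))
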
diff --git a/xray/dataReduction.py b/xray/dataReduction.py
--- a/xray/dataReduction.py
+++ b/xray/dataReduction.py
@@ ... @@
 import numpy as np
+from . import utils
@@ ... @@ def averageScanPoints
+  chi2_0 = []
   for i in range(len(scan_pos)):
     shot_idx = (scan == scan_pos[i])
     if shot_idx.sum() > 0:
-      ret[i] = funcForEveraging(data[shot_idx],axis=0)
-      dataInScanPoint.append( data[shot_idx] )
-      err[i]  = np.std(data[shot_idx], axis = 0)/np.sqrt(shot_idx.sum())
-  return dict(scan=scan_pos,data=ret,err=err,dataInScanPoint=dataInScanPoint)
+
+      # select data for the scan point
+      data_for_scan = data[shot_idx]
+      dataInScanPoint.append( data_for_scan )
+
+      # calculate average
+      ret[i] = funcForEveraging(data_for_scan,axis=0)
+
+      # calculate std
+      noise = np.nanstd(data_for_scan, axis = 0)
+
+      # calculate chi2 of different repetitions
+      chi2 = np.power( (data_for_scan - ret[i])/noise,2)
+      # sum over all axis but first
+      for _ in range(data_for_scan.ndim-1):
+        chi2 = np.nansum( chi2, axis=-1 )
+
+      # store chi2_0
+      chi2_0.append( chi2/ret[i].size )
+
+      # store error of mean
+      err[i] = noise/np.sqrt(shot_idx.sum())
+
+  ret = dict(scan=scan_pos,data=ret,err=err,chi2_0=chi2_0,
+        dataInScanPoint=dataInScanPoint)
+  ret = utils.data_storage(ret)
+  return ret
 
-def calcTimeResolvedSignal(scan,data,reference="min",monitor=None,q=None,**kw):
+def calcTimeResolvedSignal(scan,data,reference="min",monitor=None,q=None,
+    saveTxt=True,folder="./",**kw):
   """
     reference: can be 'min', 'max', a float|integer or an array of booleans
-    q   : is needed if monitor is a tuple|list (it is interpreted as
-          q-range normalization)
+    q       : is needed if monitor is a tuple|list
+    monitor : normalization vector (if it is a tuple|list it is
+              interpreted as a q-range normalization)
+    saveTxt : will save txt outputfiles (diff_av_*) in folder
     other keywords are passed to averageScanPoints
   """
   if reference == "min":
@@ -108,7 +131,6 @@
     isRef = (scan == reference)
   else:
     isRef = reference
-
   # normalize if needed
   if monitor is not None:
     if isinstance(monitor,(tuple,list)):
@@ -116,8 +138,57 @@
       assert data.ndim == 2
       idx = (q>= monitor[0]) & (q<= monitor[1])
       monitor = np.nanmedian(data[:,idx],axis=1)
+    monitor = utils.reshapeToBroadcast(monitor,data)
     data = data/monitor
-  return averageScanPoints(scan,data,isRef=isRef,**kw)
+  ret = averageScanPoints(scan,data,isRef=isRef,**kw)
+  if q is not None: ret["q"] = q
+  return ret
+
+def errorFilter(data,threshold=4):
+  """ Very simple but effective filter for zinger-like noise
+      The noise is in general lower when using nanmean instead of
+      nanmedian, but nanmean does not filter out 'spikes'.
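
For clarity, the chi2_0 bookkeeping above reduces every repetition to one number: its mean squared deviation from the average, in units of the shot-to-shot noise, so values close to 1 flag consistent repetitions. The same arithmetic on synthetic data:

    import numpy as np

    curves = np.random.randn(10, 500)      # 10 repetitions of a 500-point curve
    avg    = np.nanmean(curves, axis=0)
    noise  = np.nanstd(curves, axis=0)
    chi2   = np.nansum(((curves - avg) / noise) ** 2, axis=-1)
    chi2_0 = chi2 / avg.size               # ~1 when repetitions agree
    print(chi2_0.round(2))
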
+      This filter mitigates this effect by using nanmedian for the q points
+      that have a higher than usual error (a proxy for spikes ...)
+      tested with mi1245/dec2016/tiox/tiox1/run3 """
+  assert data.data.ndim == 2
+  for iscan in range(len(data.dataInScanPoint)):
+    median_err = np.nanmedian(data.err[iscan])
+    idx = data.err[iscan] > threshold*median_err
+    data.data[iscan][idx] = \
+        np.nanmedian( data.dataInScanPoint[iscan][:,idx],axis=0 )
+#    data.err[iscan][idx] = median_err
+  return data
+
+def saveTxt(folder,data,delayToStr=True,info="",**kw):
+  """ data must be a data_storage instance """
+  q = data.q if "q" in data else np.arange(data.data.shape[-1])
+  # save one file with all average diffs
+  fname = "%s/diff_av_matrix.txt" % (folder)
+  utils.saveTxt(fname,q,data.data,headerv=data.scan,**kw)
+  for iscan,scan in enumerate(data.scan):
+    scan = utils.timeToStr(scan) if delayToStr else "%+10.5e" % scan
+
+    # try retrieving info on chi2
+    try:
+      chi2_0 = data.chi2_0[iscan]
+      info_delay = [ "# rep_num , chi2_0 ", ]
+      for irep,value in enumerate(chi2_0):
+        info_delay.append( "# %d : %.3f" % (irep,value))
+      info_delay = "\n".join(info_delay)
+      if info != '': info_delay = "%s\n%s" % (info,info_delay)
+    except AttributeError:
+      info_delay = info
+
+    # save one file per timedelay with average diff (and err)
+    fname = "%s/diff_av_%s.txt" % (folder,scan)
+    utils.saveTxt(fname,q,data.data[iscan],e=data.err[iscan],
+      info=info_delay,**kw)
+
+    # save one file per timedelay with all diffs for given delay
+    fname = "%s/diffs_%s.txt" % (folder,scan)
+    utils.saveTxt(fname,q,data.dataInScanPoint[iscan],info=info_delay,**kw)
 
 def read_diff_av(folder,plot2D=False,save=None):
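
The per-delay text output follows the naming patterns above; a hypothetical run with delays -5us and +3us would produce diff_av_matrix.txt, diff_av_-5us.txt, diff_av_+3us.txt and the corresponding diffs_*.txt files, each carrying the pyFAI info plus the per-repetition chi2_0 values in its header. A minimal sketch of driving it directly (names illustrative):

    from xray import dataReduction   # assumes the package root is on sys.path

    diffs = dataReduction.calcTimeResolvedSignal(scan, data, q=q,
                reference="min", monitor=(1.5, 2.5))
    diffs = dataReduction.errorFilter(diffs)
    dataReduction.saveTxt("run1", diffs)
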
diff --git a/xray/storage.py b/xray/storage.py
index f3128c3..4106039 100644
--- a/xray/storage.py
+++ b/xray/storage.py
@@ -5,28 +5,38 @@ import os
 import h5py
 import collections
 
+import logging as log
+log.basicConfig(level=log.INFO)
 
 def dictToH5Group(d,group):
+  """ helper function that transforms (recursively) a dictionary into an
+      hdf5 group """
   for key,value in d.items():
     if not isinstance(value,(dict,collections.OrderedDict)):
-      # hack for special s...
+      # hacks for special s...
       # h5py can't handle numpy unicode arrays
       if isinstance(value,np.ndarray) and value.dtype.char == "U":
         value = np.asarray([vv.encode('ascii') for vv in value])
       # h5py can't save None
       if value is None: value="NONE_PYTHON_OBJECT"
-      group[key] = value
+      try:
+        group[key] = value
+      except TypeError:
+        log.error("Can't save %s"%(key))
     else:
       group.create_group(key)
       dictToH5Group(value,group[key])
 
 def dictToH5(h5,d):
+  """ Save a dictionary into an hdf5 file;
+      h5py is not capable of handling dictionaries natively """
   h5 = h5py.File(h5,mode="w")
 #  group = h5.create_group("/")
   dictToH5Group(d,h5["/"])
   h5.close()
 
 def h5dataToDict(h5):
+  """ Read an hdf5 group into a dictionary """
   if isinstance(h5,h5py.Dataset):
     temp = h5[...]
     # hack for special s...
@@ -35,6 +45,9 @@
       temp=temp.item()
     # h5py can't handle None
     if temp == "NONE_PYTHON_OBJECT": temp=None
+    # convert back from ascii to unicode
+    if isinstance(temp,np.ndarray) and temp.dtype.char == "S":
+      temp = temp.astype(str)
     return temp
   else:
     ret = dict()
@@ -42,6 +55,7 @@
     return ret
 
 def h5ToDict(h5):
+  """ Read an hdf5 file into a dictionary """
   with h5py.File(h5,"r") as h:
     ret = h5dataToDict( h["/"] )
   return ret
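
The helpers round-trip plain dictionaries through hdf5; a minimal sketch (file name illustrative, assumes h5py is installed):

    import numpy as np
    from xray import storage   # assumes the package root is on sys.path

    d = dict(q=np.linspace(0, 4, 5),
             files=np.asarray(["a.edf", "b.edf"]))  # unicode array, encoded on save
    storage.dictToH5("test.h5", d)
    back = storage.h5ToDict("test.h5")
    print(back["files"].dtype.char)  # 'U' again, thanks to the astype(str) branch
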
diff --git a/xray/utils.py b/xray/utils.py
new file mode 100644
index 0000000..28116d1
--- /dev/null
+++ b/xray/utils.py
@@ -0,0 +1,135 @@
+from __future__ import print_function,division
+
+import logging as log
+log.basicConfig(level=log.INFO)
+
+import numpy as np
+np.seterr(all='ignore')
+import os
+import glob
+import pathlib
+import re
+from . import storage as storage
+
+try:
+  import matplotlib.pyplot as plt
+except ImportError:
+  log.warn("Can't import matplotlib !")
+
+_timeInStr_regex = re.compile(r"_(-?\d+\.?\d*(?:ps|ns|us|ms)?)")
+
+def getDelayFromString(string) :
+  match = _timeInStr_regex.search(string)
+  return match and match.group(1) or None
+
+_time_regex = re.compile(r"(-?\d+\.?\d*)((?:s|fs|ms|ns|ps|us)?)")
+def strToTime(delay) :
+  _time2value = dict( fs = 1e-15, ps = 1e-12, ns = 1e-9, us = 1e-6, ms = 1e-3, s = 1)
+
+  match = _time_regex.search(delay)
+  if match:
+    n,t = float(match.group(1)),match.group(2)
+    value = _time2value.get(t,1)
+    return n*value
+  else:
+    return None
+
+def timeToStr(delay,fmt="%+.0f"):
+  a_delay = abs(delay)
+  if a_delay >= 1:
+    ret = fmt % delay + "s"
+  elif 1e-3 <= a_delay < 1:
+    ret = fmt % (delay*1e3) + "ms"
+  elif 1e-6 <= a_delay < 1e-3:
+    ret = fmt % (delay*1e6) + "us"
+  elif 1e-9 <= a_delay < 1e-6:
+    ret = fmt % (delay*1e9) + "ns"
+  elif 1e-12 <= a_delay < 1e-9:
+    ret = fmt % (delay*1e12) + "ps"
+  elif 1e-15 <= a_delay < 1e-12:
+    ret = fmt % (delay*1e15) + "fs"
+  elif 1e-18 <= a_delay < 1e-15:
+    ret = fmt % (delay*1e18) + "as"
+  else:
+    ret = str(delay) + "s"
+  return ret
+
+def removeExt(fname):
+  """ special remove extension meant to work with compressed files.edf and .edf.gz files """
+  if fname[-3:] == ".gz": fname = fname[:-3]
+  return os.path.splitext(fname)[0]
+
+def getBasename(fname):
+  return removeExt(os.path.basename(fname))
+
+def saveTxt(fname,q,i,e=None,headerv=None,info=None,overwrite=True):
+  """ Write data to file 'fname' in text format.
+      Inputs:
+        q = x vector
+        i = 1D or 2D
+        e = 1D (discarded when i is 2D)
+        info = dictionary (saved as '# key : value') or string
+        headerv = vector to be used as header or string
+  """
+  if os.path.exists(fname) and not overwrite:
+    log.warn("File %s exists, returning",fname)
+    return
+  if isinstance(info,dict):
+    header = [ "# %s : %s" %(k,str(v)) for (k,v) in info.items() ]
+    header = "\n".join(header)
+  elif isinstance(info,str):
+    header = info
+  else:
+    header = ""
+  if isinstance(headerv,str): header += "\n%s" % headerv
+  if i.ndim == 1:
+    x = np.vstack( (q,i,e) ) if e is not None else np.vstack( (q,i) )
+  if i.ndim == 2:
+    x = np.vstack( (q,i,) )
+    if headerv is not None:
+      headerv = np.concatenate(( (i.shape[1],),headerv))
+      x = np.hstack( (headerv[:,np.newaxis],x) )
+  np.savetxt(fname,x.T,fmt="%+10.5e",header=header,comments='')
+
+class data_storage(dict):
+  """ Storage for 1d integrated info """
+  def __init__(self,fileOrDict):
+    if isinstance(fileOrDict,dict):
+      self.filename = None
+      d = fileOrDict
+    else:
+      assert isinstance(fileOrDict,str)
+      if os.path.isdir(fileOrDict): fileOrDict = fileOrDict + "/pyfai_1d.h5"
+      self.filename = fileOrDict
+      d = storage.read(fileOrDict)
+
+    # allow accessing with .data, .delays, etc.
+    for k,v in d.items(): setattr(self,k,v)
+
+    # allow accessing as proper dict
+    self.update( **dict(d) )
+
+  def __setitem__(self, key, value):
+    setattr(self,key,value)
+    super().__setitem__(key, value)
+
+  def __delitem__(self, key):
+    delattr(self,key)
+    super().__delitem__(key)
+
+  def save(self,fname=None):
+    if fname is None: fname = self.filename
+    assert fname is not None
+    storage.save(fname,dict(self))
+
+  #def asdict(self): return dict(self)
+
+
+def reshapeToBroadcast(what,ref):
+  """ expand the 1d array 'what' to allow broadcasting to match
+      the multidimensional array 'ref'. The two arrays have to share the
+      same dimension along the first axis """
+  assert what.shape[0] == ref.shape[0]
+  shape = [ref.shape[0],] + [1,]*(ref.ndim-1)
+  return what.reshape(shape)
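
reshapeToBroadcast is what lets the 1d per-image monitor divide a 2d stack of curves in calcTimeResolvedSignal above; a self-contained check, together with the time-string helpers (values illustrative):

    import numpy as np
    from xray import utils   # assumes the package root is on sys.path

    print(utils.timeToStr(3e-6))      # '+3us'
    print(utils.timeToStr(-150e-12))  # '-150ps'

    data    = np.ones((5, 100))       # 5 curves of 100 q-points
    monitor = np.arange(1.0, 6.0)     # one value per curve
    normed  = data / utils.reshapeToBroadcast(monitor, data)
    print(normed[:, 0])               # [1.  0.5  0.33333333  0.25  0.2]
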