cleanup a bit more the storage module ...

This commit is contained in:
Marco Cammarata 2017-01-07 23:53:12 +01:00
parent fdf9b08aee
commit 434fd6a3b1
6 changed files with 92 additions and 81 deletions

View File

@ -121,7 +121,7 @@ def doFolder(folder,files='*.edf*',nQ = 1500,force=False,mask=None,
if storageFile == 'auto': storageFile = folder + "/" + "pyfai_1d.h5" if storageFile == 'auto': storageFile = folder + "/" + "pyfai_1d.h5"
if os.path.exists(storageFile) and not force: if os.path.exists(storageFile) and not force:
saved = utils.data_storage(storageFile) saved = storage.DataStorage(storageFile)
else: else:
saved = None saved = None
@ -163,7 +163,7 @@ def doFolder(folder,files='*.edf*',nQ = 1500,force=False,mask=None,
if diagnostic is not None: if diagnostic is not None:
for k in diagnostic: for k in diagnostic:
ret[k] = np.asarray( [diagnostic[k][f] for f in ret['files']] ) ret[k] = np.asarray( [diagnostic[k][f] for f in ret['files']] )
ret = utils.data_storage(ret) ret = storage.DataStorage(ret)
if storageFile is not None: ret.save(storageFile) if storageFile is not None: ret.save(storageFile)
else: else:
ret = saved ret = saved
@ -224,7 +224,7 @@ def pyFAI_find_center(img,psize=100e-6,dist=0.1,wavelength=0.8e-10,**kwargs):
def average(fileOrFolder,delays=slice(None),scale=1,norm=None,returnAll=False,plot=False, def average(fileOrFolder,delays=slice(None),scale=1,norm=None,returnAll=False,plot=False,
showTrend=False): showTrend=False):
data = utils.data_storage(fileOrFolder) data = storage.DataStorage(fileOrFolder)
if isinstance(delays,slice): if isinstance(delays,slice):
idx = np.arange(data.delays.shape[0])[delays] idx = np.arange(data.delays.shape[0])[delays]
elif isinstance(delays,(int,float)): elif isinstance(delays,(int,float)):

View File

@ -6,6 +6,7 @@ log.basicConfig(level=log.INFO)
import numpy as np import numpy as np
np.seterr(all='ignore') np.seterr(all='ignore')
from . import utils from . import utils
from . import storage
import os import os
def subtractReferences(i,idx_ref, useRatio = False): def subtractReferences(i,idx_ref, useRatio = False):
@ -61,6 +62,7 @@ def averageScanPoints(scan,data,isRef=None,lpower=None,useRatio=False,\
function that support axis=0 as keyword argument function that support axis=0 as keyword argument
""" """
data = data.astype(np.float) data = data.astype(np.float)
avData = np.nanmedian( data , axis = 0 )
if isRef is None: isRef = np.zeros( data.shape[0], dtype=bool ) if isRef is None: isRef = np.zeros( data.shape[0], dtype=bool )
assert data.shape[0] == isRef.shape[0] assert data.shape[0] == isRef.shape[0]
@ -110,8 +112,8 @@ def averageScanPoints(scan,data,isRef=None,lpower=None,useRatio=False,\
err[i] = noise/np.sqrt(shot_idx.sum()) err[i] = noise/np.sqrt(shot_idx.sum())
ret = dict(scan=scan_pos,data=ret,err=err,chi2_0=chi2_0, ret = dict(scan=scan_pos,data=ret,err=err,chi2_0=chi2_0,
dataInScanPoint=dataInScanPoint) dataInScanPoint=dataInScanPoint,avData=avData)
ret = utils.data_storage(ret) ret = storage.DataStorage(ret)
return ret return ret

View File

@ -7,8 +7,8 @@ from mcutils.xray import id9
id9 = xray.id9 id9 = xray.id9
# use npz files (they can handle more stuff (list of arrays,unicode) than h5py) # use npz files (they can handle more stuff (list of arrays,unicode) than h5py)
id9.storage_extension = '.npz' id9.default_extension = '.npz'
#id9.storage_extension = '.h5' #id9.default_extension = '.h5'
def azav(folder,nQ=1500,force=False,saveChi=True, def azav(folder,nQ=1500,force=False,saveChi=True,
poni='auto',storageFile='auto',mask=470): poni='auto',storageFile='auto',mask=470):

View File

@ -7,8 +7,9 @@ import numpy as np
from . import azav from . import azav
from . import dataReduction from . import dataReduction
from . import utils from . import utils
from . import storage
storage_extension = ".npz" default_extension = ".npz"
def _conv(x): def _conv(x):
try: try:
@ -40,17 +41,17 @@ def doFolder_azav(folder,nQ=1500,force=False,mask=None,saveChi=True,
the diagnostics.log """ the diagnostics.log """
diag = dict( delays = readDelayFromDiagnostic(folder) ) diag = dict( delays = readDelayFromDiagnostic(folder) )
if storageFile == 'auto' : storageFile = folder + "/" + "pyfai_1d" + storage_extension if storageFile == 'auto' : storageFile = folder + "/" + "pyfai_1d" + default_extension
return azav.doFolder(folder,files="*.edf*",nQ=nQ,force=force,mask=mask, return azav.doFolder(folder,files="*.edf*",nQ=nQ,force=force,mask=mask,
saveChi=saveChi,poni=poni,storageFile=storageFile,diagnostic=diag) saveChi=saveChi,poni=poni,storageFile=storageFile,diagnostic=diag)
def doFolder_dataRed(folder,storageFile='auto',monitor=None, def doFolder_dataRed(folder,storageFile='auto',monitor=None,
funcForEveraging=np.nanmean,errFilter=True): funcForEveraging=np.nanmean,errFilter=True):
if storageFile == 'auto' : storageFile = folder + "/" + "pyfai_1d" + storage_extension if storageFile == 'auto' : storageFile = folder + "/" + "pyfai_1d" + default_extension
# read azimuthal averaged curves # read azimuthal averaged curves
data = utils.data_storage(storageFile) data = storage.DataStorage(storageFile)
# calculate differences # calculate differences
@ -63,6 +64,6 @@ def doFolder_dataRed(folder,storageFile='auto',monitor=None,
# save txt and npz file # save txt and npz file
dataReduction.saveTxt(folder,diffs,info=data.pyfai_info) dataReduction.saveTxt(folder,diffs,info=data.pyfai_info)
diffs.save(folder + "/" + "diffs" + storage_extension) diffs.save(folder + "/" + "diffs" + default_extension)
return data,diffs return data,diffs

View File

@ -1,5 +1,7 @@
""" hdf5 file based storage; this modules adds the possibility to dump dict as """ npz/hdf5 file based storage;
hdf5 File """ this modules adds the possibility to dump and load objects in files and
a more convenient way of accessing the data via the .attribute syntax thanks
to the DataStorage class """
import numpy as np import numpy as np
import os import os
import h5py import h5py
@ -8,12 +10,41 @@ import collections
import logging as log import logging as log
log.basicConfig(level=log.INFO) log.basicConfig(level=log.INFO)
def unwrapArray(a,recursive=True,readH5pyDataset=True):
    """ Take an object (dict-like, list, ndarray, h5py group/dataset) and
        recursively unwrap it, fixing common packaging issues such as
        values stored as 0d arrays.

        It also contains hacks to work around h5py limitations, e.g.
        restoring the None object and numpy unicode strings.

        Parameters
        ----------
        a : object to unwrap
        recursive : if True, descend into dict-like containers and lists
        readH5pyDataset : if True, h5py datasets are read into memory
            (numpy arrays); if False they are left as lazy datasets
            (scalar datasets are always read)
    """
    # if h5py dataset convert to array (unless caller asked to keep it lazy)
    if isinstance(a,h5py.Dataset) and readH5pyDataset: a = a[...]
    # scalar datasets (shape == ()) hold a single value; always read them
    if isinstance(a,h5py.Dataset) and a.shape == (): a = a[...]
    # unwrap 0d arrays into plain python scalars
    if isinstance(a,np.ndarray) and a.ndim == 0 : a = a.item()
    # convert ascii-encoded arrays back to (unicode) str
    if isinstance(a,np.ndarray) and a.dtype.char == "S": a = a.astype(str)
    if recursive:
        if "items" in dir(a): # dict, h5py groups, npz file
            a = dict(a) # convert to dict, otherwise can't assign values
            for key,value in a.items():
                # propagate flags so readH5pyDataset=False also applies
                # to nested groups (original recursion used the defaults)
                a[key] = unwrapArray(value,recursive=recursive,
                                     readH5pyDataset=readH5pyDataset)
        elif isinstance(a,list):
            # BUG FIX: original indexed with undefined name `i` (a[i]),
            # raising NameError for any list input; use the loop variable
            for index in range(len(a)):
                a[index] = unwrapArray(a[index],recursive=recursive,
                                       readH5pyDataset=readH5pyDataset)
        else:
            pass
    # expose dicts with attribute access (DataStorage defined in this module)
    if isinstance(a,dict): a = DataStorage(a)
    # restore None that cannot be saved in h5py
    if isinstance(a,str) and a == "NONE_PYTHON_OBJECT": a = None
    # h5py can't save numpy unicode
    if isinstance(a,np.ndarray) and a.dtype.char == "S": a = a.astype(str)
    return a
def dictToH5Group(d,group): def dictToH5Group(d,group):
""" helper function that transform (recursively) a dictionary into an """ helper function that transform (recursive) a dictionary into an
hdf group """ hdf group by creating subgroups """
for key,value in d.items(): for key,value in d.items():
if not isinstance(value,(dict,collections.OrderedDict)): if isinstance(value,dict):
# hacks for special s... group.create_group(key)
dictToH5Group(value,group[key])
else:
# h5py can't handle numpy unicode arrays # h5py can't handle numpy unicode arrays
if isinstance(value,np.ndarray) and value.dtype.char == "U": if isinstance(value,np.ndarray) and value.dtype.char == "U":
value = np.asarray([vv.encode('ascii') for vv in value]) value = np.asarray([vv.encode('ascii') for vv in value])
@ -23,9 +54,6 @@ def dictToH5Group(d,group):
group[key] = value group[key] = value
except TypeError: except TypeError:
log.error("Can't save %s"%(key)) log.error("Can't save %s"%(key))
else:
group.create_group(key)
dictToH5Group(value,group[key])
def dictToH5(h5,d): def dictToH5(h5,d):
""" Save a dictionary into an hdf5 file """ Save a dictionary into an hdf5 file
@ -35,37 +63,15 @@ def dictToH5(h5,d):
dictToH5Group(d,h5["/"]) dictToH5Group(d,h5["/"])
h5.close() h5.close()
def h5dataToDict(h5): def h5ToDict(h5,readH5pyDataset=True):
""" Read a hdf5 group into a dictionary """
if isinstance(h5,h5py.Dataset):
temp = h5[...]
# hack for special s...
# unwrap 0d arrays
if isinstance(temp,np.ndarray) and temp.ndim == 0:
temp=temp.item()
# h5py can't handle None
if temp == "NONE_PYTHON_OBJECT": temp=None
# convert back from ascii to unicode
if isinstance(temp,np.ndarray) and temp.dtype.char == "S":
temp = temp.astype(str)
return temp
else:
ret = dict()
for k,v in h5.items(): ret[k] = h5dataToDict(v)
return ret
def h5ToDict(h5):
""" Read a hdf5 file into a dictionary """ """ Read a hdf5 file into a dictionary """
with h5py.File(h5,"r") as h: with h5py.File(h5,"r") as h:
ret = h5dataToDict( h["/"] ) ret = unwrapArray(h,recursive=True,readH5pyDataset=readH5pyDataset)
return ret return ret
def npzToDict(npzFile): def npzToDict(npzFile):
with np.load(npzFile) as npz: d = dict(npz) with np.load(npzFile) as npz: d = dict(npz)
# unwrap 0d arrays d = unwrapArray(d,recursive=True)
for key,value in d.items():
if isinstance(value,np.ndarray) and value.ndim == 0: d[key]=value.item()
return d return d
def dictToNpz(npzFile,d): np.savez(npzFile,**d) def dictToNpz(npzFile,d): np.savez(npzFile,**d)
@ -90,3 +96,34 @@ def save(fname,d):
else: else:
raise ValueError("Extension must be h5 or npz") raise ValueError("Extension must be h5 or npz")
class DataStorage(dict):
    """ dict subclass used as storage for 1d azimuthally integrated info.

        Keys are exposed both as regular dict items and as instance
        attributes: d['data'] and d.data refer to the same value.
    """
    def __init__(self,fileOrDict,default_name='pyfai_1d',default_ext='npz'):
        # fileOrDict can be:
        #   - a dict(-like): used as-is, no backing file (filename = None)
        #   - a file path: loaded via the module-level `read` helper
        #   - a folder path: resolved to <folder>/<default_name>.<default_ext>
        if isinstance(fileOrDict,dict):
            self.filename = None
            d = fileOrDict
        else:
            assert isinstance(fileOrDict,str)
            if os.path.isdir(fileOrDict):
                fileOrDict = fileOrDict + "/" + default_name + "." + default_ext
            self.filename = fileOrDict
            # NOTE(review): `read` is a module-level helper defined elsewhere
            # in this file (presumably dispatching on the file extension)
            d = read(fileOrDict)
        # allow accessing with .data, .delays, etc.
        for k,v in d.items(): setattr(self,k,v)
        # allow accessing as proper dict
        # (dict.update is C-level and bypasses the overridden __setitem__;
        # the attributes were already set in the loop above, so the two
        # views end up in sync)
        self.update( **dict(d) )
    def __setitem__(self, key, value):
        # keep the attribute view and the dict view in sync
        setattr(self,key,value)
        super().__setitem__(key, value)
    def __delitem__(self, key):
        # keep the attribute view and the dict view in sync
        delattr(self,key)
        super().__delitem__(key)
    def save(self,fname=None):
        """ Save the content to fname; defaults to the file it was read
            from (fails the assert if built from a dict and no fname given).
        """
        if fname is None: fname = self.filename
        assert fname is not None
        # module-level `save` helper (npz or h5 depending on extension)
        save(fname,dict(self))

View File

@ -97,7 +97,8 @@ def plotdata(q,data,t=None,plot=True,showTrend=True,title=None,clim='auto'):
plt.title(title) plt.title(title)
def plotdiffs(q,diffs,t,select=None,err=None,showErr=False,cmap=plt.cm.jet): def plotdiffs(q,diffs,t,select=None,err=None,absSignal=None,absSignalScale=10,
showErr=False,cmap=plt.cm.jet):
# this selection trick done in this way allows to keep the same colors when # this selection trick done in this way allows to keep the same colors when
# subselecting (because I do not change the size of diffs) # subselecting (because I do not change the size of diffs)
if select is not None: if select is not None:
@ -105,6 +106,10 @@ def plotdiffs(q,diffs,t,select=None,err=None,showErr=False,cmap=plt.cm.jet):
else: else:
indices = range(len(t)) indices = range(len(t))
lines = [] lines = []
if absSignal is not None:
line = plt.plot(q,absSignal/absSignalScale,
color='k',label="absSignal/%s"%str(absSignalScale))[0]
lines.append(line)
for idiff in indices: for idiff in indices:
color = cmap(idiff/(len(diffs)-1)) color = cmap(idiff/(len(diffs)-1))
label = timeToStr(t[idiff]) label = timeToStr(t[idiff])
@ -174,40 +179,6 @@ def saveTxt(fname,q,i,e=None,headerv=None,info=None,overwrite=True):
x = np.hstack( (headerv[:,np.newaxis],x) ) x = np.hstack( (headerv[:,np.newaxis],x) )
np.savetxt(fname,x.T,fmt="%+10.5e",header=header,comments='') np.savetxt(fname,x.T,fmt="%+10.5e",header=header,comments='')
class data_storage(dict):
""" Storage for 1d integrated info """
def __init__(self,fileOrDict):
if isinstance(fileOrDict,dict):
self.filename = None
d = fileOrDict
else:
assert isinstance(fileOrDict,str)
if os.path.isdir(fileOrDict): fileOrDict = fileOrDict + "/pyfai_1d.h5"
self.filename = fileOrDict
d = storage.read(fileOrDict)
# allow accessing with .data, .delays, etc.
for k,v in d.items(): setattr(self,k,v)
# allow accessing as proper dict
self.update( **dict(d) )
def __setitem__(self, key, value):
setattr(self,key,value)
super().__setitem__(key, value)
def __delitem__(self, key):
delattr(self,key)
super().__delitem__(key)
def save(self,fname=None):
if fname is None: fname = self.filename
assert fname is not None
storage.save(fname,dict(self))
#def asdict(self): return dict(self)
def reshapeToBroadcast(what,ref): def reshapeToBroadcast(what,ref):
""" expand the 1d array 'what' to allow broadbasting to match """ expand the 1d array 'what' to allow broadbasting to match
multidimentional array 'ref'. The two arrays have to same the same multidimentional array 'ref'. The two arrays have to same the same