mcutils/xray/storage.py

135 lines
4.4 KiB
Python

""" npz/hdf5 file based storage;
this module adds the possibility to dump and load objects in files and
a more convenient way of accessing the data via attribute-style access
(e.g. data.key) thanks to the DataStorage class """
import numpy as np
import os
import h5py
import collections
import logging
log = logging.getLogger(__name__) # __name__ is "foo.bar" here
def unwrapArray(a,recursive=True,readH5pyDataset=True):
    """ Unwrap an object read from an npz/hdf5 file into plain python data.

    Conversions applied, in this order:
      - an h5py dataset is read into memory if `readH5pyDataset` is true;
        a scalar dataset (shape ``()``) is always read
      - a 0d numpy array is replaced by the python object it wraps
      - a numpy byte-string array (dtype char "S") is converted to str
      - if `recursive`, anything exposing `items` (dict, h5py group, npz
        file) is copied into a dict whose values are unwrapped, and the
        elements of a list are unwrapped in place
      - a dict is wrapped into a DataStorage
      - the "NONE_PYTHON_OBJECT" marker is turned back into None

    Parameters
    ----------
    a : object
        the object to unwrap
    recursive : bool
        if True, also unwrap the values of mappings and the elements of lists
    readH5pyDataset : bool
        if True, read h5py datasets into numpy arrays

    Returns
    -------
    the unwrapped object (a DataStorage if `a` was dict-like)
    """
    # is h5py dataset convert to array
    if isinstance(a,h5py.Dataset) and readH5pyDataset: a = a[...]
    if isinstance(a,h5py.Dataset) and a.shape == (): a = a[...]
    if isinstance(a,np.ndarray) and a.ndim == 0 : a = a.item()
    # h5py can't save numpy unicode, such arrays come back as byte strings
    if isinstance(a,np.ndarray) and a.dtype.char == "S": a = a.astype(str)
    if recursive:
        # NOTE(review): the nested calls below use the default
        # readH5pyDataset=True, so the flag only affects the top level
        # object; kept as it was because forwarding it would hand back
        # datasets of a file that h5ToDict closes before returning
        if "items" in dir(a): # dict, h5py groups, npz file
            a = dict(a) # convert to dict, otherwise can't assign values
            for key,value in a.items(): a[key] = unwrapArray(value)
        elif isinstance(a,list):
            # the original indexed the list with an undefined name
            # (NameError for every list); unwrap each element in place
            for index,element in enumerate(a): a[index] = unwrapArray(element)
    if isinstance(a,dict): a = DataStorage(a)
    # restore None that cannot be saved in h5py; recent h5py versions return
    # stored strings as bytes, so the marker is recognized in both forms
    if isinstance(a,str) and a == "NONE_PYTHON_OBJECT": a = None
    elif isinstance(a,bytes) and a == b"NONE_PYTHON_OBJECT": a = None
    return a
def dictToH5Group(d,group):
    """ Helper function that transforms (recursively) a dictionary into an
    hdf group by creating subgroups.

    Parameters
    ----------
    d : dict
        data to store; values that are dicts become subgroups
    group : h5py group (or any object supporting `create_group` and item
        assignment)
        destination of the data

    Values that raise a TypeError when assigned to the group are skipped
    and reported through the module logger.
    """
    for key,value in d.items():
        if isinstance(value,dict):
            group.create_group(key)
            dictToH5Group(value,group[key])
            continue
        # h5py can't handle numpy unicode arrays; encode element-wise so
        # that arrays of any number of dimensions are supported (a plain
        # loop over the array only works for 1d arrays)
        if isinstance(value,np.ndarray) and value.dtype.char == "U":
            value = np.char.encode(value,'ascii')
        # h5py can't save None, store a marker string instead
        if value is None: value = "NONE_PYTHON_OBJECT"
        try:
            group[key] = value
        except TypeError:
            log.error("Can't save %s"%(key))
def dictToH5(h5,d):
    """ Save a dictionary into an hdf5 file
    h5py is not capable of handling dictionaries natively

    Parameters
    ----------
    h5 : str
        name of the file to write, opened with mode "w"
    d : dict
        data to save, nested dicts are stored as subgroups
    """
    # the context manager closes the file even if writing fails half way
    with h5py.File(h5,mode="w") as handle:
        dictToH5Group(d,handle["/"])
def h5ToDict(h5,readH5pyDataset=True):
    """ Open the hdf5 file `h5` read-only and return its unwrapped content;
    `readH5pyDataset` is passed on to unwrapArray """
    with h5py.File(h5,"r") as handle:
        return unwrapArray(
            handle,
            recursive=True,
            readH5pyDataset=readH5pyDataset,
        )
def npzToDict(npzFile):
    """ Load the npz file `npzFile` and return its unwrapped content """
    with np.load(npzFile) as archive:
        # copy every entry out of the archive before it gets closed
        content = {name: archive[name] for name in archive.keys()}
    return unwrapArray(content,recursive=True)
def dictToNpz(npzFile,d):
    """ Save the dictionary `d` into `npzFile`, one array per key """
    arrays = dict(d)
    np.savez(npzFile,**arrays)
def read(fname):
    """ Read a storage file, the reader is chosen from the file extension
    (".npz" or ".h5"); any other extension raises a ValueError """
    _, suffix = os.path.splitext(fname)
    log.info("Reading storage file %s"%fname)
    readers = {".npz": npzToDict, ".h5": h5ToDict}
    if suffix not in readers:
        raise ValueError("Extension must be h5 or npz, it was %s"%suffix)
    return readers[suffix](fname)
def save(fname,d):
    """ Save the dictionary `d` in `fname`, the writer is chosen from the
    file extension (".npz" or ".h5"); any other extension raises a
    ValueError """
    _, suffix = os.path.splitext(fname)
    log.info("Saving storage file %s"%fname)
    writers = {".npz": dictToNpz, ".h5": dictToH5}
    if suffix not in writers:
        raise ValueError("Extension must be h5 or npz")
    return writers[suffix](fname,d)
class DataStorage(dict):
    """ Storage for 1d integrated info.

    A dict whose string keys are mirrored as instance attributes, so that
    ``data["x"]`` and ``data.x`` give the same object. Setting or deleting
    through either interface keeps the two views in sync. Keys that are
    not strings are stored as plain dict items only.

    Note that `filename` is set as an attribute and is therefore also
    present as an item (and is written to file by `save`).
    """

    def __init__(self,fileOrDict,default_name='pyfai_1d',default_ext='npz'):
        """
        Parameters
        ----------
        fileOrDict : dict or str
            a dictionary with the data, or the name of a file to read; if
            it is a directory the file default_name.default_ext inside it
            is read
        default_name : str
            file name (without extension) used when a directory is given
        default_ext : str
            file extension (without dot) used when a directory is given
        """
        super().__init__()
        if isinstance(fileOrDict,dict):
            self.filename = None
            d = fileOrDict
        else:
            assert isinstance(fileOrDict,str), "expected a dict or a file name"
            if os.path.isdir(fileOrDict):
                fileOrDict = os.path.join(fileOrDict,default_name + "." + default_ext)
            self.filename = fileOrDict
            d = read(fileOrDict)
        # __setitem__ stores each entry both as item and as attribute
        # (allows accessing with .data, .delays, etc.); self.update is
        # deliberately not used because an entry named "update" would
        # shadow the method
        for k,v in d.items(): self[k] = v

    def __setitem__(self, key, value):
        if isinstance(key,str):
            # __setattr__ stores the value as attribute and as item
            setattr(self,key,value)
        else:
            # attribute names must be strings, keep as item only
            super().__setitem__(key, value)

    def __setattr__(self, key, value):
        """ allows to add fields with data.test=4 """
        super().__setitem__(key, value)
        super().__setattr__(key,value)

    def __delitem__(self, key):
        # raises KeyError for a missing key, as a normal dict does
        super().__delitem__(key)
        # the mirrored attribute can be absent (for example when the item
        # was added with dict.update), so remove it only if it is there
        if isinstance(key,str): self.__dict__.pop(key,None)

    def __delattr__(self, key):
        """ allows to remove fields with del data.test """
        super().__delattr__(key)
        super().pop(key,None)

    def save(self,fname=None):
        """ Save the content to `fname`, or to the file the data was read
        from if `fname` is None """
        if fname is None: fname = self.filename
        assert fname is not None, "no file name given and none stored"
        # `save` is the module level function, not this method
        save(fname,dict(self))