"""npz/hdf5 file based storage.

This module adds the possibility to dump and load objects in files and a
more convenient way of accessing the data via attributes (``.name``) thanks
to the DataStorage class.
"""
import numpy as np
import os
try:
    import h5py
except ImportError:
    # h5py is only needed by the hdf5 back-end; without it the npz back-end
    # stays usable and the h5 entry points raise a clear ImportError.
    h5py = None
import collections

import logging

log = logging.getLogger(__name__)

# h5py cannot store None: it is written as this marker and restored on read.
_NONE_MARKER = "NONE_PYTHON_OBJECT"


def _is_h5_dataset(obj):
    """Return True if *obj* is an h5py dataset (False when h5py is missing)."""
    return h5py is not None and isinstance(obj, h5py.Dataset)


def _require_h5py():
    """Raise ImportError if the optional h5py dependency is not available."""
    if h5py is None:
        raise ImportError("h5py is required to read or write .h5 files")


def unwrapArray(a, recursive=True, readH5pyDataset=True):
    """Recursively unwrap an object read from an npz/hdf5 file.

    This solves several issues, like the fact that many objects come back
    packaged as 0d arrays, and works around h5py limitations (None and numpy
    unicode arrays cannot be stored natively).

    Parameters
    ----------
    a : object
        Value to unwrap (dict, h5py group/file, npz file, list, array, ...).
    recursive : bool
        If True, mappings (anything with an ``items`` attribute) and lists
        are unwrapped element by element.
    readH5pyDataset : bool
        If True an h5py dataset is read into memory. Scalar datasets are
        always read. The flag applies to `a` itself only: nested values are
        unwrapped with the default (True).

    Returns
    -------
    The unwrapped object; mappings are returned as DataStorage instances.
    """
    # h5py dataset -> numpy array (scalar datasets are always read)
    if _is_h5_dataset(a) and (readH5pyDataset or a.shape == ()):
        a = a[...]
    # 0d arrays are just wrappers around a python object
    if isinstance(a, np.ndarray) and a.ndim == 0:
        a = a.item()
    # h5py can't save numpy unicode: such arrays are stored as bytes
    if isinstance(a, np.ndarray) and a.dtype.char == "S":
        a = a.astype(str)
    if recursive:
        if hasattr(a, "items"):  # dict, h5py groups, npz file
            # build a plain dict, the source may not allow assigning values
            a = {key: unwrapArray(value) for key, value in dict(a).items()}
        elif isinstance(a, list):
            a = [unwrapArray(value) for value in a]
    if isinstance(a, dict):
        a = DataStorage(a)
    # restore None that cannot be saved in h5py; depending on the h5py
    # version the marker may come back as bytes instead of str
    if isinstance(a, str) and a == _NONE_MARKER:
        a = None
    elif isinstance(a, bytes) and a == _NONE_MARKER.encode("ascii"):
        a = None
    return a


def dictToH5Group(d, group):
    """Recursively store the dictionary `d` into the hdf5 `group`.

    Nested dictionaries become subgroups; every other value is assigned to
    ``group[key]``. Values h5py refuses with a TypeError are skipped and
    reported through the module logger.
    """
    for key, value in d.items():
        if isinstance(value, dict):
            group.create_group(key)
            dictToH5Group(value, group[key])
            continue
        # h5py can't handle numpy unicode arrays; np.char.encode works for
        # any array shape (0d and multi-dimensional included)
        if isinstance(value, np.ndarray) and value.dtype.char == "U":
            value = np.char.encode(value, "ascii")
        # h5py can't save None
        if value is None:
            value = _NONE_MARKER
        try:
            group[key] = value
        except TypeError:
            log.error("Can't save %s", key)


def dictToH5(h5, d):
    """Save the dictionary `d` into the hdf5 file named `h5`.

    h5py is not capable of handling dictionaries natively. The file is
    opened in "w" mode and closed even if writing fails.
    """
    _require_h5py()
    with h5py.File(h5, mode="w") as h5file:
        dictToH5Group(d, h5file["/"])


def h5ToDict(h5, readH5pyDataset=True):
    """Read the hdf5 file named `h5` and return its unwrapped content.

    `readH5pyDataset` is forwarded to unwrapArray.
    """
    _require_h5py()
    with h5py.File(h5, "r") as h:
        ret = unwrapArray(h, recursive=True, readH5pyDataset=readH5pyDataset)
    return ret


def npzToDict(npzFile):
    """Read the npz file `npzFile` and return its unwrapped content."""
    # np.savez pickles non-array values (nested dicts, None) into object
    # arrays, which np.load refuses unless allow_pickle is set.
    # NOTE(security): unpickling can execute arbitrary code; only load files
    # coming from a trusted source.
    with np.load(npzFile, allow_pickle=True) as npz:
        d = dict(npz)
    return unwrapArray(d, recursive=True)


def dictToNpz(npzFile, d):
    """Save the dictionary `d` into `npzFile`, one array per key."""
    np.savez(npzFile, **d)


def read(fname):
    """Read a storage file, choosing the reader from the file extension.

    Raises ValueError if the extension is neither ".npz" nor ".h5".
    """
    extension = os.path.splitext(fname)[1]
    log.info("Reading storage file %s", fname)
    if extension == ".npz":
        return npzToDict(fname)
    elif extension == ".h5":
        return h5ToDict(fname)
    else:
        raise ValueError("Extension must be h5 or npz, it was %s" % extension)


def save(fname, d):
    """Save the dictionary `d`, choosing the writer from the file extension.

    Raises ValueError if the extension is neither ".npz" nor ".h5".
    """
    extension = os.path.splitext(fname)[1]
    log.info("Saving storage file %s", fname)
    if extension == ".npz":
        return dictToNpz(fname, d)
    elif extension == ".h5":
        return dictToH5(fname, d)
    else:
        raise ValueError("Extension must be h5 or npz")


class DataStorage(dict):
    """Storage for 1d integrated info.

    A dict whose items are also exposed as attributes (``d.key``). It is
    built either from a dictionary or from the name of a storage file (or of
    a folder containing ``default_name.default_ext``). Keys must be strings,
    since each of them is also set as an attribute.
    """

    def __init__(self, fileOrDict, default_name='pyfai_1d', default_ext='npz'):
        if isinstance(fileOrDict, dict):
            self.filename = None
            d = fileOrDict
        else:
            assert isinstance(fileOrDict, str), "expected a dict or a file name"
            if os.path.isdir(fileOrDict):
                fileOrDict = os.path.join(
                    fileOrDict, default_name + "." + default_ext
                )
            self.filename = fileOrDict
            d = read(fileOrDict)

        # allow accessing with .data, .delays, etc.
        for k, v in d.items():
            setattr(self, k, v)

        # allow accessing as proper dict
        super().update(d)

    def __setitem__(self, key, value):
        # keep attribute and item in sync
        setattr(self, key, value)
        super().__setitem__(key, value)

    def __delitem__(self, key):
        # remove the item first so that a missing key raises KeyError
        super().__delitem__(key)
        # the mirror attribute may not exist (e.g. key added via update())
        self.__dict__.pop(key, None)

    def save(self, fname=None):
        """Save the content to `fname` (default: the file it was read from)."""
        if fname is None:
            fname = self.filename
        assert fname is not None, "no file name given"
        save(fname, dict(self))