From 07a9311f160b0e11a3a3b01e2ff099bdb44b2924 Mon Sep 17 00:00:00 2001 From: Marco Cammarata Date: Fri, 20 Jan 2017 10:41:31 +0100 Subject: [PATCH] storage can now use npy files + fix typo --- xray/storage.py => storage.py | 96 ++++++++++++++++++++++++++++------- 1 file changed, 79 insertions(+), 17 deletions(-) rename xray/storage.py => storage.py (65%) diff --git a/xray/storage.py b/storage.py similarity index 65% rename from xray/storage.py rename to storage.py index 5d41e2f..6185a34 100644 --- a/xray/storage.py +++ b/storage.py @@ -1,4 +1,4 @@ -""" npz/hdf5 file based storage; +""" npy/npz/hdf5 file based storage; this modules adds the possibility to dump and load objects in files and a more convenient was of accessing the data via the .attributedict thanks to the DataStorage class """ @@ -11,7 +11,7 @@ import logging log = logging.getLogger(__name__) # __name__ is "foo.bar" here def unwrapArray(a,recursive=True,readH5pyDataset=True): - """ This function takes an object (like a dictionary) and recursivively + """ This function takes an object (like a dictionary) and recursively unwraps it solving many issues like the fact that many objects are packaged as 0d array This funciton has also some specific hack for handling h5py limit to @@ -57,6 +57,7 @@ def dictToH5Group(d,group): def dictToH5(h5,d): """ Save a dictionary into an hdf5 file + TODO: add capability of saving list of array h5py is not capable of handling dictionaries natively""" h5 = h5py.File(h5,mode="w") # group = h5.create_group("/") @@ -74,19 +75,47 @@ def npzToDict(npzFile): d = unwrapArray(d,recursive=True) return d +def npyToDict(npyFile): + d = unwrapArray( np.load(npyFile).item() ,recursive=True) + return d + def dictToNpz(npzFile,d): np.savez(npzFile,**d) +def dictToNpy(npyFile,d): np.save(npyFile,d) + +def objToDict(o,recursive=True): + """ convert a DictWrap to a dictionary (useful for saving); it should work for other objects too + TODO: this function does not catch a list of DataStorage instances like + objToDict( ( DataStorage(), DataStorage() ) ) + is not converted !! + """ + if "items" not in dir(o): return o + d = dict() + for k,v in o.items(): + try: + d[k] = objToDict( v ) + except Exception as e: + log.info("In objToDict, could not convert key %s to dict, error was"%\ + (k,e)) + d[k] = v + return d + def read(fname): extension = os.path.splitext(fname)[1] log.info("Reading storage file %s"%fname) if extension == ".npz": return DataStorage(npzToDict(fname)) + elif extension == ".npy": + return DataStorage(npyToDict(fname)) elif extension == ".h5": return DataStorage(h5ToDict(fname)) else: - raise ValueError("Extension must be h5 or npz, it was %s"%extension) + raise ValueError("Extension must be h5, npy or npz, it was %s"%extension) def save(fname,d): + # make sure the object is dict (recursively) this allows reading it + # without the DataStorage module + d = objToDict(d,recursive=True) extension = os.path.splitext(fname)[1] log.info("Saving storage file %s"%fname) try: @@ -94,24 +123,57 @@ def save(fname,d): return dictToNpz(fname,d) elif extension == ".h5": return dictToH5(fname,d) + elif extension == ".npy": + return dictToNpy(fname,d) else: - raise ValueError("Extension must be h5 or npz") + raise ValueError("Extension must be h5, npy or npz, it was %s"%extension) except Exception as e: log.exception("Could not save %s"%fname) + class DataStorage(dict): - """ Storage for 1d integrated info """ - def __init__(self,fileOrDict,recursive=True, - default_name='pyfai_1d',default_ext='npz'): - if isinstance(fileOrDict,dict): - self.filename = None - d = fileOrDict + """ Storage for dict like object. + recursive : bool + recursively convert dict-like objects to DataStorage + It can save data to file (format npy,npz or h5) + + To initialize it: + + data = DataStorage( dict( a=(1,2,3),b="add"),filename='store.npz' ) + + data = DataStorage( a=(1,2,3), b="add" ) + + reads from file if it exists + data = DataStorage( 'mysaveddata.npz' ) ; + + DOES NOT READ FROM FILE (even if it exists)!! + data = DataStorage( filename = 'mysaveddata.npz' ); + + create empty storage (with default filename) + data = DataStorage() + """ + def __init__(self,*args,filename='data_storage.npz',recursive=True,**kwargs): +# self.filename = kwargs.pop('filename',"data_storage.npz") + self.filename = filename + # interpret kwargs as dict if there are + if len(kwargs) != 0: + fileOrDict = dict(kwargs) + elif len(kwargs)==0 and len(args)>0: + fileOrDict = args[0] else: - assert isinstance(fileOrDict,str) - if os.path.isdir(fileOrDict): - fileOrDict = fileOrDict + "/" + default_name + "." + default_ext - self.filename = fileOrDict - d = read(fileOrDict) + fileOrDict = dict() + + d = dict(); # data dictionary + if isinstance(fileOrDict,dict): + d = fileOrDict + elif isinstance(fileOrDict,str): + if os.path.exists(fileOrDict): + d = read(fileOrDict) + else: + self.filename=fileOrDict + d = dict() + else: + raise ValueError("Invalid DataStorage definition") if recursive: for k in d.keys(): @@ -150,7 +212,7 @@ class DataStorage(dict): s = ["DataStorage obj containing (sorted): ",] for k in keys: if isinstance(self[k],np.ndarray): - value_str = "array %s"% "x".join(map(str,self[k].shape)) + value_str = "array, size %s, type %s"% ("x".join(map(str,self[k].shape)),self[k].dtype) elif isinstance(self[k],DataStorage): value_str = str(self[k])[:50] + "..." elif isinstance(self[k],(str,DataStorage)): @@ -165,4 +227,4 @@ class DataStorage(dict): def save(self,fname=None): if fname is None: fname = self.filename assert fname is not None - save(fname,dict(self)) + save(fname,self)