storage can now use npy files + fix typo

This commit is contained in:
Marco Cammarata 2017-01-20 10:41:31 +01:00
parent a95b0aa10e
commit 07a9311f16
1 changed files with 79 additions and 17 deletions

View File

@ -1,4 +1,4 @@
""" npz/hdf5 file based storage; """ npy/npz/hdf5 file based storage;
this module adds the possibility to dump and load objects in files and this module adds the possibility to dump and load objects in files and
a more convenient way of accessing the data via the .attributedict thanks a more convenient way of accessing the data via the .attributedict thanks
to the DataStorage class """ to the DataStorage class """
@ -11,7 +11,7 @@ import logging
log = logging.getLogger(__name__) # __name__ is "foo.bar" here log = logging.getLogger(__name__) # __name__ is "foo.bar" here
def unwrapArray(a,recursive=True,readH5pyDataset=True): def unwrapArray(a,recursive=True,readH5pyDataset=True):
""" This function takes an object (like a dictionary) and recursivively """ This function takes an object (like a dictionary) and recursively
unwraps it solving many issues like the fact that many objects are unwraps it solving many issues like the fact that many objects are
packaged as 0d array packaged as 0d array
This function has also some specific hack for handling h5py limit to This function has also some specific hack for handling h5py limit to
@ -57,6 +57,7 @@ def dictToH5Group(d,group):
def dictToH5(h5,d): def dictToH5(h5,d):
""" Save a dictionary into an hdf5 file """ Save a dictionary into an hdf5 file
TODO: add capability of saving list of array
h5py is not capable of handling dictionaries natively""" h5py is not capable of handling dictionaries natively"""
h5 = h5py.File(h5,mode="w") h5 = h5py.File(h5,mode="w")
# group = h5.create_group("/") # group = h5.create_group("/")
@ -74,19 +75,47 @@ def npzToDict(npzFile):
d = unwrapArray(d,recursive=True) d = unwrapArray(d,recursive=True)
return d return d
def npyToDict(npyFile):
    """ Read back a dictionary stored in a npy file (see dictToNpy).

    np.save stores a dictionary as a pickled 0d object array; the array is
    loaded, the dictionary is extracted with .item() and then passed
    (recursively) through unwrapArray.

    npyFile : passed as is to np.load

    returns : the output of unwrapArray

    NOTE: loading pickled data can execute arbitrary code, use this
          function only on files coming from a trusted source
    """
    # allow_pickle has to be requested explicitly: recent numpy versions
    # (>= 1.16.3) refuse to load object arrays by default and every file
    # written by dictToNpy would fail with a ValueError
    wrapped = np.load(npyFile, allow_pickle=True)
    d = unwrapArray(wrapped.item(), recursive=True)
    return d
def dictToNpz(npzFile,d):
    """ Save the dictionary d in the file npzFile using np.savez;
        every key of d is passed as keyword argument to np.savez """
    np.savez(npzFile, **d)
def dictToNpy(npyFile,d):
    """ Save the dictionary d in the file npyFile using np.save
        (the dictionary is passed to np.save as a single object) """
    np.save(npyFile, d)
def objToDict(o,recursive=True):
    """ Convert a dict-like object (anything exposing .items, for example a
        DataStorage) to a plain dictionary (useful for saving).

        o : object to convert; if it has no "items" attribute it is
            returned unchanged
        recursive : bool
            if True (default) the values are converted too; if False only
            the top level object is converted and the values are stored
            as they are

        returns : a new dict (or o itself when it is not dict-like)

        A value that can not be converted is stored unchanged and the
        problem is logged.

        TODO: this function does not catch a list of DataStorage instances like
        objToDict( ( DataStorage(), DataStorage() ) )
        is not converted !!
    """
    if "items" not in dir(o):
        return o
    d = dict()
    for k, v in o.items():
        if not recursive:
            d[k] = v
            continue
        try:
            d[k] = objToDict(v, recursive=True)
        except Exception as e:
            # the message needs one placeholder per argument: with a single
            # %s the formatting itself raised a TypeError inside this
            # handler and the fallback below was never reached
            log.info("In objToDict, could not convert key %s to dict, error was %s", k, e)
            d[k] = v
    return d
def read(fname):
    """ Read a storage file and return its content as DataStorage instance.

        The loader is selected by the file extension: .npz, .npy or .h5;
        any other extension raises a ValueError
    """
    suffix = os.path.splitext(fname)[1]
    log.info("Reading storage file %s"%fname)
    # guard clause: refuse unknown extensions before picking a loader
    if suffix not in (".npz", ".npy", ".h5"):
        raise ValueError("Extension must be h5, npy or npz, it was %s"%suffix)
    if suffix == ".npz":
        content = npzToDict(fname)
    elif suffix == ".npy":
        content = npyToDict(fname)
    else:
        content = h5ToDict(fname)
    return DataStorage(content)
def save(fname,d): def save(fname,d):
# make sure the object is dict (recursively) this allows reading it
# without the DataStorage module
d = objToDict(d,recursive=True)
extension = os.path.splitext(fname)[1] extension = os.path.splitext(fname)[1]
log.info("Saving storage file %s"%fname) log.info("Saving storage file %s"%fname)
try: try:
@ -94,24 +123,57 @@ def save(fname,d):
return dictToNpz(fname,d) return dictToNpz(fname,d)
elif extension == ".h5": elif extension == ".h5":
return dictToH5(fname,d) return dictToH5(fname,d)
elif extension == ".npy":
return dictToNpy(fname,d)
else: else:
raise ValueError("Extension must be h5 or npz") raise ValueError("Extension must be h5, npy or npz, it was %s"%extension)
except Exception as e: except Exception as e:
log.exception("Could not save %s"%fname) log.exception("Could not save %s"%fname)
class DataStorage(dict): class DataStorage(dict):
""" Storage for 1d integrated info """ """ Storage for dict like object.
def __init__(self,fileOrDict,recursive=True, recursive : bool
default_name='pyfai_1d',default_ext='npz'): recursively convert dict-like objects to DataStorage
if isinstance(fileOrDict,dict): It can save data to file (format npy,npz or h5)
self.filename = None
d = fileOrDict To initialize it:
data = DataStorage( dict( a=(1,2,3),b="add"),filename='store.npz' )
data = DataStorage( a=(1,2,3), b="add" )
reads from file if it exists
data = DataStorage( 'mysaveddata.npz' ) ;
DOES NOT READ FROM FILE (even if it exists)!!
data = DataStorage( filename = 'mysaveddata.npz' );
create empty storage (with default filename)
data = DataStorage()
"""
def __init__(self,*args,filename='data_storage.npz',recursive=True,**kwargs):
# self.filename = kwargs.pop('filename',"data_storage.npz")
self.filename = filename
# interpret kwargs as dict if there are
if len(kwargs) != 0:
fileOrDict = dict(kwargs)
elif len(kwargs)==0 and len(args)>0:
fileOrDict = args[0]
else: else:
assert isinstance(fileOrDict,str) fileOrDict = dict()
if os.path.isdir(fileOrDict):
fileOrDict = fileOrDict + "/" + default_name + "." + default_ext d = dict(); # data dictionary
self.filename = fileOrDict if isinstance(fileOrDict,dict):
d = read(fileOrDict) d = fileOrDict
elif isinstance(fileOrDict,str):
if os.path.exists(fileOrDict):
d = read(fileOrDict)
else:
self.filename=fileOrDict
d = dict()
else:
raise ValueError("Invalid DataStorage definition")
if recursive: if recursive:
for k in d.keys(): for k in d.keys():
@ -150,7 +212,7 @@ class DataStorage(dict):
s = ["DataStorage obj containing (sorted): ",] s = ["DataStorage obj containing (sorted): ",]
for k in keys: for k in keys:
if isinstance(self[k],np.ndarray): if isinstance(self[k],np.ndarray):
value_str = "array %s"% "x".join(map(str,self[k].shape)) value_str = "array, size %s, type %s"% ("x".join(map(str,self[k].shape)),self[k].dtype)
elif isinstance(self[k],DataStorage): elif isinstance(self[k],DataStorage):
value_str = str(self[k])[:50] + "..." value_str = str(self[k])[:50] + "..."
elif isinstance(self[k],(str,DataStorage)): elif isinstance(self[k],(str,DataStorage)):
@ -165,4 +227,4 @@ class DataStorage(dict):
def save(self,fname=None): def save(self,fname=None):
if fname is None: fname = self.filename if fname is None: fname = self.filename
assert fname is not None assert fname is not None
save(fname,dict(self)) save(fname,self)