storage can now use npy files + fix typo

This commit is contained in:
Marco Cammarata 2017-01-20 10:41:31 +01:00
parent a95b0aa10e
commit 07a9311f16
1 changed files with 79 additions and 17 deletions

View File

@ -1,4 +1,4 @@
""" npz/hdf5 file based storage;
""" npy/npz/hdf5 file based storage;
this module adds the possibility to dump and load objects in files and
a more convenient way of accessing the data via the .attributedict thanks
to the DataStorage class """
@ -11,7 +11,7 @@ import logging
log = logging.getLogger(__name__) # __name__ is "foo.bar" here
def unwrapArray(a,recursive=True,readH5pyDataset=True):
""" This function takes an object (like a dictionary) and recursivively
""" This function takes an object (like a dictionary) and recursively
unwraps it solving many issues like the fact that many objects are
packaged as 0d array
This function also has some specific hacks for handling the h5py limit to
@ -57,6 +57,7 @@ def dictToH5Group(d,group):
def dictToH5(h5,d):
""" Save a dictionary into an hdf5 file
TODO: add capability of saving list of array
h5py is not capable of handling dictionaries natively"""
h5 = h5py.File(h5,mode="w")
# group = h5.create_group("/")
@ -74,19 +75,47 @@ def npzToDict(npzFile):
d = unwrapArray(d,recursive=True)
return d
def npyToDict(npyFile):
    """ Read a dictionary stored in a npy file (as written by dictToNpy)

    npyFile : str or file-like
        file to read, it is passed as is to np.load

    Returns the stored object after passing it through unwrapArray
    (recursive=True).
    """
    # np.save stores a dictionary as a pickled 0d object array; since
    # numpy 1.16.3 np.load refuses to unpickle unless allow_pickle=True is
    # given explicitly (without it a ValueError is raised).
    # NOTE: unpickling can execute arbitrary code, read only trusted files.
    stored = np.load(npyFile,allow_pickle=True)
    # .item() extracts the python object out of the 0d array
    d = unwrapArray( stored.item() ,recursive=True)
    return d
def dictToNpz(npzFile,d):
    """ Save the dictionary d in the npz file npzFile;
    each key of d is passed to np.savez as a keyword argument, so every
    value is stored in the archive under its key """
    np.savez(npzFile,**d)
def dictToNpy(npyFile,d):
    """ Save the dictionary d in the npy file npyFile;
    d is handed to np.save as a single object """
    np.save(npyFile,d)
def objToDict(o,recursive=True):
    """ convert a DictWrap to a dictionary (useful for saving); it should work for other objects too

    o : object
        every object exposing an `items` attribute is rebuilt as a plain
        dict (its values are converted in the same way); anything else is
        returned unchanged
    recursive : bool
        kept for interface compatibility, it is currently not used: nested
        dict-like values are always converted

    If the conversion of a value fails, the problem is logged and the
    original value is stored unchanged.

    TODO: this function does not catch a list of DataStorage instances like
    objToDict( ( DataStorage(), DataStorage() ) )
    is not converted !!
    """
    if "items" not in dir(o): return o
    d = dict()
    for k,v in o.items():
        try:
            d[k] = objToDict( v )
        except Exception as e:
            # best effort: keep the original value instead of failing.
            # key and error are passed as logging arguments, one placeholder
            # each (the previous message had a single %s for two values, so
            # the formatting itself raised a TypeError)
            log.info("In objToDict, could not convert key %s to dict, error was %s",
                k,e)
            d[k] = v
    return d
def read(fname):
    """ Read a storage file and return its content as DataStorage

    fname : str
        name of the file to read; the format is chosen based on the
        extension (.npz, .npy or .h5)

    Raises ValueError if the extension is not one of the supported ones.
    """
    extension = os.path.splitext(fname)[1]
    log.info("Reading storage file %s"%fname)
    if extension == ".npz":
        return DataStorage(npzToDict(fname))
    elif extension == ".npy":
        return DataStorage(npyToDict(fname))
    elif extension == ".h5":
        return DataStorage(h5ToDict(fname))
    else:
        # single error listing every supported extension (the stale message
        # that did not mention npy has been dropped)
        raise ValueError("Extension must be h5, npy or npz, it was %s"%extension)
def save(fname,d):
# make sure the object is dict (recursively) this allows reading it
# without the DataStorage module
d = objToDict(d,recursive=True)
extension = os.path.splitext(fname)[1]
log.info("Saving storage file %s"%fname)
try:
@ -94,24 +123,57 @@ def save(fname,d):
return dictToNpz(fname,d)
elif extension == ".h5":
return dictToH5(fname,d)
elif extension == ".npy":
return dictToNpy(fname,d)
else:
raise ValueError("Extension must be h5 or npz")
raise ValueError("Extension must be h5, npy or npz, it was %s"%extension)
except Exception as e:
log.exception("Could not save %s"%fname)
class DataStorage(dict):
""" Storage for 1d integrated info """
def __init__(self,fileOrDict,recursive=True,
default_name='pyfai_1d',default_ext='npz'):
if isinstance(fileOrDict,dict):
self.filename = None
d = fileOrDict
""" Storage for dict like object.
recursive : bool
recursively convert dict-like objects to DataStorage
It can save data to file (format npy,npz or h5)
To initialize it:
data = DataStorage( dict( a=(1,2,3),b="add"),filename='store.npz' )
data = DataStorage( a=(1,2,3), b="add" )
reads from file if it exists
data = DataStorage( 'mysaveddata.npz' ) ;
DOES NOT READ FROM FILE (even if it exists)!!
data = DataStorage( filename = 'mysaveddata.npz' );
create empty storage (with default filename)
data = DataStorage()
"""
def __init__(self,*args,filename='data_storage.npz',recursive=True,**kwargs):
# self.filename = kwargs.pop('filename',"data_storage.npz")
self.filename = filename
# interpret kwargs as dict if there are
if len(kwargs) != 0:
fileOrDict = dict(kwargs)
elif len(kwargs)==0 and len(args)>0:
fileOrDict = args[0]
else:
assert isinstance(fileOrDict,str)
if os.path.isdir(fileOrDict):
fileOrDict = fileOrDict + "/" + default_name + "." + default_ext
self.filename = fileOrDict
fileOrDict = dict()
d = dict(); # data dictionary
if isinstance(fileOrDict,dict):
d = fileOrDict
elif isinstance(fileOrDict,str):
if os.path.exists(fileOrDict):
d = read(fileOrDict)
else:
self.filename=fileOrDict
d = dict()
else:
raise ValueError("Invalid DataStorage definition")
if recursive:
for k in d.keys():
@ -150,7 +212,7 @@ class DataStorage(dict):
s = ["DataStorage obj containing (sorted): ",]
for k in keys:
if isinstance(self[k],np.ndarray):
value_str = "array %s"% "x".join(map(str,self[k].shape))
value_str = "array, size %s, type %s"% ("x".join(map(str,self[k].shape)),self[k].dtype)
elif isinstance(self[k],DataStorage):
value_str = str(self[k])[:50] + "..."
elif isinstance(self[k],(str,DataStorage)):
@ -165,4 +227,4 @@ class DataStorage(dict):
def save(self,fname=None):
if fname is None: fname = self.filename
assert fname is not None
save(fname,dict(self))
save(fname,self)