storage can now use npy files + fix typo
This commit is contained in:
parent
a95b0aa10e
commit
07a9311f16
|
@ -1,4 +1,4 @@
|
||||||
""" npz/hdf5 file based storage;
|
""" npy/npz/hdf5 file based storage;
|
||||||
this module adds the possibility to dump and load objects in files and
|
this module adds the possibility to dump and load objects in files and
|
||||||
a more convenient way of accessing the data via the .attributedict thanks
|
a more convenient way of accessing the data via the .attributedict thanks
|
||||||
to the DataStorage class """
|
to the DataStorage class """
|
||||||
|
@ -11,7 +11,7 @@ import logging
|
||||||
log = logging.getLogger(__name__) # __name__ is "foo.bar" here
|
log = logging.getLogger(__name__) # __name__ is "foo.bar" here
|
||||||
|
|
||||||
def unwrapArray(a,recursive=True,readH5pyDataset=True):
|
def unwrapArray(a,recursive=True,readH5pyDataset=True):
|
||||||
""" This function takes an object (like a dictionary) and recursivively
|
""" This function takes an object (like a dictionary) and recursively
|
||||||
unwraps it solving many issues like the fact that many objects are
|
unwraps it solving many issues like the fact that many objects are
|
||||||
packaged as 0d array
|
packaged as 0d array
|
||||||
This function has also some specific hack for handling h5py limit to
|
This function has also some specific hack for handling h5py limit to
|
||||||
|
@ -57,6 +57,7 @@ def dictToH5Group(d,group):
|
||||||
|
|
||||||
def dictToH5(h5,d):
|
def dictToH5(h5,d):
|
||||||
""" Save a dictionary into an hdf5 file
|
""" Save a dictionary into an hdf5 file
|
||||||
|
TODO: add capability of saving list of array
|
||||||
h5py is not capable of handling dictionaries natively"""
|
h5py is not capable of handling dictionaries natively"""
|
||||||
h5 = h5py.File(h5,mode="w")
|
h5 = h5py.File(h5,mode="w")
|
||||||
# group = h5.create_group("/")
|
# group = h5.create_group("/")
|
||||||
|
@ -74,19 +75,47 @@ def npzToDict(npzFile):
|
||||||
d = unwrapArray(d,recursive=True)
|
d = unwrapArray(d,recursive=True)
|
||||||
return d
|
return d
|
||||||
|
|
||||||
|
def npyToDict(npyFile):
    """ Load a dictionary stored in a .npy file (e.g. one written by
        np.save(fname, some_dict)) and return it recursively unwrapped

        np.save stores a dict as a pickled 0d object array; .item() extracts
        the dict from that array before it is handed to unwrapArray.

        NOTE: allow_pickle=True is needed because numpy >= 1.16.3 refuses to
        load object arrays by default (it raises ValueError). Unpickling can
        execute arbitrary code, so only read files from a trusted source.
    """
    d = unwrapArray( np.load(npyFile,allow_pickle=True).item() ,recursive=True)
    return d
|
||||||
|
|
||||||
def dictToNpz(npzFile,d): np.savez(npzFile,**d)
|
def dictToNpz(npzFile,d): np.savez(npzFile,**d)
|
||||||
|
def dictToNpy(npyFile,d):
    """ Save the dictionary `d` in the file `npyFile` using np.save;
        nothing is returned """
    np.save(npyFile,d)
|
||||||
|
|
||||||
|
def objToDict(o,recursive=True):
    """ Convert a dict-like object (for example a DataStorage) to a plain
        dictionary (useful for saving); it should work for other objects too

        Any object that lists `items` in dir(o) is treated as dict-like: a new
        dict is built and each of its values is converted in turn. Any other
        object is returned unchanged.

        Parameters
        ----------
        o : object
            object to convert
        recursive : bool
            not used by this function at the moment: nested dict-like values
            are always converted

        TODO: dict-like objects inside a list/tuple are not converted, e.g.
          objToDict( ( DataStorage(), DataStorage() ) )
        returns the tuple as it is !!
    """
    if "items" not in dir(o): return o
    d = dict()
    for k,v in o.items():
        try:
            d[k] = objToDict( v )
        except Exception as e:
            # best effort: if a value cannot be converted keep it as it is.
            # k and e are passed as logging arguments, one placeholder each
            # (a message with a single %s formatted with the tuple (k,e)
            # raises TypeError, which would abort the whole conversion)
            log.info("In objToDict, could not convert key %s to dict, error was %s",k,e)
            d[k] = v
    return d
|
||||||
|
|
||||||
|
|
||||||
def read(fname):
|
def read(fname):
|
||||||
extension = os.path.splitext(fname)[1]
|
extension = os.path.splitext(fname)[1]
|
||||||
log.info("Reading storage file %s"%fname)
|
log.info("Reading storage file %s"%fname)
|
||||||
if extension == ".npz":
|
if extension == ".npz":
|
||||||
return DataStorage(npzToDict(fname))
|
return DataStorage(npzToDict(fname))
|
||||||
|
elif extension == ".npy":
|
||||||
|
return DataStorage(npyToDict(fname))
|
||||||
elif extension == ".h5":
|
elif extension == ".h5":
|
||||||
return DataStorage(h5ToDict(fname))
|
return DataStorage(h5ToDict(fname))
|
||||||
else:
|
else:
|
||||||
raise ValueError("Extension must be h5 or npz, it was %s"%extension)
|
raise ValueError("Extension must be h5, npy or npz, it was %s"%extension)
|
||||||
|
|
||||||
def save(fname,d):
|
def save(fname,d):
|
||||||
|
# make sure the object is dict (recursively) this allows reading it
|
||||||
|
# without the DataStorage module
|
||||||
|
d = objToDict(d,recursive=True)
|
||||||
extension = os.path.splitext(fname)[1]
|
extension = os.path.splitext(fname)[1]
|
||||||
log.info("Saving storage file %s"%fname)
|
log.info("Saving storage file %s"%fname)
|
||||||
try:
|
try:
|
||||||
|
@ -94,24 +123,57 @@ def save(fname,d):
|
||||||
return dictToNpz(fname,d)
|
return dictToNpz(fname,d)
|
||||||
elif extension == ".h5":
|
elif extension == ".h5":
|
||||||
return dictToH5(fname,d)
|
return dictToH5(fname,d)
|
||||||
|
elif extension == ".npy":
|
||||||
|
return dictToNpy(fname,d)
|
||||||
else:
|
else:
|
||||||
raise ValueError("Extension must be h5 or npz")
|
raise ValueError("Extension must be h5, npy or npz, it was %s"%extension)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception("Could not save %s"%fname)
|
log.exception("Could not save %s"%fname)
|
||||||
|
|
||||||
|
|
||||||
class DataStorage(dict):
|
class DataStorage(dict):
|
||||||
""" Storage for 1d integrated info """
|
""" Storage for dict like object.
|
||||||
def __init__(self,fileOrDict,recursive=True,
|
recursive : bool
|
||||||
default_name='pyfai_1d',default_ext='npz'):
|
recursively convert dict-like objects to DataStorage
|
||||||
if isinstance(fileOrDict,dict):
|
It can save data to file (format npy,npz or h5)
|
||||||
self.filename = None
|
|
||||||
d = fileOrDict
|
To initialize it:
|
||||||
|
|
||||||
|
data = DataStorage( dict( a=(1,2,3),b="add"),filename='store.npz' )
|
||||||
|
|
||||||
|
data = DataStorage( a=(1,2,3), b="add" )
|
||||||
|
|
||||||
|
reads from file if it exists
|
||||||
|
data = DataStorage( 'mysaveddata.npz' ) ;
|
||||||
|
|
||||||
|
DOES NOT READ FROM FILE (even if it exists)!!
|
||||||
|
data = DataStorage( filename = 'mysaveddata.npz' );
|
||||||
|
|
||||||
|
create empty storage (with default filename)
|
||||||
|
data = DataStorage()
|
||||||
|
"""
|
||||||
|
def __init__(self,*args,filename='data_storage.npz',recursive=True,**kwargs):
|
||||||
|
# self.filename = kwargs.pop('filename',"data_storage.npz")
|
||||||
|
self.filename = filename
|
||||||
|
# interpret kwargs as dict if there are
|
||||||
|
if len(kwargs) != 0:
|
||||||
|
fileOrDict = dict(kwargs)
|
||||||
|
elif len(kwargs)==0 and len(args)>0:
|
||||||
|
fileOrDict = args[0]
|
||||||
else:
|
else:
|
||||||
assert isinstance(fileOrDict,str)
|
fileOrDict = dict()
|
||||||
if os.path.isdir(fileOrDict):
|
|
||||||
fileOrDict = fileOrDict + "/" + default_name + "." + default_ext
|
d = dict(); # data dictionary
|
||||||
self.filename = fileOrDict
|
if isinstance(fileOrDict,dict):
|
||||||
d = read(fileOrDict)
|
d = fileOrDict
|
||||||
|
elif isinstance(fileOrDict,str):
|
||||||
|
if os.path.exists(fileOrDict):
|
||||||
|
d = read(fileOrDict)
|
||||||
|
else:
|
||||||
|
self.filename=fileOrDict
|
||||||
|
d = dict()
|
||||||
|
else:
|
||||||
|
raise ValueError("Invalid DataStorage definition")
|
||||||
|
|
||||||
if recursive:
|
if recursive:
|
||||||
for k in d.keys():
|
for k in d.keys():
|
||||||
|
@ -150,7 +212,7 @@ class DataStorage(dict):
|
||||||
s = ["DataStorage obj containing (sorted): ",]
|
s = ["DataStorage obj containing (sorted): ",]
|
||||||
for k in keys:
|
for k in keys:
|
||||||
if isinstance(self[k],np.ndarray):
|
if isinstance(self[k],np.ndarray):
|
||||||
value_str = "array %s"% "x".join(map(str,self[k].shape))
|
value_str = "array, size %s, type %s"% ("x".join(map(str,self[k].shape)),self[k].dtype)
|
||||||
elif isinstance(self[k],DataStorage):
|
elif isinstance(self[k],DataStorage):
|
||||||
value_str = str(self[k])[:50] + "..."
|
value_str = str(self[k])[:50] + "..."
|
||||||
elif isinstance(self[k],(str,DataStorage)):
|
elif isinstance(self[k],(str,DataStorage)):
|
||||||
|
@ -165,4 +227,4 @@ class DataStorage(dict):
|
||||||
def save(self,fname=None):
|
def save(self,fname=None):
|
||||||
if fname is None: fname = self.filename
|
if fname is None: fname = self.filename
|
||||||
assert fname is not None
|
assert fname is not None
|
||||||
save(fname,dict(self))
|
save(fname,self)
|
Loading…
Reference in New Issue