storage can now use npy files + fix typo
This commit is contained in:
parent
a95b0aa10e
commit
07a9311f16
|
@ -1,4 +1,4 @@
|
|||
""" npz/hdf5 file based storage;
|
||||
""" npy/npz/hdf5 file based storage;
|
||||
this module adds the possibility to dump and load objects in files and
|
||||
a more convenient way of accessing the data via the .attributedict thanks
|
||||
to the DataStorage class """
|
||||
|
@ -11,7 +11,7 @@ import logging
|
|||
log = logging.getLogger(__name__) # __name__ is "foo.bar" here
|
||||
|
||||
def unwrapArray(a,recursive=True,readH5pyDataset=True):
|
||||
""" This function takes an object (like a dictionary) and recursivively
|
||||
""" This function takes an object (like a dictionary) and recursively
|
||||
unwraps it solving many issues like the fact that many objects are
|
||||
packaged as 0d array
|
||||
This function also has some specific hacks for handling h5py limit to
|
||||
|
@ -57,6 +57,7 @@ def dictToH5Group(d,group):
|
|||
|
||||
def dictToH5(h5,d):
|
||||
""" Save a dictionary into an hdf5 file
|
||||
TODO: add capability of saving list of array
|
||||
h5py is not capable of handling dictionaries natively"""
|
||||
h5 = h5py.File(h5,mode="w")
|
||||
# group = h5.create_group("/")
|
||||
|
@ -74,19 +75,47 @@ def npzToDict(npzFile):
|
|||
d = unwrapArray(d,recursive=True)
|
||||
return d
|
||||
|
||||
def npyToDict(npyFile):
    """ Load the object stored in a .npy file (see dictToNpy) and unwrap it

    np.save stores a dictionary as a pickled 0d object array; recent numpy
    versions refuse to load pickled data unless allow_pickle=True is given.

    NOTE: unpickling can execute arbitrary code, read only trusted files.
    """
    packed = np.load(npyFile, allow_pickle=True)
    # .item() extracts the python object out of the 0d object array
    d = unwrapArray( packed.item() ,recursive=True)
    return d
|
||||
|
||||
def dictToNpz(npzFile,d):
    """ Save the dictionary d in npzFile (np.savez); every key of d is
        passed as keyword argument, i.e. it becomes one entry of the archive """
    np.savez(npzFile, **d)
|
||||
def dictToNpy(npyFile,d):
    """ Save the object d (usually a dictionary) in npyFile with np.save """
    np.save(npyFile, d)
|
||||
|
||||
def objToDict(o,recursive=True):
    """ convert a DictWrap to a dictionary (useful for saving); it should work for other objects too

    o : object
      objects without an `items` attribute are returned as they are
    recursive : bool
      kept for backward compatibility; it is currently ignored, the values
      are always converted recursively

    Returns a plain dict (or `o` itself when it is not dict-like)

    TODO: this function does not catch a list of DataStorage instances like
    objToDict( ( DataStorage(), DataStorage() ) )
    is not converted !!
    """
    if "items" not in dir(o): return o
    d = dict()
    for k,v in o.items():
        try:
            d[k] = objToDict( v )
        except Exception as e:
            # best effort: keep the value as it is when it can't be converted.
            # The arguments are passed to the logger (not "%"-formatted here):
            # the previous message had one placeholder for two values, so
            # building it raised TypeError inside this handler
            log.info("In objToDict, could not convert key %s to dict, error was %s",k,e)
            d[k] = v
    return d
|
||||
|
||||
|
||||
def read(fname):
    """ Read a storage file and return its content as DataStorage

    The reader is chosen from the extension of fname: .npz, .npy or .h5

    Raises ValueError for any other extension
    """
    extension = os.path.splitext(fname)[1]
    log.info("Reading storage file %s"%fname)
    if extension == ".npz":
        return DataStorage(npzToDict(fname))
    elif extension == ".npy":
        return DataStorage(npyToDict(fname))
    elif extension == ".h5":
        return DataStorage(h5ToDict(fname))
    else:
        # single raise: the message lists every supported extension
        raise ValueError("Extension must be h5, npy or npz, it was %s"%extension)
|
||||
|
||||
def save(fname,d):
|
||||
# make sure the object is dict (recursively) this allows reading it
|
||||
# without the DataStorage module
|
||||
d = objToDict(d,recursive=True)
|
||||
extension = os.path.splitext(fname)[1]
|
||||
log.info("Saving storage file %s"%fname)
|
||||
try:
|
||||
|
@ -94,24 +123,57 @@ def save(fname,d):
|
|||
return dictToNpz(fname,d)
|
||||
elif extension == ".h5":
|
||||
return dictToH5(fname,d)
|
||||
elif extension == ".npy":
|
||||
return dictToNpy(fname,d)
|
||||
else:
|
||||
raise ValueError("Extension must be h5 or npz")
|
||||
raise ValueError("Extension must be h5, npy or npz, it was %s"%extension)
|
||||
except Exception as e:
|
||||
log.exception("Could not save %s"%fname)
|
||||
|
||||
|
||||
class DataStorage(dict):
|
||||
""" Storage for 1d integrated info """
|
||||
def __init__(self,fileOrDict,recursive=True,
|
||||
default_name='pyfai_1d',default_ext='npz'):
|
||||
if isinstance(fileOrDict,dict):
|
||||
self.filename = None
|
||||
d = fileOrDict
|
||||
""" Storage for dict like object.
|
||||
recursive : bool
|
||||
recursively convert dict-like objects to DataStorage
|
||||
It can save data to file (format npy,npz or h5)
|
||||
|
||||
To initialize it:
|
||||
|
||||
data = DataStorage( dict( a=(1,2,3),b="add"),filename='store.npz' )
|
||||
|
||||
data = DataStorage( a=(1,2,3), b="add" )
|
||||
|
||||
reads from file if it exists
|
||||
data = DataStorage( 'mysaveddata.npz' ) ;
|
||||
|
||||
DOES NOT READ FROM FILE (even if it exists)!!
|
||||
data = DataStorage( filename = 'mysaveddata.npz' );
|
||||
|
||||
create empty storage (with default filename)
|
||||
data = DataStorage()
|
||||
"""
|
||||
def __init__(self,*args,filename='data_storage.npz',recursive=True,**kwargs):
|
||||
# self.filename = kwargs.pop('filename',"data_storage.npz")
|
||||
self.filename = filename
|
||||
# interpret kwargs as dict if there are
|
||||
if len(kwargs) != 0:
|
||||
fileOrDict = dict(kwargs)
|
||||
elif len(kwargs)==0 and len(args)>0:
|
||||
fileOrDict = args[0]
|
||||
else:
|
||||
assert isinstance(fileOrDict,str)
|
||||
if os.path.isdir(fileOrDict):
|
||||
fileOrDict = fileOrDict + "/" + default_name + "." + default_ext
|
||||
self.filename = fileOrDict
|
||||
d = read(fileOrDict)
|
||||
fileOrDict = dict()
|
||||
|
||||
d = dict(); # data dictionary
|
||||
if isinstance(fileOrDict,dict):
|
||||
d = fileOrDict
|
||||
elif isinstance(fileOrDict,str):
|
||||
if os.path.exists(fileOrDict):
|
||||
d = read(fileOrDict)
|
||||
else:
|
||||
self.filename=fileOrDict
|
||||
d = dict()
|
||||
else:
|
||||
raise ValueError("Invalid DataStorage definition")
|
||||
|
||||
if recursive:
|
||||
for k in d.keys():
|
||||
|
@ -150,7 +212,7 @@ class DataStorage(dict):
|
|||
s = ["DataStorage obj containing (sorted): ",]
|
||||
for k in keys:
|
||||
if isinstance(self[k],np.ndarray):
|
||||
value_str = "array %s"% "x".join(map(str,self[k].shape))
|
||||
value_str = "array, size %s, type %s"% ("x".join(map(str,self[k].shape)),self[k].dtype)
|
||||
elif isinstance(self[k],DataStorage):
|
||||
value_str = str(self[k])[:50] + "..."
|
||||
elif isinstance(self[k],(str,DataStorage)):
|
||||
|
@ -165,4 +227,4 @@ class DataStorage(dict):
|
|||
def save(self,fname=None):
    """ Save the storage to file using the module level save function

    fname : str or None
      file to write; if None, self.filename is used
    """
    if fname is None: fname = self.filename
    assert fname is not None
    # the storage itself is passed (only once) to the module level save
    save(fname,self)
|
Loading…
Reference in New Issue