Fixed bug in link_copy (and disabled it by default)
This commit is contained in:
parent
f7d0b88faf
commit
8a92ce981b
38
storage.py
38
storage.py
|
@ -55,7 +55,7 @@ def dictToH5Group(d,group,link_copy=True):
|
||||||
if isinstance(value,(list,tuple)): value = np.asarray(value)
|
if isinstance(value,(list,tuple)): value = np.asarray(value)
|
||||||
if isinstance(value,dict):
|
if isinstance(value,dict):
|
||||||
group.create_group(key)
|
group.create_group(key)
|
||||||
dictToH5Group(value,group[key])
|
dictToH5Group(value,group[key],link_copy=link_copy)
|
||||||
elif value is None:
|
elif value is None:
|
||||||
group[key] = "NONE_PYTHON_OBJECT"
|
group[key] = "NONE_PYTHON_OBJECT"
|
||||||
elif isinstance(value,np.ndarray):
|
elif isinstance(value,np.ndarray):
|
||||||
|
@ -65,22 +65,22 @@ def dictToH5Group(d,group,link_copy=True):
|
||||||
elif isinstance(value,np.ndarray) and value.ndim == 1 and isinstance(value[0],np.ndarray):
|
elif isinstance(value,np.ndarray) and value.ndim == 1 and isinstance(value[0],np.ndarray):
|
||||||
group.create_group(key)
|
group.create_group(key)
|
||||||
group[key].attrs["IS_LIST_OF_ARRAYS"] = True
|
group[key].attrs["IS_LIST_OF_ARRAYS"] = True
|
||||||
for index,array in enumerate(value): dictToH5Group( { "index%010d"%index : array},group[key] );
|
for index,array in enumerate(value): dictToH5Group( { "index%010d"%index : array},group[key],link_copy=link_copy );
|
||||||
TOTRY = False; # don't even try to save as generic call group[key]=value
|
TOTRY = False; # don't even try to save as generic call group[key]=value
|
||||||
else:
|
if link_copy:
|
||||||
if link_copy:
|
found_address = None
|
||||||
found_address = None
|
for address,(file_handle,array) in _array_cache.items():
|
||||||
for address,(file_handle,array) in _array_cache.items():
|
if np.array_equal(array,value) and group.file == file_handle:
|
||||||
if np.array_equal(array,value) and group.file == file_handle:
|
log.info("Found array in cache, asked for %s/%s, found as %s"%(group.name,key,address))
|
||||||
log.info("Found array in cache, asked for %s/%s, found as %s"%(group.name,key,address))
|
found_address = address
|
||||||
found_address = address
|
break
|
||||||
if found_address is not None:
|
if found_address is not None:
|
||||||
value = group.file[found_address]
|
value = group.file[found_address]
|
||||||
try:
|
try:
|
||||||
if TOTRY:
|
if TOTRY:
|
||||||
group[key] = value
|
group[key] = value
|
||||||
if link_copy:
|
if link_copy:
|
||||||
log.info("Addind array %s to cache"%(group.name))
|
log.info("Addind array %s to cache"%(group[key].name))
|
||||||
_array_cache[ group[key].name ] = (group.file,value)
|
_array_cache[ group[key].name ] = (group.file,value)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.warning("Can't save %s, error was %s"%(key,e))
|
log.warning("Can't save %s, error was %s"%(key,e))
|
||||||
|
@ -92,13 +92,13 @@ def dictToH5Group(d,group,link_copy=True):
|
||||||
log.error("Can't save %s, error was %s"%(key,e))
|
log.error("Can't save %s, error was %s"%(key,e))
|
||||||
|
|
||||||
|
|
||||||
def dictToH5(h5,d):
|
def dictToH5(h5,d,link_copy=False):
|
||||||
""" Save a dictionary into an hdf5 file
|
""" Save a dictionary into an hdf5 file
|
||||||
TODO: add capability of saving list of array
|
TODO: add capability of saving list of array
|
||||||
h5py is not capable of handling dictionaries natively"""
|
h5py is not capable of handling dictionaries natively"""
|
||||||
h5 = h5py.File(h5,mode="w")
|
h5 = h5py.File(h5,mode="w")
|
||||||
# group = h5.create_group("/")
|
# group = h5.create_group("/")
|
||||||
dictToH5Group(d,h5["/"])
|
dictToH5Group(d,h5["/"],link_copy=link_copy)
|
||||||
h5.close()
|
h5.close()
|
||||||
|
|
||||||
def h5ToDict(h5,readH5pyDataset=True):
|
def h5ToDict(h5,readH5pyDataset=True):
|
||||||
|
@ -161,7 +161,7 @@ def save(fname,d,link_copy=True):
|
||||||
if extension == ".npz":
|
if extension == ".npz":
|
||||||
return dictToNpz(fname,d)
|
return dictToNpz(fname,d)
|
||||||
elif extension == ".h5":
|
elif extension == ".h5":
|
||||||
return dictToH5(fname,d)
|
return dictToH5(fname,d,link_copy=link_copy)
|
||||||
elif extension == ".npy":
|
elif extension == ".npy":
|
||||||
return dictToNpy(fname,d)
|
return dictToNpy(fname,d)
|
||||||
else:
|
else:
|
||||||
|
@ -286,7 +286,13 @@ class DataStorage(dict):
|
||||||
keys = [k for k in keys if k[0] != '_' ]
|
keys = [k for k in keys if k[0] != '_' ]
|
||||||
return keys
|
return keys
|
||||||
|
|
||||||
def save(self,fname=None,link_copy=True):
|
def save(self,fname=None,link_copy=False):
|
||||||
|
""" link_copy: only works in hdf5 format
|
||||||
|
save space by creating link when identical arrays are found,
|
||||||
|
it slows down saving (3- to 4-fold) but saves A LOT of space
|
||||||
|
when saving different datasets together (since it does not duplicate
|
||||||
|
internal pyfai matrices)
|
||||||
|
"""
|
||||||
if fname is None: fname = self.filename
|
if fname is None: fname = self.filename
|
||||||
assert fname is not None
|
assert fname is not None
|
||||||
save(fname,self,link_copy=link_copy)
|
save(fname,self,link_copy=link_copy)
|
||||||
|
|
Loading…
Reference in New Issue