fixed bug in link_copy (and disable by default)

Marco Cammarata 2017-03-13 17:42:03 +01:00
parent f7d0b88faf
commit 8a92ce981b
1 changed file with 22 additions and 16 deletions


@@ -55,7 +55,7 @@ def dictToH5Group(d,group,link_copy=True):
     if isinstance(value,(list,tuple)): value = np.asarray(value)
     if isinstance(value,dict):
       group.create_group(key)
-      dictToH5Group(value,group[key])
+      dictToH5Group(value,group[key],link_copy=link_copy)
     elif value is None:
       group[key] = "NONE_PYTHON_OBJECT"
     elif isinstance(value,np.ndarray):
@@ -65,22 +65,22 @@ def dictToH5Group(d,group,link_copy=True):
       elif isinstance(value,np.ndarray) and value.ndim == 1 and isinstance(value[0],np.ndarray):
         group.create_group(key)
         group[key].attrs["IS_LIST_OF_ARRAYS"] = True
-        for index,array in enumerate(value): dictToH5Group( { "index%010d"%index : array},group[key] );
+        for index,array in enumerate(value): dictToH5Group( { "index%010d"%index : array},group[key],link_copy=link_copy );
         TOTRY = False; # don't even try to save as generic call group[key]=value
-      else:
-        if link_copy:
-          found_address = None
-          for address,(file_handle,array) in _array_cache.items():
-            if np.array_equal(array,value) and group.file == file_handle:
-              log.info("Found array in cache, asked for %s/%s, found as %s"%(group.name,key,address))
-              found_address = address
-          if found_address is not None:
-            value = group.file[found_address]
+      if link_copy:
+        found_address = None
+        for address,(file_handle,array) in _array_cache.items():
+          if np.array_equal(array,value) and group.file == file_handle:
+            log.info("Found array in cache, asked for %s/%s, found as %s"%(group.name,key,address))
+            found_address = address
+            break
+        if found_address is not None:
+          value = group.file[found_address]
     try:
       if TOTRY:
         group[key] = value
       if link_copy:
-        log.info("Addind array %s to cache"%(group.name))
+        log.info("Addind array %s to cache"%(group[key].name))
         _array_cache[ group[key].name ] = (group.file,value)
     except Exception as e:
       log.warning("Can't save %s, error was %s"%(key,e))
@@ -92,13 +92,13 @@ def dictToH5Group(d,group,link_copy=True):
       log.error("Can't save %s, error was %s"%(key,e))


-def dictToH5(h5,d):
+def dictToH5(h5,d,link_copy=False):
   """ Save a dictionary into an hdf5 file
      TODO: add capability of saving list of array
      h5py is not capable of handling dictionaries natively"""
   h5 = h5py.File(h5,mode="w")
   # group = h5.create_group("/")
-  dictToH5Group(d,h5["/"])
+  dictToH5Group(d,h5["/"],link_copy=link_copy)
   h5.close()

 def h5ToDict(h5,readH5pyDataset=True):
@@ -161,7 +161,7 @@ def save(fname,d,link_copy=True):
   if extension == ".npz":
     return dictToNpz(fname,d)
   elif extension == ".h5":
-    return dictToH5(fname,d)
+    return dictToH5(fname,d,link_copy=link_copy)
   elif extension == ".npy":
     return dictToNpy(fname,d)
   else:
@@ -286,7 +286,13 @@ class DataStorage(dict):
     keys = [k for k in keys if k[0] != '_' ]
     return keys

-  def save(self,fname=None,link_copy=True):
+  def save(self,fname=None,link_copy=False):
+    """ link_copy: only works in hfd5 format
+        save space by creating link when identical arrays are found,
+        it slows down the saving (3 or 4 folds) but saves A LOT of space
+        when saving different dataset together (since it does not duplicate
+        internal pyfai matrices
+    """
     if fname is None: fname = self.filename
     assert fname is not None
     save(fname,self,link_copy=link_copy)
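
For reference, a minimal usage sketch of the API touched by this commit. It assumes the module is importable as "datastorage" and that DataStorage() can be constructed empty; both DataStorage and save(..., link_copy=...) appear in the diff, but the import path and the empty constructor are assumptions, not part of the commit.

# Usage sketch (not part of the commit): illustrates the new link_copy default.
import numpy as np
from datastorage import DataStorage   # assumed import path

d = DataStorage()                      # assumed: empty construction is allowed
d["img1"] = np.random.random((512, 512))
d["img2"] = d["img1"].copy()           # identical content, separate array object

# After this commit the default is link_copy=False: every array is written
# as its own dataset, so identical arrays are duplicated on disk.
d.save("scan.h5")

# Opting in to link_copy compares each array against those already written
# (np.array_equal) and, on a match, stores an HDF5 link to the existing
# dataset instead of a second copy: slower to save, but much smaller files
# when the same arrays repeat across entries.
d.save("scan_dedup.h5", link_copy=True)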