diff --git a/storage.py b/storage.py index b6d8219..215ccf6 100644 --- a/storage.py +++ b/storage.py @@ -55,7 +55,7 @@ def dictToH5Group(d,group,link_copy=True): if isinstance(value,(list,tuple)): value = np.asarray(value) if isinstance(value,dict): group.create_group(key) - dictToH5Group(value,group[key]) + dictToH5Group(value,group[key],link_copy=link_copy) elif value is None: group[key] = "NONE_PYTHON_OBJECT" elif isinstance(value,np.ndarray): @@ -65,22 +65,22 @@ def dictToH5Group(d,group,link_copy=True): elif isinstance(value,np.ndarray) and value.ndim == 1 and isinstance(value[0],np.ndarray): group.create_group(key) group[key].attrs["IS_LIST_OF_ARRAYS"] = True - for index,array in enumerate(value): dictToH5Group( { "index%010d"%index : array},group[key] ); + for index,array in enumerate(value): dictToH5Group( { "index%010d"%index : array},group[key],link_copy=link_copy ); TOTRY = False; # don't even try to save as generic call group[key]=value - else: - if link_copy: - found_address = None - for address,(file_handle,array) in _array_cache.items(): - if np.array_equal(array,value) and group.file == file_handle: - log.info("Found array in cache, asked for %s/%s, found as %s"%(group.name,key,address)) - found_address = address - if found_address is not None: - value = group.file[found_address] + if link_copy: + found_address = None + for address,(file_handle,array) in _array_cache.items(): + if np.array_equal(array,value) and group.file == file_handle: + log.info("Found array in cache, asked for %s/%s, found as %s"%(group.name,key,address)) + found_address = address + break + if found_address is not None: + value = group.file[found_address] try: if TOTRY: group[key] = value if link_copy: - log.info("Addind array %s to cache"%(group.name)) + log.info("Addind array %s to cache"%(group[key].name)) _array_cache[ group[key].name ] = (group.file,value) except Exception as e: log.warning("Can't save %s, error was %s"%(key,e)) @@ -92,13 +92,13 @@ def dictToH5Group(d,group,link_copy=True): log.error("Can't save %s, error was %s"%(key,e)) -def dictToH5(h5,d): +def dictToH5(h5,d,link_copy=False): """ Save a dictionary into an hdf5 file TODO: add capability of saving list of array h5py is not capable of handling dictionaries natively""" h5 = h5py.File(h5,mode="w") # group = h5.create_group("/") - dictToH5Group(d,h5["/"]) + dictToH5Group(d,h5["/"],link_copy=link_copy) h5.close() def h5ToDict(h5,readH5pyDataset=True): @@ -161,7 +161,7 @@ def save(fname,d,link_copy=True): if extension == ".npz": return dictToNpz(fname,d) elif extension == ".h5": - return dictToH5(fname,d) + return dictToH5(fname,d,link_copy=link_copy) elif extension == ".npy": return dictToNpy(fname,d) else: @@ -286,7 +286,13 @@ class DataStorage(dict): keys = [k for k in keys if k[0] != '_' ] return keys - def save(self,fname=None,link_copy=True): + def save(self,fname=None,link_copy=False): + """ link_copy: only works in hfd5 format + save space by creating link when identical arrays are found, + it slows down the saving (3 or 4 folds) but saves A LOT of space + when saving different dataset together (since it does not duplicate + internal pyfai matrices + """ if fname is None: fname = self.filename assert fname is not None save(fname,self,link_copy=link_copy)