mcutils/xray/storage.py

""" npz/hdf5 file based storage;
    this module adds the possibility to dump and load objects in files and
    a more convenient way of accessing the data as attributes (data.key)
    thanks to the DataStorage class """
import numpy as np
import os
import h5py 
import collections

import logging
log = logging.getLogger(__name__)  # module-level logger, named after this module

def unwrapArray(a,recursive=True,readH5pyDataset=True):
  """ Convert what comes out of an npz/hdf5 file into plain python objects

      a               : object to unwrap (array, h5py dataset/group, npz file,
                        dict, list, ...); objects of other kinds are returned
                        as they are
      recursive       : if True the values of dict-like objects (anything
                        exposing `items`) and the elements of lists are
                        unwrapped as well; lists are modified in place
      readH5pyDataset : if True h5py datasets are read into memory. If False
                        only scalar datasets are read, the other ones are
                        returned as h5py datasets (NOTE: presumably usable
                        only while the file they belong to is still open)

      The conversions are:
       - 0d arrays are replaced by the object they contain
       - byte string arrays (dtype char "S") are converted to str arrays
       - dict-like objects are returned as DataStorage
       - the "NONE_PYTHON_OBJECT" marker (str or bytes) is converted to None
  """
  # h5py dataset: scalars are always read, the others only if requested
  if isinstance(a,h5py.Dataset) and (readH5pyDataset or a.shape == ()):
    a = a[...]
  if isinstance(a,np.ndarray) and a.ndim == 0: a = a.item()
  # h5py can't save numpy unicode, text arrays are stored as byte strings
  if isinstance(a,np.ndarray) and a.dtype.char == "S": a = a.astype(str)
  if recursive:
    if "items" in dir(a): # dict, h5py groups, npz file
      a = dict(a) # convert to dict, otherwise can't assign values
      for key,value in a.items():
        # forward the flags, else readH5pyDataset would be lost below
        # the first level
        a[key] = unwrapArray(value,recursive=True,
                             readH5pyDataset=readH5pyDataset)
    elif isinstance(a,list):
      for index,value in enumerate(a):
        a[index] = unwrapArray(value,recursive=True,
                               readH5pyDataset=readH5pyDataset)
  if isinstance(a,dict): a = DataStorage(a)
  # restore None that cannot be saved in h5py; depending on the h5py
  # version the marker may come back as bytes instead of str
  if isinstance(a,str) and a == "NONE_PYTHON_OBJECT": a = None
  if isinstance(a,bytes) and a == b"NONE_PYTHON_OBJECT": a = None
  return a

def dictToH5Group(d,group):
  """ helper function that transforms (recursively) a dictionary into an
      hdf group by creating subgroups

      d     : dictionary to store; values that are dictionaries become
              subgroups, every other value is assigned to group[key]
      group : destination, it has to provide `create_group(key)` (returning
              the new subgroup) and item assignment, like an h5py group

      Values refused by the group with a TypeError are skipped (an error is
      logged). A unicode array with characters that can't be encoded as
      ascii raises UnicodeEncodeError.
  """
  for key,value in d.items():
    if isinstance(value,dict):
      dictToH5Group(value,group.create_group(key))
      continue
    # h5py can't handle numpy unicode arrays; np.char.encode works
    # elementwise, hence for any number of dimensions (empty and 0d included)
    if isinstance(value,np.ndarray) and value.dtype.char == "U":
      value = np.char.encode(value,"ascii")
    # h5py can't save None, store a marker instead
    if value is None: value = "NONE_PYTHON_OBJECT"
    try:
      group[key] = value
    except TypeError:
      log.error("Can't save %s"%(key))

def dictToH5(h5,d):
  """ Save a dictionary into an hdf5 file
      h5py is not capable of handling dictionaries natively

      h5 : name of the file to write, it is opened with mode "w"
      d  : dictionary to save (nested dictionaries become subgroups)
  """
  # the with block closes the file even if something goes wrong while writing
  with h5py.File(h5,mode="w") as h5file:
    dictToH5Group(d,h5file["/"])

def h5ToDict(h5,readH5pyDataset=True):
  """ Open the hdf5 file `h5` read-only and return its unwrapped content

      The opened file is passed to unwrapArray (recursive) together with
      readH5pyDataset; the file is closed before returning """
  with h5py.File(h5,mode="r") as handle:
    return unwrapArray(handle,readH5pyDataset=readH5pyDataset,recursive=True)

def npzToDict(npzFile):
  """ Load every entry of an npz file and return the unwrapped content
      (see unwrapArray); the npz file is closed once its entries are read """
  with np.load(npzFile) as archive:
    content = {name: archive[name] for name in archive.keys()}
  return unwrapArray(content,recursive=True)

def dictToNpz(npzFile,d):
  """ Save the dictionary `d` with np.savez, every key is passed as a
      keyword argument (one saved entry per key) """
  entries = dict(d)
  np.savez(npzFile,**entries)

def read(fname):
  """ Read a storage file and return its content as DataStorage

      The reader is chosen from the file extension: ".npz" or ".h5";
      any other extension raises ValueError """
  _,extension = os.path.splitext(fname)
  log.info("Reading storage file %s"%fname)
  readers = {".npz" : npzToDict, ".h5" : h5ToDict}
  reader = readers.get(extension)
  if reader is None:
    raise ValueError("Extension must be h5 or npz, it was %s"%extension)
  return DataStorage(reader(fname))

def save(fname,d):
  """ Save the dictionary `d` in the file `fname`

      The format is chosen from the file extension: ".npz" or ".h5".
      Nothing is raised if the saving fails (unsupported extension
      included): the exception is logged and None is returned """
  _,extension = os.path.splitext(fname)
  log.info("Saving storage file %s"%fname)
  writers = {".npz" : dictToNpz, ".h5" : dictToH5}
  try:
    if extension not in writers:
      raise ValueError("Extension must be h5 or npz")
    return writers[extension](fname,d)
  except Exception:
    log.exception("Could not save %s"%fname)

class DataStorage(dict):
  """ Dictionary whose items can also be accessed as attributes

      Every item is stored twice: as dictionary item (data["x"]) and as
      instance attribute (data.x). Setting an item or an attribute updates
      both, hence the keys have to be strings. `filename` is handled like
      any other field, so it is also one of the keys.
  """
  def __init__(self,fileOrDict,recursive=True,
               default_name='pyfai_1d',default_ext='npz'):
    """ fileOrDict   : dictionary with the content, or name of a file to
                       read (see `read`); if it is a folder the file
                       default_name.default_ext inside it is used
        recursive    : if True values that are plain dictionaries are
                       stored as DataStorage (the dictionary passed as
                       input is left untouched)
    """
    if isinstance(fileOrDict,dict):
      filename = None
      d = fileOrDict
    else:
      assert isinstance(fileOrDict,str)
      if os.path.isdir(fileOrDict):
        fileOrDict = os.path.join(fileOrDict,default_name + "." + default_ext)
      filename = fileOrDict
      d = read(fileOrDict)

    # defined first to be sure it always exists; when the content is given
    # as dictionary a "filename" item inside it takes precedence
    self.filename = None

    # setattr stores each value as attribute (.data, .delays, etc.) and as
    # dictionary item, see __setattr__
    for k,v in d.items():
      if recursive and isinstance(v,dict) and not isinstance(v,DataStorage):
        v = DataStorage(v)
      setattr(self,k,v)

    # done after copying the content: what is read from a file carries its
    # own "filename" field that would otherwise replace the real file name
    if filename is not None: self.filename = filename

  def __setitem__(self, key, value):
    # __setattr__ takes care of both the attribute and the dictionary item
    setattr(self,key,value)

  def __setattr__(self, key, value):
    """ allows to add fields with data.test=4 """
    super().__setitem__(key, value)
    super().__setattr__(key,value)

  def __delitem__(self, key):
    delattr(self,key)
    super().__delitem__(key)

  def __str__(self):
    keys = sorted(self.keys())
    return "DataStorage obj containing: %s" % ",".join(keys)

  def __repr__(self):
    """ One line per key (sorted): arrays are represented by their shape,
        strings and nested DataStorage by their first 50 characters """
    keys = sorted(self.keys())
    # default=0: there could be no keys at all (if filename is deleted)
    nchars = max(map(len,keys),default=0)
    fmt = "%%%ds %%s" % (nchars)
    s = ["DataStorage obj containing (sorted): ",]
    for k in keys:
      value = self[k]
      if isinstance(value,np.ndarray):
        value_str = "array %s"% "x".join(map(str,value.shape))
      elif isinstance(value,DataStorage):
        value_str = str(value)[:50] + "..."
      elif isinstance(value,str):
        value_str = value[:50] + "..."
      elif value is None:
        value_str = "None"
      else:
        value_str = str(value)
      s.append( fmt % (k,value_str) )
    return "\n".join(s)

  def save(self,fname=None):
    """ Save the content (all items, filename included) with the module
        level `save`; if fname is None self.filename is used """
    if fname is None: fname = self.filename
    assert fname is not None
    save(fname,dict(self))
cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`""" npz/hdf5 file based storage;`
			`this modules adds the possibility to dump and load objects in files and`
			`a more convenient was of accessing the data via the .attributedict thanks`
			`to the DataStorage class """`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00			`import numpy as np`
			`import os`
			`import h5py`
			`import collections`

more improvements (including some needed to work after background subtraction, see example on salen 2017-01-10 22:43:22 +01:00			`import logging`
			`log = logging.getLogger(__name__) # __name__ is "foo.bar" here`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00
cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`def unwrapArray(a,recursive=True,readH5pyDataset=True):`
			`""" This function takes an object (like a dictionary) and recursivively`
			`unwraps it solving many issues like the fact that many objects are`
			`packaged as 0d array`
			`This funciton has also some specific hack for handling h5py limit to`
			`handle for example the None object or the numpy unicode ...`
			`"""`
			`# is h5py dataset convert to array`
			`if isinstance(a,h5py.Dataset) and readH5pyDataset: a = a[...]`
			`if isinstance(a,h5py.Dataset) and a.shape == (): a = a[...]`
			`if isinstance(a,np.ndarray) and a.ndim == 0 : a = a.item()`
			`if isinstance(a,np.ndarray) and a.dtype.char == "S": a = a.astype(str)`
			`if recursive:`
			`if "items" in dir(a): # dict, h5py groups, npz file`
			`a = dict(a); # convert to dict, otherwise can't asssign values`
			`for key,value in a.items(): a[key] = unwrapArray(value)`
			`elif isinstance(a,list):`
new submodules (peaks,cell) and new functions (like backgorundSubtraction (in azav.py) 2017-01-13 14:49:48 +01:00			`for index in range(len(a)): a[index] = unwrapArray(a[index])`
cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`else:`
			`pass`
			`if isinstance(a,dict): a = DataStorage(a)`
			`# restore None that cannot be saved in h5py`
			`if isinstance(a,str) and a == "NONE_PYTHON_OBJECT": a = None`
			`# h5py can't save numpy unicode`
			`if isinstance(a,np.ndarray) and a.dtype.char == "S": a = a.astype(str)`
			`return a`

lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00			`def dictToH5Group(d,group):`
cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`""" helper function that transform (recursive) a dictionary into an`
			`hdf group by creating subgroups """`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00			`for key,value in d.items():`
cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`if isinstance(value,dict):`
			`group.create_group(key)`
			`dictToH5Group(value,group[key])`
			`else:`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00			`# h5py can't handle numpy unicode arrays`
			`if isinstance(value,np.ndarray) and value.dtype.char == "U":`
			`value = np.asarray([vv.encode('ascii') for vv in value])`
			`# h5py can't save None`
			`if value is None: value="NONE_PYTHON_OBJECT"`
more cleanup and improvements; storage can be chosen between npz and h5, data reduction is kind of tested 2017-01-06 15:40:26 +01:00			`try:`
			`group[key] = value`
			`except TypeError:`
			`log.error("Can't save %s"%(key))`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00
			`def dictToH5(h5,d):`
more cleanup and improvements; storage can be chosen between npz and h5, data reduction is kind of tested 2017-01-06 15:40:26 +01:00			`""" Save a dictionary into an hdf5 file`
			`h5py is not capable of handling dictionaries natively"""`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00			`h5 = h5py.File(h5,mode="w")`
			`# group = h5.create_group("/")`
			`dictToH5Group(d,h5["/"])`
			`h5.close()`

cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`def h5ToDict(h5,readH5pyDataset=True):`
more cleanup and improvements; storage can be chosen between npz and h5, data reduction is kind of tested 2017-01-06 15:40:26 +01:00			`""" Read a hdf5 file into a dictionary """`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00			`with h5py.File(h5,"r") as h:`
cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`ret = unwrapArray(h,recursive=True,readH5pyDataset=readH5pyDataset)`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00			`return ret`

			`def npzToDict(npzFile):`
			`with np.load(npzFile) as npz: d = dict(npz)`
cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`d = unwrapArray(d,recursive=True)`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00			`return d`

			`def dictToNpz(npzFile,d): np.savez(npzFile,**d)`

			`def read(fname):`
			`extension = os.path.splitext(fname)[1]`
more id9 stuff including a difference plot with hidable curves 2017-01-06 18:06:34 +01:00			`log.info("Reading storage file %s"%fname)`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00			`if extension == ".npz":`
new submodules (peaks,cell) and new functions (like backgorundSubtraction (in azav.py) 2017-01-13 14:49:48 +01:00			`return DataStorage(npzToDict(fname))`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00			`elif extension == ".h5":`
new submodules (peaks,cell) and new functions (like backgorundSubtraction (in azav.py) 2017-01-13 14:49:48 +01:00			`return DataStorage(h5ToDict(fname))`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00			`else:`
			`raise ValueError("Extension must be h5 or npz, it was %s"%extension)`

			`def save(fname,d):`
			`extension = os.path.splitext(fname)[1]`
more id9 stuff including a difference plot with hidable curves 2017-01-06 18:06:34 +01:00			`log.info("Saving storage file %s"%fname)`
new submodules (peaks,cell) and new functions (like backgorundSubtraction (in azav.py) 2017-01-13 14:49:48 +01:00			`try:`
			`if extension == ".npz":`
			`return dictToNpz(fname,d)`
			`elif extension == ".h5":`
			`return dictToH5(fname,d)`
			`else:`
			`raise ValueError("Extension must be h5 or npz")`
			`except Exception as e:`
			`log.exception("Could not save %s"%fname)`
lots of changes, now id9 and pyfai routines have their own wubmodule; worked on storage system (hdf5 or npz based) 2017-01-05 19:22:37 +01:00
cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`class DataStorage(dict):`
			`""" Storage for 1d integrated info """`
new submodules (peaks,cell) and new functions (like backgorundSubtraction (in azav.py) 2017-01-13 14:49:48 +01:00			`def __init__(self,fileOrDict,recursive=True,`
			`default_name='pyfai_1d',default_ext='npz'):`
cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`if isinstance(fileOrDict,dict):`
			`self.filename = None`
			`d = fileOrDict`
			`else:`
			`assert isinstance(fileOrDict,str)`
			`if os.path.isdir(fileOrDict):`
			`fileOrDict = fileOrDict + "/" + default_name + "." + default_ext`
			`self.filename = fileOrDict`
			`d = read(fileOrDict)`

new submodules (peaks,cell) and new functions (like backgorundSubtraction (in azav.py) 2017-01-13 14:49:48 +01:00			`if recursive:`
			`for k in d.keys():`
			`if not isinstance(d[k],DataStorage) and isinstance(d[k],dict):`
			`d[k] = DataStorage(d[k])`

cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`# allow accessing with .data, .delays, etc.`
			`for k,v in d.items(): setattr(self,k,v)`

			`# allow accessing as proper dict`
			`self.update( **dict(d) )`

			`def __setitem__(self, key, value):`
			`setattr(self,key,value)`
			`super().__setitem__(key, value)`

improved DataStorage to add key with the syntax a.data=3 2017-01-12 16:35:36 +01:00			`def __setattr__(self, key, value):`
			`""" allows to add fields with data.test=4 """`
			`super().__setitem__(key, value)`
			`super().__setattr__(key,value)`

cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`def __delitem__(self, key):`
			`delattr(self,key)`
			`super().__delitem__(key)`

new submodules (peaks,cell) and new functions (like backgorundSubtraction (in azav.py) 2017-01-13 14:49:48 +01:00			`def __str__(self):`
			`keys = list(self.keys())`
			`keys.sort()`
			`return "DataStorage obj containing: %s" % ",".join(keys)`

			`def __repr__(self):`
			`keys = list(self.keys())`
			`keys.sort()`
			`nchars = max(map(len,keys))`
			`fmt = "%%%ds %%s" % (nchars)`
			`s = ["DataStorage obj containing (sorted): ",]`
			`for k in keys:`
			`if isinstance(self[k],np.ndarray):`
			`value_str = "array %s"% "x".join(map(str,self[k].shape))`
			`elif isinstance(self[k],DataStorage):`
			`value_str = str(self[k])[:50] + "..."`
			`elif isinstance(self[k],(str,DataStorage)):`
			`value_str = self[k][:50] + "..."`
			`elif self[k] is None:`
			`value_str = "None"`
			`else:`
			`value_str = str(self[k])`
			`s.append( fmt % (k,value_str) )`
			`return "\n".join(s)`

cleanup a bit more the storage module ... 2017-01-07 23:53:12 +01:00			`def save(self,fname=None):`
			`if fname is None: fname = self.filename`
			`assert fname is not None`
			`save(fname,dict(self))`