# -*- coding: utf-8 -*-

"""
svg parser
"""

# imports ####################################################################

# import sys
import os
import logging
import xml.parsers.expat
import gzip

from tempfile import mkstemp
from base64 import b64decode
from collections import defaultdict

from .. import scenegraph as sg


# logging ####################################################################

log = logging.getLogger(__name__)


# utils ######################################################################

def ascii(v, _=None):
    """Return ``str(v)``.

    NOTE: deliberately shadows the ``ascii`` builtin; the name is kept because
    it is part of this module's interface.  The second argument is ignored so
    the function fits the ``parser(value, elements)`` calling convention.
    """
    return str(v)


def string_to_number(v, _=None):
    """Convert a numeric string to an ``int`` or a ``float``.

    Integers are returned as ``int``.  Anything else is parsed as a float; a
    trailing ``%`` divides the value by 100 (``"50%"`` -> ``0.5``).

    :param str v: the text to convert
    :raises ValueError: if the text is not a number
    """
    try:
        return int(v)
    except ValueError:
        scale = 1.
        if v.endswith(r"%"):
            scale = 100.
            v = v[:-len(r"%")]
        return float(v) / scale


def replace(s, *pairs):
    """Apply each ``(before, after)`` replacement to ``s``, in order."""
    for before, after in pairs:
        s = s.replace(before, after)
    return s


# sublanguages ###############################################################

def unquote(v):
    """Strip one pair of matching single or double quotes around ``v``.

    An empty string is returned unchanged (it used to raise ``IndexError``).
    """
    if v and v[0] == v[-1] and v[0] in ("'", '"'):
        v = v[1:-1]
    return v


def attributify(style):
    """Parse a ``key: value; key: value`` declaration list into a dict.

    Only the first ``:`` of a declaration separates the key from the value,
    so values that themselves contain a colon are preserved.  Blank
    declarations (e.g. after a trailing ``;``) are skipped; a declaration
    without any ``:`` is skipped with a warning.

    :param str style: the content of a ``style`` attribute or of a css rule
    :rtype: dict mapping property names to unquoted string values
    """
    attributes = {}
    for declaration in style.split(";"):
        declaration = declaration.strip()
        if not declaration:
            continue
        key, separator, value = declaration.partition(":")
        if not separator:
            log.warning("ignoring malformed style declaration %s", declaration)
            continue
        attributes[key.strip()] = unquote(value.strip())
    return attributes


def styles(cdata):
    """Parse a (very) restricted css stylesheet.

    For each rule only the last whitespace-separated token before ``{`` is
    used as the key.  Comments are only recognised when ``/*`` and ``*/``
    stand alone as whitespace-separated tokens.

    :param str cdata: the text of the stylesheet
    :rtype: defaultdict mapping selector tokens to dicts of declarations
    """
    _styles = defaultdict(dict)
    cdata = replace(cdata, ("{", " { "), ("}", " } "))
    cdata = iter(cdata.split())
    for token in cdata:
        if token == "/*":
            # skip the comment; an unterminated comment simply ends the input
            # instead of leaking StopIteration to the caller
            while next(cdata, "*/") != "*/":
                pass
            try:
                token = next(cdata)
            except StopIteration:
                continue
        while not token.startswith("{"):
            key = token
            # TODO: properly implement css selectors
            token = next(cdata, "{}")
        content = token
        for token in cdata:
            content += token
            if content.endswith("}"):
                break
        _styles[key].update(attributify(content[len("{"):-len("}")]))
    return _styles


def asciify_key(k):
    """Turn an xml attribute name into a python keyword-argument name.

    ``-`` and ``:`` become ``_``; ``id`` and ``class`` get a leading
    underscore; ``xlink_href`` is shortened to ``href``.
    """
    k = ascii(k)
    for c in "-:":
        k = k.replace(c, '_')
    if k in ["id", "class"]:
        k = "_%s" % k
    if k == "xlink_href":
        k = "href"
    return k


def asciify_keys(d):
    """Return a copy of ``d`` whose keys went through :func:`asciify_key`."""
    return dict((asciify_key(k), d[k]) for k in d)


def switify_values(d, elements):
    """
    :param d: a dictionary mapping xml attribute names (string)
              to their values (as strings)
    :param elements: the elements defined so far, passed as second argument
                     to every attribute parser
    :return : a dictionary mapping xml attribute names (string)
              to their parsed values (which type depends on the attribute
              (eg scenegraph.Color, float, etc.)
    """
    return dict((k, svg_attributes_parsers[k](d[k], elements)) for k in d)


_UNITS = {
    # http://www.w3.org/TR/SVG/coords.html#Units
    "px": 1.,
    "pt": 1.25,
    "pc": 15,
    "em": 10,  # TODO: this should be dependant on current font-size
    "ex": 5,  # TODO: this should be dependant on current x-height
    "mm": 3.543307,
    "cm": 35.43307,
    "in": 90.,
}

_SIZE_FACTOR = 1.2
_MEDIUM = 12 * _UNITS["pt"]
_ABSOLUTE_SIZES = {
    "xx-small": _MEDIUM * _SIZE_FACTOR ** -3,
    "x-small": _MEDIUM * _SIZE_FACTOR ** -2,
    "small": _MEDIUM * _SIZE_FACTOR ** -1,
    "medium": _MEDIUM,
    "large": _MEDIUM * _SIZE_FACTOR ** 1,
    "x-large": _MEDIUM * _SIZE_FACTOR ** 2,
    "xx-large": _MEDIUM * _SIZE_FACTOR ** 3,
}


def string_to_length(v, _=None):
    """Convert a length (``"12pt"``, ``"3"``, ``"small"``...) to a number.

    Matching is case-insensitive.  Named sizes come from ``_ABSOLUTE_SIZES``;
    a two-letter unit suffix found in ``_UNITS`` scales the numeric part.
    """
    v = v.lower()
    if v in _ABSOLUTE_SIZES:
        return _ABSOLUTE_SIZES[v]
    u = 1.
    unit = v[-2:]
    if unit in _UNITS:
        u = _UNITS[unit]
        v = v[:-2]
    return u * string_to_number(v)


def string_to_length_list(v, _=None):
    """Convert a comma/space separated list of lengths.

    :return: ``None`` for ``'none'``, a single number when the list holds
             exactly one length, otherwise a list of numbers
    """
    if v == 'none':
        return None
    v = replace(v, (",", " "))
    v = list(string_to_length(u) for u in v.split())
    if len(v) == 1:
        return v[0]
    return v


def string_to_color(v, elements=None):
    """Convert a paint value to a color (or to a paint server).

    Handles, in this order: attribute names of ``sg.Color`` (``currentColor``
    is looked up as ``current``), ``rgb(r,g,b)``, ``#rgb`` / ``#rrggbb`` and
    ``url(#id)`` references to an entry of ``elements``.

    :param str v: the attribute value
    :param elements: optional dict of already defined elements, by id
    :return: the parsed paint, or the string ``"unknown"`` (after logging a
             warning) when the value is not understood
    """
    if v == "currentColor":
        v = "current"
    if elements is None:
        elements = {}
    if hasattr(sg.Color, v):  # named color
        return getattr(sg.Color, v)
    if v.startswith("rgb(") and v.endswith(")"):  # rgb
        rgb = v[len("rgb("):-len(")")]
        r, g, b = (string_to_number(u) for u in rgb.split(","))
        return sg.Color(r, g, b)
    if v.startswith("#"):  # raw color
        rrggbb = v[len('#'):]
        if len(rrggbb) == 3:
            rrggbb = "".join(c * 2 for c in rrggbb)
        rr, gg, bb = rrggbb[:2], rrggbb[2:4], rrggbb[4:]
        return sg.Color(*(int(u, 16) for u in (rr, gg, bb)))
    if v.startswith("url(#"):  # def
        url = v[len("url(#"):-len(")")]
        if url in elements:
            return get_pserver(elements, url)
    log.warning("unknown color %s", v)
    return "unknown"


def transform(v):
    """creates and returns an object representing a 2D transformation,
    initialized from its svg transform attribute

    :param v: the value of single transform in a svg transform attribute,
              without its closing parenthesis and with space separated
              arguments (as prepared by :func:`string_to_transform_list`)
    :rtype: an object of a class derived from _Transform
    :raises KeyError: if the transform name is unknown
    """
    _transform, v = v.split("(")
    transform_class = {
        "translate": sg.Translate,
        "rotate": sg.Rotate,
        "scale": sg.Scale,
        "skewX": sg.SkewX,
        "skewY": sg.SkewY,
        "matrix": sg.Matrix,
    }[_transform]
    return transform_class(*(string_to_number(u) for u in v.split()))


def string_to_transform_list(v, _=None):
    """creates and returns a list of objects (each of them representing
    a 2D transformation), initialized from the value of a svg transform
    attribute

    :param v: the value of a svg transform attribute
              (for example 'rotate(30 20,40) translate(13,15)')
    :rtype: a list of objects of a class derived from
            scenegraph.transform._Transform
            (eg [scenegraph.transform.Rotate(),
                 scenegraph.transform.Translate()] )
    """
    v = replace(v, (" (", "("), (",", " "))
    return [transform(t.strip()) for t in v.split(")")[:-1]]


def string_to_url(v, elements=None):
    """Resolve a ``url(#id)`` reference.

    :return: ``None`` for ``"none"``; the referenced element when ``id`` is
             in ``elements``; otherwise the id itself as a string
    """
    if elements is None:
        elements = {}
    if v == "none":
        return None
    assert v.startswith("url(#"), v
    url = v[len("url(#"):-len(")")]
    return elements.get(url, str(url))


def string_to_href(v, _=None):
    """Normalise an ``href`` value.

    ``file://`` prefixes are removed.  Base64 ``data:image/<ext>;`` payloads
    are decoded (and gunzipped when they are gzip data) into a new temporary
    file whose path is returned; that file is not deleted by this function.
    Any other value is returned as a string, unchanged.
    """
    if v.startswith("file://"):
        v = v[len("file://"):]
    elif v.startswith("data:image/"):
        ext, data = v[len("data:image/"):].split(";", 1)
        assert data.startswith("base64,")
        data = data[len("base64,"):]
        data = data.encode("ascii")
        data = b64decode(data)
        try:
            data = gzip.decompress(data)  # @UndefinedVariable
        except OSError:
            # not gzip-compressed: keep the decoded bytes as they are
            pass
        # mkstemp hands back an already open descriptor: write through it so
        # that it gets closed instead of leaking
        fd, v = mkstemp(".%s" % ext)
        with os.fdopen(fd, "wb") as _image:
            _image.write(data)
    return ascii(v)


_PATH_COMMANDS = "MLVHCSQTAZmlvhcsqtaz"


def pop1(v):
    """Pop the last token of ``v`` and return it as a number."""
    return string_to_number(v.pop())


def pop2(v):
    """Pop two tokens from ``v`` and return them as a pair of numbers."""
    return (pop1(v), pop1(v))


# argument readers for each (upper-cased) path command; commands that are
# not listed (Z) take no argument
_POPPERS = defaultdict(list, {
    'M': [pop2],
    'L': [pop2],
    'V': [pop1],
    'H': [pop1],
    'C': [pop2, pop2, pop2],
    'S': [pop2, pop2],
    'Q': [pop2, pop2],
    'T': [pop2],
    'A': [pop2, pop1, pop2, pop2],
})


def string_to_path_data(v, _=None):
    """Parse the ``d`` attribute of a path.

    :return: a flat list alternating command letters and their arguments
             (numbers or pairs of numbers, as produced by ``_POPPERS``).
             A command letter that is omitted in the input is repeated
             explicitly (``L``/``l`` after ``M``/``m``).
    :raises IndexError: if a command lacks arguments
    """
    v = replace(v, ("-", " -"), ("e -", "e-"), ("E -", "E-"), (",", " "),
                *((c, " %s " % c) for c in _PATH_COMMANDS))
    v = list(reversed(v.split()))  # reversed so that pop() reads in order
    d, last_c = [], 'M'
    while v:
        c = v.pop()
        if c not in _PATH_COMMANDS:
            if not _POPPERS[last_c.upper()]:
                # a number after a command taking no argument (Z) would be
                # pushed back and re-read forever: give up on the remainder
                log.warning("unexpected path data %s after %s", c, last_c)
                break
            # implicit command: put the number back and repeat the last one
            v.append(c)
            c = last_c
        d.append(c)
        last_c = c
        if last_c == 'M':
            last_c = 'L'
        if last_c == 'm':
            last_c = 'l'
        for popper in _POPPERS[c.upper()]:
            d.append(popper(v))
    return d


def string_to_point_list(v, _=None):
    """Parse a ``points`` attribute into a list of number pairs.

    A trailing unpaired coordinate is dropped.
    """
    v = replace(v, ("-", " -"), ("e -", "e-"), ("E -", "E-"), (",", " "))
    v = list(reversed(v.split()))
    d = []
    while v:
        try:
            d.append(pop2(v))
        except IndexError:
            break
    return d


# maps (asciified) attribute names to their parser; attributes that are not
# listed are kept as strings
svg_attributes_parsers = defaultdict(lambda: lambda a, _: ascii(a), {
    "x": string_to_length_list,
    "y": string_to_length_list,
    "rx": string_to_length,
    "ry": string_to_length,
    "x1": string_to_length,
    "y1": string_to_length,
    "x2": string_to_length,
    "y2": string_to_length,
    "width": string_to_length,
    "height": string_to_length,
    "font_size": string_to_length,
    "stroke_width": string_to_length,
    "stroke_miterlimit": string_to_number,
    "stroke_opacity": string_to_number,
    "stroke_dasharray": string_to_length_list,
    "stroke_dashoffset": string_to_length,
    "fill_opacity": string_to_number,
    "opacity": string_to_number,
    "color": string_to_color,
    "fill": string_to_color,
    "stroke": string_to_color,
    "transform": string_to_transform_list,
    "clip_path": string_to_url,
    "mask": string_to_url,
    "href": string_to_href,
    "d": string_to_path_data,
    "points": string_to_point_list,
    "cx": string_to_length,
    "cy": string_to_length,
    "r": string_to_length,
    "fx": string_to_length,
    "fy": string_to_length,
    "gradientTransform": string_to_transform_list,
    "patternTransform": string_to_transform_list,
    "viewBox": string_to_length_list,
})


# gradient ###################################################################

def stop(offset, stop_color="none", stop_opacity=None, **_):
    """Build a gradient stop from the attributes of a ``stop`` element.

    :return: ``(offset, color)``, or ``(offset, color, opacity)`` when
             ``stop_opacity`` is given; extra attributes are ignored
    """
    o, c = string_to_number(offset), string_to_color(stop_color)
    if stop_opacity is None:
        return o, c
    return o, c, string_to_number(stop_opacity)


# parser class ###############################################################

class Parser(object):
    """Expat based svg parser building a scenegraph under ``self.root``.

    Elements are dispatched to ``open_<tag>`` / ``close_<tag>`` methods when
    they exist, and to a table of scenegraph classes otherwise.
    """

    def __init__(self):
        self.expat_parser = xml.parsers.expat.ParserCreate()
        self.expat_parser.StartElementHandler = self.start_element
        self.expat_parser.EndElementHandler = self.end_element
        self.expat_parser.CharacterDataHandler = self.char_data
        self.expat_parser.ProcessingInstructionHandler = self.proc_instruction
        self.cdata = None
        self.pserver_kwargs = None
        self.gradient_stops = None
        self.reset()

    def proc_instruction(self, target, data):
        """expat handler loading the css file of an ``xml-stylesheet``
        processing instruction (only when its data starts with
        ``type="text/css"`` and contains an ``href``).
        """
        if target != 'xml-stylesheet':
            return
        if not data.startswith('type="text/css"'):
            return
        b = data.find('href="')
        if b < 0:
            return
        b += len('href="')
        e = data.find('"', b)
        css_name = data[b:e]
        # the context manager closes the stylesheet once it has been read
        with open(css_name) as css_file:
            style = styles(css_file.read())
        self.styles.update(style)

    def reset(self, **attributes):
        """(Re)initialise the parsing state around a fresh root group."""
        self.root = sg.Group(**attributes)
        # the group stack representing the current node path during parsing
        self.group_stack = [self.root]
        self.elements = {"__root__": self.root}
        self.cdata = []
        self.texts = []
        self.gradient_stops = []
        # elements waiting for a not yet defined id, by id
        self.uses = defaultdict(list)
        self.clippeds = defaultdict(list)
        self.maskeds = defaultdict(list)
        self.styles = defaultdict(dict)

    def parse(self, document):
        """Parse ``document`` and neutralise the references left unresolved.

        :param document: the svg source, passed as is to expat's ``Parse``
        """
        self.expat_parser.Parse(document)
        for _id in self.uses:
            log.warning("undefined reference #%s replaced by empty group", _id)
            for use in self.uses[_id]:
                use.element = sg.Group()
        for _id in self.clippeds:
            log.warning("undefined clipPath #%s replaced by none", _id)
            for clipped in self.clippeds[_id]:
                clipped.clip_path = None
        for _id in self.maskeds:
            log.warning("undefined mask #%s replaced by none", _id)
            for masked in self.maskeds[_id]:
                masked.mask = None

    def char_data(self, data):
        """expat handler accumulating character data."""
        self.cdata.append(data)

    def start_element(self, name, attributes):
        """expat handler for the start of an xml tag

        :param str name: the xml node's name (eg circle)
        :param atttributes: the xml node's attributes, in the form of a
                            dictionary mapping attribute names to their
                            values (eg {'width':'50', 'height':70})
        """
        name = name.split(":")[-1]  # drop the namespace prefix, if any

        # merge the styles into the attributes
        if "style" in attributes:
            style = attributes.pop("style")
            attributes.update(attributify(style))
        if "class" in attributes:
            key = ".%s" % attributes["class"]
            attributes.update(self.styles[key])
        if "id" in attributes:
            key = "#%s" % attributes["id"]
            attributes.update(self.styles[key])
        attributes.update(self.styles[name])
        attributes.update(self.styles["*"])

        attributes = asciify_keys(attributes)
        attributes = switify_values(attributes, self.elements)
        for k in "color", "fill", "stroke":
            if attributes.get(k, None) == "unknown":
                del attributes[k]

        try:
            handler = getattr(self, "open_%s" % name)
        except AttributeError:
            try:
                handler = {
                    "svg": sg.Group,
                    "g": sg.Group,
                    "symbol": sg.Group,
                    "a": sg.Group,
                    "defs": sg.Group,
                    "clipPath": sg.Group,
                    "mask": sg.Group,
                    "path": sg.Path,
                    "rect": sg.Rectangle,
                    "circle": sg.Circle,
                    "ellipse": sg.Ellipse,
                    "line": sg.Line,
                    "polyline": sg.Polyline,
                    "polygon": sg.Polygon,
                }[name]
            except KeyError:
                log.warning("unhandled %s element", name)
                return

        elem = handler(**attributes)
        if elem is None:
            return

        # a reference that is still a string has not been resolved yet:
        # remember the element so it gets patched once the id is defined
        if "clip_path" in attributes:
            clipPath = attributes["clip_path"]
            if isinstance(clipPath, str):
                self.clippeds[clipPath].append(elem)
        if "mask" in attributes:
            mask = attributes["mask"]
            if isinstance(mask, str):
                self.maskeds[mask].append(elem)

        if "_id" in attributes:
            _id = attributes["_id"]
            self.elements[_id] = elem
            # resolve the pending references to this id
            for use in self.uses.pop(_id, []):
                use.element = elem
            for clipped in self.clippeds.pop(_id, []):
                clipped.clip_path = elem
            for masked in self.maskeds.pop(_id, []):
                masked.mask = elem

        if name in ["defs", "clipPath", "mask", "pattern"]:
            elem.tag = name
        else:
            # set this element as a child to its parent group
            self.group_stack[-1].add_child(elem)

        if isinstance(elem, sg.Group):
            self.group_stack.append(elem)

    def end_element(self, name):
        """expat handler calling ``close_<name>`` when such a method exists."""
        name = name.split(":")[-1]
        try:
            handler = getattr(self, "close_%s" % name)
        except AttributeError:
            pass
        else:
            handler()

    def close_g(self):
        """Pop and return the current group."""
        return self.group_stack.pop()

    close_symbol = close_g
    close_a = close_g
    close_defs = close_g
    close_svg = close_g

    def close_clipPath(self):
        """Pop the clipPath group and fix its paint attributes."""
        return fix_clip_attributes(self.close_g())

    def close_mask(self):
        """Pop the mask group and remove its geometry attributes."""
        return fix_mask_attributes(self.close_g())

    def open_style(self, **attributes):
        """Start collecting the character data of a ``style`` element."""
        self.cdata = []

    def close_style(self):
        """Parse the collected css and add it to the known styles."""
        self.styles.update(styles("".join(self.cdata)))
        self.cdata = []

    def open_text(self, **attributes):
        """Create an (initially empty) text node."""
        text = sg.Text("", **attributes)
        self.texts.append(text)
        self.cdata = []
        return text

    def close_text(self):
        """Give the innermost open text node its collected character data."""
        text = self.texts.pop()
        text.text = "".join(self.cdata)

    def open_use(self, **attributes):
        """Create a ``use`` node for ``href`` (``[external file]#id``).

        An external file is parsed with a new parser from within its own
        directory; file names ending in ``z`` are read as gzip.  When the
        target id is not known (yet), the node is registered in ``self.uses``
        for later resolution.
        """
        _href = attributes.pop("href")
        try:
            external, _id = _href.split("#")
        except ValueError:
            external, _id = _href, "__root__"
        if external:
            # TODO: cache parsers if the same external file is reused
            #       with != _id
            parser = Parser()
            cwd = os.getcwd()
            path, filename = os.path.split(external)
            try:
                # parse from within the directory of the external file
                if path:
                    os.chdir(path)
                opener = gzip.open if filename.endswith('z') else open
                with opener(filename, "rb") as f:
                    parser.parse(f.read())
            finally:
                # always come back, even when the file cannot be opened
                os.chdir(cwd)
        else:
            parser = self
        element = parser.elements.get(_id, None)
        use = sg.Use(element, **attributes)
        if element is None:
            self.uses[_id].append(use)
        return use

    def open_image(self, **attributes):
        """Create an image node; images whose extension contains ``svg`` are
        handled as ``use`` elements.
        """
        href = attributes["href"]
        if "svg" in href.rsplit(".")[-1]:
            return self.open_use(**attributes)
        return sg.Image(**attributes)

    def open_pserver(self, **attributes):
        """Keep the attributes of a paint server until its closing tag."""
        self.pserver_kwargs = attributes

    def close_pserver(self, PaintServer):
        """Register the current paint server as ``(class, kwargs)`` under its
        id; it is instantiated lazily by :func:`get_pserver`.
        """
        kwargs = self.pserver_kwargs
        _id = kwargs.pop("_id")
        _href = kwargs.pop("href", None)
        if _href:
            assert _href.startswith("#")
            kwargs["parent"] = _href[len("#"):]
        if self.gradient_stops:
            kwargs["stops"] = [stop(**s) for s in self.gradient_stops]
            self.gradient_stops = []
        self.elements[_id] = (PaintServer, kwargs)

    open_linearGradient = open_radialGradient = open_pserver

    def open_pattern(self, **attributes):
        """Open a pattern: a paint server whose content is a group."""
        self.open_pserver(**attributes)
        return sg.Group()

    def close_linearGradient(self):
        return self.close_pserver(sg.LinearGradient)

    def close_radialGradient(self):
        return self.close_pserver(sg.RadialGradient)

    def close_pattern(self):
        self.pserver_kwargs["pattern"] = self.close_g()
        return self.close_pserver(sg.Pattern)

    def open_stop(self, **attributes):
        """Collect a gradient stop for the enclosing gradient."""
        self.gradient_stops.append(attributes)


def get_pserver(elements, _id):
    """Return the paint server registered under ``_id``.

    A server still stored as a ``(class, kwargs)`` pair is instantiated first
    (after its ``parent``, if any) and the instance replaces the pair in
    ``elements``.  Only the kwargs listed in the class ``_DEFAULTS`` are
    passed to the constructor.
    """
    pserver = elements[_id]
    try:
        PaintServer, kwargs = pserver
    except TypeError:
        # not a pair: returned as is
        pass
    else:
        if "parent" in kwargs:
            parent_id = kwargs["parent"]
            parent = get_pserver(elements, parent_id)
        else:
            parent = None
        pserver = elements[_id] = PaintServer(
            parent,
            **{k: kwargs[k] for k in kwargs if k in PaintServer._DEFAULTS}
        )
    return pserver


def fix_clip_attributes(clip):
    """Recursively normalise the paint attributes of a clip path.

    ``clip_rule`` (when present) is copied to ``fill_rule``.  Groups get an
    opaque white fill and no stroke; other nodes lose their own ``fill``,
    ``fill_opacity``, ``stroke`` and ``opacity`` attributes.

    :return: ``clip`` itself
    """
    try:
        clip.fill_rule = clip.clip_rule
    except AttributeError:
        pass
    if isinstance(clip, sg.Group):
        clip.fill = sg.Color.white
        clip.fill_opacity = 1.
        clip.stroke = None
        clip.opacity = 1.
        for child_clip in clip.children:
            fix_clip_attributes(child_clip)
    else:
        for attr in ["fill", "fill_opacity", "stroke", "opacity"]:
            try:
                delattr(clip, attr)
            except AttributeError:
                pass
    return clip


def fix_mask_attributes(mask):
    """Remove the ``x``, ``y``, ``width`` and ``height`` attributes of
    ``mask`` (when present) and return it.
    """
    for attr in ["x", "y", "width", "height"]:
        try:
            delattr(mask, attr)
        except AttributeError:
            pass
    return mask


def parse(document, logging_level=logging.ERROR):
    """Parse an svg document and return the root of its scenegraph.

    :param document: the svg source
    :param logging_level: level set on this module's logger
    """
    log.setLevel(logging_level)
    parser = Parser()
    parser.parse(document)
    return parser.root