:):)
Я делаю так: запускаю утилиту конвертации XML в YAML, а дальше читаю YAML — это сразу объект Python.
Утилита приведена ниже:
#!/usr/bin/env python
import yaml
from lxml import etree
import sys
import codecs
import re
def convertXml2Yaml(inFileName, outFileName):
    """Parse the XML file *inFileName* and write it as YAML to *outFileName*.

    When the root element carries a namespace map, the output is a mapping
    with two keys: ``"nsmap"`` (the root's prefix -> URI map) and ``"root"``
    (the converted tree).  Otherwise the output is the converted tree itself.
    The output file is written as UTF-8.
    """
    doc = etree.parse(inFileName)
    root = doc.getroot()
    if root.nsmap:
        # Invert prefix -> URI into URI -> prefix so qualified names can be
        # shortened.  ``items()`` is used instead of ``iteritems()`` because
        # the latter does not exist on Python 3 dicts.
        nsi = {uri: prefix for prefix, uri in root.nsmap.items()}
        # Convert the DOM tree into "YAML-able" data structures.
        res = {
            "nsmap": root.nsmap,
            "root": convertXml2YamlAux(root, nsi),
        }
    else:
        res = convertXml2YamlAux(root, {})
    # Ask YAML to dump the data structures to the output file.
    with codecs.open(outFileName, "w", encoding="utf-8") as f:
        yaml.safe_dump(res, f, allow_unicode=True)
def reduce_name(name, nsi):
    """Shorten a ``{namespace}local`` name to ``prefix:local``.

    *nsi* maps namespace URIs to prefixes.  If the namespace of *name* is
    found in *nsi*, its prefix is used; otherwise the namespace URI itself is
    kept in front of the colon.  Names without a leading ``{...}`` part are
    returned unchanged.
    """
    # A plain raw string is used here: the ``ur""`` prefix is a syntax error
    # on Python 3, and the pattern is pure ASCII.
    fnd = re.match(r"{([^}]+?)}(.+)", name)
    if not fnd:
        return name
    uri, local = fnd.group(1), fnd.group(2)
    # NOTE(review): if nsi maps a URI to None (presumably lxml's default
    # namespace), the result is "None:local" - confirm that is intended.
    return "{k}:{v}".format(k=nsi.get(uri, uri), v=local)
u"""elements: [name(tag), attr, text,children]"""
def convertXml2YamlAux(obj, nsi):
    """Recursively convert one XML element into a YAML-serialisable list.

    The result always starts with the (namespace-shortened) element name.
    It is followed by a mapping of attributes, if there are any, and then by
    a list of converted child elements, if there are any.  So the second
    item is a mapping or a list depending on what the element has.

    The element's stripped ``text`` is stored in the attribute mapping under
    the key ``"t"`` and overwrites an XML attribute of the same name.  Only
    ``obj.text`` is read; other text associated with the element is not.

    *nsi* maps namespace URIs to prefixes and is passed on to ``reduce_name``.
    """
    # Add the element name.
    name = reduce_name(obj.tag, nsi)
    text = (obj.text or "").strip()
    attrs = {reduce_name(key, nsi): value for key, value in obj.attrib.items()}
    if text:
        attrs["t"] = text
    children = [convertXml2YamlAux(child, nsi) for child in obj.iterchildren()]
    res = [name]
    if attrs:
        res.append(attrs)  # attrs is a mapping
    if children:
        res.append(children)  # children is a list
    return res
def ld(fil):
    """Read the UTF-8 encoded YAML file *fil* and return the parsed object."""
    with codecs.open(fil, "r", encoding="utf-8") as f:
        # SECURITY: this used to be yaml.load(f) with no Loader.  That form
        # can construct arbitrary Python objects from the input and is
        # rejected by current PyYAML releases.  safe_load is the counterpart
        # of the safe_dump calls that write these files.
        return yaml.safe_load(f)
def sv(fil, data):
    """Write *data* as YAML to the file *fil*, encoded as UTF-8."""
    out = codecs.open(fil, "w", encoding="utf-8")
    with out:
        # allow_unicode keeps non-ASCII characters readable in the output
        # instead of escaping them.
        yaml.safe_dump(data, stream=out, allow_unicode=True)
def main():
    """Command-line entry point: ``<script> INPUT.xml OUTPUT.yaml``.

    Prints a usage message to stderr and exits with status 2 when the two
    file arguments are missing, instead of failing with an IndexError.
    """
    if len(sys.argv) < 3:
        sys.stderr.write(
            "usage: {prog} INPUT.xml OUTPUT.yaml\n".format(prog=sys.argv[0])
        )
        sys.exit(2)
    convertXml2Yaml(sys.argv[1], sys.argv[2])


if __name__ == '__main__':
    main()
Функции sv и ld — для упрощения записи и чтения YAML.
Обратите внимание: предполагается, что всё будет в UTF-8.