XML(可扩展标记语言)是一种用于在不同语言或程序之间交换数据的标记语言/数据格式。
在 Python 中对 XML 的常见操作如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Worked examples for xml.etree.ElementTree: parse, traverse, edit, create.

There are two ways to parse XML:

1. ``ET.XML(text)``   -- converts a string and returns an Element (the root).
2. ``ET.parse(path)`` -- reads a file and returns an ElementTree; call
   ``getroot()`` on it to obtain the root Element.

Most examples below are kept as commented-out snippets; uncomment the one
you want to try. The live statements read ``new_first.xml`` and write
``test3.xml`` in the current working directory.
"""
from xml.dom import minidom
from xml.etree import ElementTree as ET

# ------------------------------------------------------------------------
# Parsing
# ------------------------------------------------------------------------
# Via XML():
# str_xml = open('first.xml', 'r', encoding='utf-8').read()
# root = ET.XML(str_xml)
# print(root, type(root))
# result = ...   (value missing in the original notes)
#
# Via parse():
# tree = ET.parse('first.xml')
# print(tree, type(tree))
# echo result = ...   (value missing in the original notes)
# root = tree.getroot()
# print(root, type(root))
# echo result = ...   (value missing in the original notes)

# ------------------------------------------------------------------------
# Attributes shared by every node:
#   tag      name of the node
#   attrib   attributes of the node
#   text     value (text content) of the node
#
# Node methods:
#   find(path, namespaces=None)            find the specified node
#   makeelement(tag, attrib)               create a new node
#   copy()                                 return a copy of the current node
#   append(subelement)                     append one node to the current node
#   extend(elements)                       extend the current node with n nodes
#   insert(index, subelement)              insert a new node into the current node
#   remove(subelement)                     delete a child node of the current node
#   findtext(path, default=None, namespaces=None)
#                                          value of the first matching sub-node
#   findall(path, namespaces=None)         get all matching sub-nodes
#   iterfind(path, namespaces=None)        get all specified nodes as an iterator
#   clear()                                clear the node
#   get(key, default=None)                 get an attribute value of the current node
#   set(key, value)                        set an attribute value on the current node
#   keys()                                 keys of all attributes of the current node
#   items()                                all attributes of the current node,
#                                          each as a (key, value) pair
#   iter(tag=None)                         iterator over all nodes in the current
#                                          node's subtree that match tag
#   itertext()                             iterator over the text values found in
#                                          the current node's subtree
# ------------------------------------------------------------------------

# Traverse the whole document (two levels deep):
# tree = ET.parse('first.xml')
# root = tree.getroot()
# for i in root:
#     print(i.tag, i.attrib, i.text)
#     for j in i:
#         print(j.tag, j.attrib, j.text)

# Traverse only the nodes with a given tag:
# tree = ET.parse('first.xml')
# root = tree.getroot()
# for i in root.iter('test'):
#     print(i.text)

# ------------------------------------------------------------------------
# Modify / add / delete / query attributes and values of nodes.
# The commented-out snippets in this section operate on this tree/root.
# ------------------------------------------------------------------------
tree = ET.parse('new_first.xml')
root = tree.getroot()

# Add an attribute (key: value) -- for all 'test' nodes:
# for i in root.iter('test'):
#     i.set('channel', 'm')
# tree.write('new_first.xml', encoding='utf-8')

# Change the text value -- only for the node whose text is 'test1':
# for i in root.iter('test'):
#     if i.text == 'test1':
#         i.text = 'success'
# tree.write('new_first.xml', encoding='utf-8')

# Delete an attribute -- for all 'test' nodes:
# for i in root.iter('test'):
#     del i.attrib['channel']
# tree.write('new_first.xml', encoding='utf-8')

# Modify an attribute -- for all 'test' nodes:
# for i in root.iter('test'):
#     i.set('channel', 'M')
# tree.write('new_first.xml', encoding='utf-8')

# Query attributes -- for all 'test' nodes:
# for i in root.iter('test'):
#     print(i.attrib)

# Deleting (and adding) nodes -- remove every 'country' whose 'test' child
# has the text 'success':
# for i in root.findall('country'):
#     vle = i.find('test').text
#     if vle == 'success':
#         root.remove(i)
# tree.write('new_first.xml', encoding='utf-8')

# ------------------------------------------------------------------------
# Creating an XML document: three ways.
# ------------------------------------------------------------------------
# One -- build every Element separately, then append:
# # create the root node
# root = ET.Element('family')
# # child nodes
# son1 = ET.Element('son1', {'name': 'java'})
# son2 = ET.Element('son2', {'name': 'python'})
# # grandchild nodes
# grandson1 = ET.Element('grandson1', {'name': 'c++'})
# grandson2 = ET.Element('grandson2', {'name': 'c'})
# # attach the children to the root
# root.append(son1)
# root.append(son2)
# # attach the grandchildren to the children
# son1.append(grandson1)
# son2.append(grandson2)
# # create the ElementTree object
# tree = ET.ElementTree(root)
# # write it to a file
# tree.write('test.xml', encoding='utf-8', xml_declaration=True,
#            short_empty_elements=True)

# Two -- use makeelement() on the parent, then append:
# root = ET.Element('family')
# son1 = root.makeelement('son1', {'name': 'java'})
# son2 = root.makeelement('son2', {'name': 'python'})
# grandson1 = son1.makeelement('grandson1', {'name': 'c++'})
# grandson2 = son2.makeelement('grandson2', {'name': 'c'})
# root.append(son1)
# root.append(son2)
# son1.append(grandson1)
# son2.append(grandson2)
# tree = ET.ElementTree(root)
# tree.write('test1.xml', encoding='utf-8')

# Three -- SubElement() creates the node and attaches it in one step.
root = ET.Element('family')
son1 = ET.SubElement(root, 'son1', {'name': 'java'})
son2 = ET.SubElement(root, 'son2', {'name': 'python'})
grandson1 = ET.SubElement(son1, 'grandson1', {'name': 'c++'})
grandson2 = ET.SubElement(son2, 'grandson2', {'name': 'c'})
# tree = ET.ElementTree(root)
# tree.write('test2.xml', encoding='utf-8')


# For indented output, go through xml.dom.minidom.
def prettify(node):
    """Return *node* serialized as a tab-indented XML string.

    The element is first serialized with ElementTree, then re-parsed with
    minidom, whose ``toprettyxml`` adds the line breaks and indentation.
    """
    serialized = ET.tostring(node, 'utf-8')
    document = minidom.parseString(serialized)
    return document.toprettyxml(indent='\t')


raw_str = prettify(root)
# The context manager closes the file even if the write raises.
with open('test3.xml', 'w', encoding='utf-8') as f:
    f.write(raw_str)

##########################################################
# 关于命名空间 (section heading of the notes: "About namespaces")
命名冲突
在 XML 中,元素名称是由开发者定义的,当两个不同的文档使用相同的元素名时,就会发生命名冲突。
from xml.etree import ElementTree as ET

# Example: writing namespaced XML. Registering a prefix makes the serializer
# emit "com:..." instead of an auto-generated prefix such as "ns0:...".
COMPANY_NS = "http://www.company.com"
ET.register_namespace('com', COMPANY_NS)  # some name


def _qualified(local_name):
    """Return *local_name* in ElementTree's ``{uri}local`` form for COMPANY_NS."""
    return "{" + COMPANY_NS + "}" + local_name


# build a tree structure
root = ET.Element(_qualified("STUFF"))
body = ET.SubElement(
    root,
    _qualified("MORE_STUFF"),
    attrib={_qualified("hhh"): "123"},
)
body.text = "STUFF EVERYWHERE!"

# wrap it in an ElementTree instance, and save as XML
tree = ET.ElementTree(root)
tree.write("page.xml", xml_declaration=True, encoding='utf-8', method="xml")