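# NOTE: promotional banner text (not code), kept as a comment so the file parses:
# "A Tsinghua expert spent three months painstakingly compiling hundreds of GB of resources, shared for free! ....>>>"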
import io
import os
import sys
import xml.sax
class WordsHandler(xml.sax.ContentHandler):
    """SAX content handler that extracts ``<word>`` and ``<trans>`` text.

    Each completed ``<word>`` element is written as one UTF-8 line to
    ``words.txt`` (opened in the current directory when the handler is
    constructed) and echoed to stdout; each completed ``<trans>`` element
    is only echoed to stdout.

    Attributes:
        CurrantTag: Name of the most recently opened element, or ``''``
            once an element has been closed.
        Word: Text collected for the most recent ``<word>`` element.
        Trans: Text collected for the most recent ``<trans>`` element.
        wordfile: Output text file; closed by ``endDocument``.
    """

    def __init__(self):
        """Reset the parsing state and open ``words.txt`` for writing."""
        # Direct base-class call (not super()) so this also works where
        # ContentHandler is an old-style class.
        xml.sax.ContentHandler.__init__(self)
        self.CurrantTag = ''
        self.Word = ''
        self.Trans = ''
        # io.open with an explicit encoding accepts text directly, so the
        # output is UTF-8 regardless of the platform default encoding.
        self.wordfile = io.open('words.txt', 'w', encoding='utf-8')

    def startElement(self, tag, attributes):
        """Remember *tag* as the open element and clear its old text."""
        self.CurrantTag = tag
        # Start from an empty buffer so an empty element does not re-emit
        # the text of the previous element of the same name.
        if tag == 'word':
            self.Word = ''
        elif tag == 'trans':
            self.Trans = ''

    def endElement(self, tag):
        """Emit the text collected for the element that was just open."""
        if self.CurrantTag == 'word':
            self.wordfile.write(u'%s\n' % self.Word)
            print("Word :  %s" % self.Word)
        elif self.CurrantTag == 'trans':
            print("Trans :  %s" % self.Trans)
        # Text that follows a closing tag (e.g. whitespace between
        # elements) must not be attributed to the element just closed.
        self.CurrantTag = ''

    def characters(self, contant):
        """Append a chunk of character data to the open element's text."""
        # The parser may deliver one element's text in several calls, so
        # chunks are accumulated rather than overwriting each other.
        if self.CurrantTag == 'word':
            self.Word += contant
        elif self.CurrantTag == 'trans':
            self.Trans += contant

    def endDocument(self):
        """Close ``words.txt`` once the whole document has been parsed."""
        self.wordfile.close()
if __name__ == '__main__':
    # Script entry point: stream "words.xml" through WordsHandler, which
    # writes the extracted words to "words.txt" and echoes them to stdout.
    parser = xml.sax.make_parser()
    # Namespace processing is switched off so the handler receives plain
    # startElement/endElement callbacks with raw tag names.
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    handler = WordsHandler()
    parser.setContentHandler(handler)
    try:
        parser.parse("words.xml")
    finally:
        # endDocument() closes the output file only after a successful
        # parse; close it here too so a parse error does not leak the
        # handle. Closing an already-closed file is a no-op.
        handler.wordfile.close()