|
import csv
|
|
import xml.etree.ElementTree as ET
|
|
import glob, os
|
|
|
|
# Directory that holds the input *.xml files and receives the generated CSV.
rootFolder = "c:/317"

# Output CSV: opened once here and shared (as a module global) by the code
# that appends rows and by the final close at the end of the script.
resultPath = rootFolder + "/result.csv"
file = open(resultPath, mode="w", encoding="utf-8")

# Header row, written before any data rows.
file.write("prompt,text,rejected_text\n")
|
|
|
|
def parseXML(xmlFile, folder=None, out=None):
    """Append one CSV row built from the <text> elements of an XML file.

    The prompt column is derived from the file name: the "Using_" marker and
    the ".xml" extension are removed, then dots and underscores become
    spaces.  The text column is the concatenation of every <text> element
    found anywhere in the document, with commas replaced by spaces.  No row
    is written when the combined text contains the character tested below.

    Files that cannot be read or parsed are reported on stdout and skipped;
    no exception is raised for them.

    Args:
        xmlFile: Name of the XML file, relative to ``folder``.
        folder: Directory containing ``xmlFile``.  Defaults to the
            module-level ``rootFolder``.
        out: Writable text stream that receives the row.  Defaults to the
            module-level ``file``.
    """
    if folder is None:
        folder = rootFolder
    if out is None:
        out = file

    prompt = (
        xmlFile.replace("Using_", "")
        .replace(".xml", "")
        .replace(".", " ")
        .replace("_", " ")
    )

    # Only the parse itself is guarded: one unreadable or malformed file must
    # not abort the whole batch, but programming errors should still surface.
    try:
        root = ET.parse(os.path.join(folder, xmlFile)).getroot()
    except (ET.ParseError, OSError, ValueError) as err:
        print("=======")
        print("skipped " + xmlFile + ": " + str(err))
        return

    # Element.text is None for an empty element such as <text/>; treat it as
    # an empty string instead of failing the whole file.
    text = "".join(item.text or "" for item in root.findall(".//text"))

    # NOTE(review): the literal looks like Cyrillic 'а' (U+0430), not Latin
    # 'a' -- confirm this is the intended filter.
    if "а" in text:
        return

    # csv.writer quotes embedded newlines/quotes and commas in the prompt,
    # which plain string concatenation would turn into broken rows.  Commas in
    # the text are still replaced by spaces so existing output is unchanged.
    # lineterminator="\n" avoids doubled carriage returns when the stream was
    # opened in text mode without newline="".
    # The script's header row declares a third column (rejected_text) that is
    # not populated here.
    csv.writer(out, lineterminator="\n").writerow([prompt, text.replace(",", " ")])
|
|
|
|
|
|
# Convert every XML file found directly in rootFolder, then close the CSV.
try:
    # Work from rootFolder so the glob pattern below yields bare file names.
    os.chdir(rootFolder)

    # glob returns names in arbitrary, filesystem-dependent order; sorting
    # makes the order of the CSV rows reproducible between runs.
    for xmlFile in sorted(glob.glob("*.xml")):
        print(xmlFile)
        parseXML(xmlFile)
finally:
    # Close (and flush) the output even if processing stops early.
    file.close()