|
from operator import itemgetter; |
|
import os.path; |
|
import re; |
|
import sys; |
|
|
|
from graph import Graph; |
|
|
|
# Maps the three-letter operator codes that condition clauses carry to the
# label read() puts on the resulting graph edge (or on the reified node).
# NOTE(review): several values appear to be mis-decoded UTF-8, and a number of
# distinct codes seem to share one label as a result -- confirm the intended
# symbols against the upstream file before relying on these labels.
conditions = {
    "APX": "โ",
    "EQU": "=",
    "LEQ": "โค",
    "LES": "<",
    "NEQ": "โ ",
    "SXN": "ยซ",
    "SXP": "ยป",
    "SXY": "โ",
    "SZN": "\\",
    "SZP": "/",
    "STI": "โ",
    "STO": "โ",
    "SY1": "โฅ",
    "SY2": "โฎ",
    "TAB": "โ",
    "TPR": "โบ",
}
|
|
|
|
|
|
|
|
|
# Line-level patterns for the clause format.  Every clause pattern ends with
# the same tail: one or more blanks, a '%', and an optional free-text comment
# closed by a '[from...to]' offset pair.

# Header line naming the boxer input path; captures the digits after 'p' and
# after 'd' in '.../pNN/dNNNN/'.
id_matcher = re.compile(
    r'^%%% bin/boxer --input (?:[^/]+/)?p([0-9]+)/d([0-9]+)/'
)

# '<box> REF <referent>'; captures box, referent, and the optional offsets.
referent_matcher = re.compile(
    r'^(b[0-9]+) REF ([enpstx][0-9]+)'
    r' +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$'
)

# '<box> <operator> <arg> <arg>', where the operator is one of the codes keyed
# in `conditions` and each argument is a referent or a quoted constant.
condition_matcher = re.compile(
    r'^(b[0-9]+)'
    r' (EQU|NEQ|APX|LE[SQ]|TPR|TAB|S[ZX][PN]|ST[IO]|SY[12]|SXY)'
    r' ([enpstx][0-9]+|"[^"]+")'
    r' ([enpstx][0-9]+|"[^"]+")'
    r' +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$'
)

# '<box> <role> <referent> <referent-or-quoted-constant>'.
role_matcher = re.compile(
    r'^(b[0-9]+) ([^ ]+) ([enpstx][0-9]+)'
    r' ([enpstx][0-9]+|"[^"]+")'
    r' +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$'
)

# '<box> <lemma> "<sense>" <referent>'.
concept_matcher = re.compile(
    r'^(b[0-9]+) ([^ ]+) ("[^ ]+") ([enpstx][0-9]+)'
    r' +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$'
)

# '<box> <relation> <box> [<box>]'; the offsets are matched but not captured.
discourse_matcher = re.compile(
    r'^(b[0-9]+) ([^ ]+) (b[0-9]+)(?: (b[0-9]+))?'
    r' +%(?: .* \[[0-9]+\.\.\.[0-9]+\])?$'
)

# A line holding nothing but the trailing '%' comment.
empty_matcher = re.compile(
    r'^ *%(?: .* \[[0-9]+\.\.\.[0-9]+\])?$'
)
|
|
|
def read(fp, text=None, full=False, reify=False, trace=0, strict=0):
    """Parse blocks of clauses from `fp` and yield one graph per block.

    Each block starts with three header lines: the first is ignored, the
    second must match `id_matcher` (its two numbers form the graph id), and
    the third supplies the input string unless `text` provides one.  Every
    following line is a clause; an empty line ends the block.  A block that
    is still open when `fp` is exhausted is finalised and yielded as well.

    Args:
        fp: Iterable of lines (for example an open text file).
        text: Optional container looked up by graph id; when it holds the
            id, its value replaces the sentence taken from the header.
        full: With `reify`, attach every reified node to its box.
        reify: Represent conditions and roles as nodes of their own rather
            than as labelled edges.
        trace: When truthy, echo every line read; values above 1 also
            report ternary discourse relations.
        strict: When truthy, raise on a duplicate concept label and, in
            `reify` mode, on a referent introduced in a second box.

    Yields:
        `(graph, None)` tuples, one per block.

    Raises:
        Exception: The identifier header is malformed, a line matches no
            clause pattern, or a `strict` check fails.
    """

    def finish(graph, mapping, finis, scopes):
        """Apply the end-of-block fix-ups to `graph` in place."""
        if reify:
            for box, referent, node in finis:
                # Attach the reified node to its box when asked to do so
                # everywhere (`full`), when its source is a quoted constant,
                # or when the box is not recorded for the source referent.
                if full \
                   or referent[0] == referent[-1] == "\"" \
                   or box not in scopes[referent]:
                    graph.add_edge(mapping[box].id, node.id, "โ")
        else:
            for referent in scopes:
                if len(scopes[referent]) > 1:
                    print("pbm.read(): [graph #{}] stray referent โ{}โ in boxes {}."
                          "".format(graph.id, referent, scopes[referent]),
                          file=sys.stderr)
        # Type-0 nodes (the ones created for boxes) without incoming edges
        # become the top nodes of the graph.
        for node in graph.nodes:
            if node.type == 0 and node.is_root():
                node.is_top = True

    def span(start, end):
        """Return a new anchor dict for the captured offsets, or None."""
        if start is not None and end is not None:
            return {"from": int(start), "to": int(end)}
        return None

    def claim(box, referent):
        """Record the first box seen for `referent`; police later ones."""
        if referent in scopes:
            if strict and reify and box not in scopes[referent]:
                raise Exception("pbm.read(): "
                                "[line {}] stray referent โ{}โ in box โ{}โ "
                                "(instead of โ{}โ); exit."
                                "".format(i, referent, box, scopes[referent]))
        else:
            scopes[referent] = {box}

    def referent_node(referent, anchor):
        """Return the node for `referent`, creating or re-anchoring it."""
        if referent not in mapping:
            mapping[referent] \
                = graph.add_node(anchors=[anchor] if anchor else None)
        else:
            mapping[referent].add_anchor(anchor)
        return mapping[referent]

    def argument(name, start, end):
        """Ensure `name` has a node; quoted constants get label and anchor."""
        if name in mapping:
            return
        if name[0] == "\"" and name[-1] == "\"":
            anchor = span(start, end)
            mapping[name] = graph.add_node(label=name,
                                           anchors=[anchor] if anchor else None)
        else:
            mapping[name] = graph.add_node()

    def connect(box, source, target, label, kind):
        """Link `source` to `target`, via a reified node when requested."""
        if reify:
            if box not in mapping:
                mapping[box] = graph.add_node(type=0)
            node = graph.add_node(label=label, type=kind)
            # Box membership of the new node is decided later, in finish().
            finis.append((box, source, node))
            graph.add_edge(mapping[source].id, node.id, None)
            graph.add_edge(node.id, mapping[target].id, None)
        else:
            scopes.setdefault(source, set()).add(box)
            graph.add_edge(mapping[source].id, mapping[target].id, label)

    graph = None
    identifier = None
    sentence = None
    mapping = dict()
    scopes = dict()
    finis = list()
    header = 3          # header lines still expected for the current block
    i = 0
    for i, line in enumerate(fp, 1):
        line = line.rstrip()
        if trace:
            print("{}: {}".format(i, line))

        # An empty line closes the current block.  Nothing is emitted when no
        # graph is open (leading or repeated empty lines), mirroring the
        # guard used at end of input below.
        if not line:
            if graph is not None:
                finish(graph, mapping, finis, scopes)
                yield graph, None
            graph = None
            identifier = None
            mapping = dict()
            scopes = dict()
            finis = list()
            header = 3
            continue

        # Three header lines: ignored, identifier, sentence.
        if header:
            if header == 2:
                match = id_matcher.match(line)
                if match is None:
                    raise Exception("pbm.read(): "
                                    "[line {}] missing identifier in โ{}โ; exit."
                                    "".format(i, line))
                part, document = match.groups()
                identifier = "{:02d}{:04d}".format(int(part), int(document))
            elif header == 1:
                if text is not None and identifier in text:
                    sentence = text[identifier]
                else:
                    # Drop the first five characters and the last one.
                    sentence = line[5:-1]
                graph = Graph(identifier, flavor=2, framework="drg")
                graph.add_input(sentence)
            header -= 1
            continue

        # Referent introduction: '<box> REF <referent>'.
        match = referent_matcher.match(line)
        if match is not None:
            box, referent, start, end = match.groups()
            claim(box, referent)
            if box not in mapping:
                mapping[box] = graph.add_node(type=0)
            referent_node(referent, span(start, end))
            graph.add_edge(mapping[box].id, mapping[referent].id, "โ")
            continue

        # Condition: '<box> <operator> <source> <target>'.
        match = condition_matcher.match(line)
        if match is not None:
            box, condition, source, target, start, end = match.groups()
            condition = conditions[condition]
            argument(source, start, end)
            argument(target, start, end)
            connect(box, source, target, condition, 3)
            continue

        # Role: '<box> <role> <source> <target>'.
        match = role_matcher.match(line)
        if match is not None:
            box, role, source, target, start, end = match.groups()
            if source not in mapping:
                mapping[source] = graph.add_node()
            argument(target, start, end)
            connect(box, source, target, role, 2)
            continue

        # Concept: '<box> <lemma> "<sense>" <referent>'.
        match = concept_matcher.match(line)
        if match is not None:
            box, lemma, sense, referent, start, end = match.groups()
            claim(box, referent)
            node = referent_node(referent, span(start, end))
            if strict and node.label is not None:
                raise Exception("pbm.read(): "
                                "[line {}] duplicate label โ{}โ on referent โ{}โ "
                                "(instead of โ{}โ); exit."
                                "".format(i, lemma, referent, node.label))
            node.label = lemma
            if sense[0] == sense[-1] == "\"":
                sense = sense[1:-1]
            node.set_property("sense", sense)
            continue

        # Discourse relation between boxes; with two box arguments the edge
        # runs between those two and the leading box is not used.
        match = discourse_matcher.match(line)
        if match is not None:
            top, relation, one, two = match.groups()
            if one not in mapping:
                mapping[one] = graph.add_node(type=0)
            if two is not None:
                if trace > 1:
                    print("ternary discourse relation")
                if two not in mapping:
                    mapping[two] = graph.add_node(type=0)
                graph.add_edge(mapping[one].id, mapping[two].id, relation)
            else:
                if top not in mapping:
                    mapping[top] = graph.add_node(type=0)
                graph.add_edge(mapping[top].id, mapping[one].id, relation)
            continue

        # Anything else must be a bare comment line.
        if empty_matcher.search(line) is None:
            raise Exception("pmb.read(): [line {}] invalid clause โ{}โ."
                            "".format(i, line))

    # Input ended without a closing empty line: emit the open block, if any.
    if graph is not None:
        finish(graph, mapping, finis, scopes)
        yield graph, None
|
|
|
|