bowdbeg committed on
Commit
46417fa
·
1 Parent(s): ba888e1
Files changed (3) hide show
  1. .gitignore +164 -0
  2. docred.py +9 -8
  3. sample.py +12 -3
.gitignore ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .vscode
2
+
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # poetry
100
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104
+ #poetry.lock
105
+
106
+ # pdm
107
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108
+ #pdm.lock
109
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110
+ # in version control.
111
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
112
+ .pdm.toml
113
+ .pdm-python
114
+ .pdm-build/
115
+
116
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117
+ __pypackages__/
118
+
119
+ # Celery stuff
120
+ celerybeat-schedule
121
+ celerybeat.pid
122
+
123
+ # SageMath parsed files
124
+ *.sage.py
125
+
126
+ # Environments
127
+ .env
128
+ .venv
129
+ env/
130
+ venv/
131
+ ENV/
132
+ env.bak/
133
+ venv.bak/
134
+
135
+ # Spyder project settings
136
+ .spyderproject
137
+ .spyproject
138
+
139
+ # Rope project settings
140
+ .ropeproject
141
+
142
+ # mkdocs documentation
143
+ /site
144
+
145
+ # mypy
146
+ .mypy_cache/
147
+ .dmypy.json
148
+ dmypy.json
149
+
150
+ # Pyre type checker
151
+ .pyre/
152
+
153
+ # pytype static type analyzer
154
+ .pytype/
155
+
156
+ # Cython debug symbols
157
+ cython_debug/
158
+
159
+ # PyCharm
160
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
163
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
164
+ #.idea/
docred.py CHANGED
@@ -83,6 +83,7 @@ class docred(evaluate.Metric):
83
  "evidence": datasets.Sequence(datasets.Sequence(datasets.Value("int32"))),
84
  },
85
  }
 
86
 
87
  def _info(self):
88
 
@@ -116,9 +117,9 @@ class docred(evaluate.Metric):
116
  labels = self._convert_labels_to_list(data["labels"])
117
  for label in labels:
118
  rel = label["relation_id"]
119
- for n1 in vertexSet[label["head"]]["name"]:
120
- for n2 in vertexSet[label["tail"]]["name"]:
121
- facts.add((n1, n2, rel))
122
  return facts
123
 
124
  def _convert_to_relation_set(self, data):
@@ -198,15 +199,15 @@ class docred(evaluate.Metric):
198
  # if in_train_distant:
199
  # correct_in_train_distant += 1
200
 
201
- re_p = 1.0 * correct_re / (len(submission_answer) + 1e-5)
202
- re_r = 1.0 * correct_re / (tot_relations + 1e-5)
203
  if re_p + re_r == 0:
204
  re_f1 = 0
205
  else:
206
  re_f1 = 2.0 * re_p * re_r / (re_p + re_r)
207
 
208
  evi_p = 1.0 * correct_evidence / pred_evi if pred_evi > 0 else 0
209
- evi_r = 1.0 * correct_evidence / tot_evidences
210
  if evi_p + evi_r == 0:
211
  evi_f1 = 0
212
  else:
@@ -215,10 +216,10 @@ class docred(evaluate.Metric):
215
  re_p_ignore_train_annotated = (
216
  1.0
217
  * (correct_re - correct_in_train_annotated)
218
- / (len(submission_answer) - correct_in_train_annotated + 1e-5)
219
  )
220
  # re_p_ignore_train = (
221
- # 1.0 * (correct_re - correct_in_train_distant) / (len(submission_answer) - correct_in_train_distant + 1e-5)
222
  # )
223
 
224
  if re_p_ignore_train_annotated + re_r == 0:
 
83
  "evidence": datasets.Sequence(datasets.Sequence(datasets.Value("int32"))),
84
  },
85
  }
86
+ eps = 1e-12
87
 
88
  def _info(self):
89
 
 
117
  labels = self._convert_labels_to_list(data["labels"])
118
  for label in labels:
119
  rel = label["relation_id"]
120
+ for n1 in vertexSet[label["head"]]:
121
+ for n2 in vertexSet[label["tail"]]:
122
+ facts.add((n1["name"], n2["name"], rel))
123
  return facts
124
 
125
  def _convert_to_relation_set(self, data):
 
199
  # if in_train_distant:
200
  # correct_in_train_distant += 1
201
 
202
+ re_p = 1.0 * correct_re / (len(submission_answer) + self.eps)
203
+ re_r = 1.0 * correct_re / (tot_relations + self.eps)
204
  if re_p + re_r == 0:
205
  re_f1 = 0
206
  else:
207
  re_f1 = 2.0 * re_p * re_r / (re_p + re_r)
208
 
209
  evi_p = 1.0 * correct_evidence / pred_evi if pred_evi > 0 else 0
210
+ evi_r = 1.0 * correct_evidence / (tot_evidences + self.eps)
211
  if evi_p + evi_r == 0:
212
  evi_f1 = 0
213
  else:
 
216
  re_p_ignore_train_annotated = (
217
  1.0
218
  * (correct_re - correct_in_train_annotated)
219
+ / (len(submission_answer) - correct_in_train_annotated + self.eps)
220
  )
221
  # re_p_ignore_train = (
222
+ # 1.0 * (correct_re - correct_in_train_distant) / (len(submission_answer) - correct_in_train_distant + self.eps)
223
  # )
224
 
225
  if re_p_ignore_train_annotated + re_r == 0:
sample.py CHANGED
@@ -3,8 +3,17 @@ import evaluate
3
 
4
  from docred import docred
5
 
6
- train_data = datasets.load_dataset("docred", split="train_annotated[:10]")
7
- data = datasets.load_dataset("docred", split="validation[:10]")
 
8
  metric = docred()
9
 
10
- print(metric.compute(predictions=data.to_list(), references=data.to_list()))
 
 
 
 
 
 
 
 
 
3
 
4
  from docred import docred
5
 
6
+ train_data = datasets.load_dataset("docred", split="train_annotated[:100]").to_list()
7
+ pred_data = datasets.load_dataset("docred", split="validation[:10]").to_list()
8
+ gold_data = datasets.load_dataset("docred", split="validation[:10]").to_list()
9
  metric = docred()
10
 
11
+ # gold_data[0]["labels"] = {k: [] for k, v in pred_data[0]["labels"].items()}
12
+
13
+ # for i in range(len(gold_data)):
14
+ # gold_data[i]["labels"] = {k: [] for k, v in pred_data[i]["labels"].items()}
15
+
16
+ for i in range(len(pred_data)):
17
+ pred_data[i]["labels"] = {k: [] for k, v in pred_data[i]["labels"].items()}
18
+
19
+ print(metric.compute(predictions=pred_data, references=gold_data, train_data=train_data))