import gradio as gr
import rebiber
import os
import uuid
# Load Bib Database
filepath = os.path.abspath(rebiber.__file__).replace("__init__.py","")
bib_list_path = os.path.join(filepath, "bib_list.txt")
abbr_tsv_path = "abbr.tsv"
bib_db = rebiber.construct_bib_db(bib_list_path, start_dir=filepath)
abbr_dict = rebiber.normalize.load_abbr_tsv(abbr_tsv_path)
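# bib_db maps normalized paper titles to their official bib entries, and abbr_dict
# holds the venue-name abbreviation pairs, both loaded from files bundled with rebiber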
def process(input_bib, shorten, remove_keys, deduplicate, sort):
    """Normalize the pasted bib entries and return (output_bib, request_id).

    The text is written to a per-request input_<id>.bib file, normalized with
    rebiber against the offline database, and read back from output_<id>.bib;
    the request id lets the download button locate the generated file later.
    """
    if "@" not in input_bib:
        # nothing that looks like a bib entry; keep both outputs well-defined
        return "N/A", ""
    random_id = uuid.uuid4().hex
    with open(f"input_{random_id}.bib", "w") as f:
        f.write(input_bib.replace("\t", " "))
    all_bib_entries = rebiber.load_bib_file(f"input_{random_id}.bib")
    print("# Input Bib Entries:", len(all_bib_entries))
    abbr_dict_pass = []
    if shorten:
        abbr_dict_pass = abbr_dict
    rebiber.normalize_bib(bib_db, all_bib_entries, f"output_{random_id}.bib",
                          abbr_dict=abbr_dict_pass,
                          deduplicate=deduplicate,
                          sort=sort,
                          removed_value_names=remove_keys)
    with open(f"output_{random_id}.bib") as f:
        output_bib = f.read()
    # remove the temporary input file; the output file is kept so that
    # the "Generate Bib File" button can serve it later
    os.remove(f"input_{random_id}.bib")
    return output_bib, random_id
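# For reference, the same normalization can be run outside the UI by reusing the
# calls above (a minimal sketch; the file names here are placeholders):
#
#   entries = rebiber.load_bib_file("my_refs.bib")
#   rebiber.normalize_bib(bib_db, entries, "my_refs_normalized.bib",
#                         abbr_dict=abbr_dict, deduplicate=True, sort=True,
#                         removed_value_names=["url", "biburl"])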
example_input = """
@article{lin2020birds,
title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
journal={arXiv preprint arXiv:2005.00683},
year={2020}
}
@inproceedings{Lin2020CommonGenAC,
title={CommonGen: A Constrained Text Generation Challenge for Generative Commonsense Reasoning},
author={Bill Yuchen Lin and Minghan Shen and Wangchunshu Zhou and Pei Zhou and Chandra Bhagavatula and Yejin Choi and Xiang Ren},
booktitle={Findings},
year={2020}
}
"""
examples = [[example_input]]
# iface = gr.Interface(fn=process,
# inputs=gr.inputs.Textbox(lines=30, label="Input BIB"),
# outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
# examples=examples,
# allow_flagging="never"
# )
with gr.Blocks() as demo:
    gr.Markdown(
'''# Rebiber: A tool for normalizing bibtex with official info.
<table>
<tr>
<td>
<a href="https://yuchenlin.xyz/">
<img src="https://img.shields.io/badge/Yuchen%20Lin-🐼-blue?style=social">
</a>
</td>
<td>
<a href="https://github.com/yuchenlin/rebiber">
<img src="https://img.shields.io/badge/Github--blue?style=social&logo=github">
</a>
</td>
<td>
<a href="https://twitter.com/billyuchenlin/status/1353850378438070272?s=20">
<img src="https://img.shields.io/badge/Tweet--blue?style=social&logo=twitter">
</a>
</td>
</tr>
</table>
<span style="font-size:13pt">
We often cite papers by their arXiv versions without noting that they have already been __PUBLISHED__ at a conference. Such unofficial bib entries may violate the submission or camera-ready rules of some venues.
We introduce __Rebiber__, a simple Python tool that fixes them automatically. It is based on official conference information from [DBLP](https://dblp.org/) and [the ACL Anthology](https://www.aclweb.org/anthology/) (for NLP conferences)!
Besides updating outdated arXiv citations, __Rebiber__ also normalizes entries into a unified, DBLP-style format, with optional venue-name abbreviation and removal of selected fields.
</span>
'''
)
    with gr.Row():
        with gr.Column(scale=3):
            input_bib = gr.Textbox(lines=15, label="Input BIB", value=example_input, interactive=True)
            removekeys = gr.CheckboxGroup(
                ["url", "biburl", "address", "publisher", "pages", "doi", "volume", "bibsource"],
                value=[],  # the value of a CheckboxGroup is the list of selected choices; none by default
                label="Remove Keys", info="Which keys to remove?")
            shorten = gr.Checkbox(label="Abbreviation", info="Shorten the conference/journal names (e.g., `Proceedings of the 2020 International Conference of ...` --> `Proc. of ICML`)", value=False)
            dedup = gr.Checkbox(label="Deduplicate entries.", value=False)
            sort = gr.Checkbox(label="Sort alphabetically by ID.", value=False)
            with gr.Row():
                clr_button = gr.Button("Clear")
                button = gr.Button("Submit")
            # hidden field that passes the per-request id from process() to the download handler
            ex_uuid = gr.Text(label="UUID", visible=False)
        with gr.Column(scale=3):
            output = gr.Textbox(label="Output BIB (you can copy it with the button in the top-right corner)",
                                interactive=False).style(show_copy_button=True)
            download_btn = gr.Button("Generate Bib File")
            # hidden File component, revealed once an output bib file has been generated
            download_content = gr.File(visible=False)
    def download_file(ex_uuid):
        # serve the bib file that process() generated for this request id
        file_path = f"output_{ex_uuid}.bib"
        return gr.update(value=file_path, visible=True)
    download_btn.click(download_file, inputs=ex_uuid, outputs=download_content)
    button.click(process, inputs=[input_bib, shorten, removekeys, dedup, sort], outputs=[output, ex_uuid], api_name="process")
    def clean(text):
        # clear the input textbox
        return ""
    clr_button.click(clean, input_bib, input_bib)
# gr.Interface(fn=process,
# outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
# examples=examples,
# allow_flagging="never",
# scroll_to_output=True,
# show_progress=True,
# )
if __name__ == "__main__":
    demo.launch()
"""
@article{lin2020birds,
title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
journal={arXiv preprint arXiv:2005.00683},
year={2020}
}
@inproceedings{lin2020birds,
address = {Online},
author = {Lin, Bill Yuchen and
Lee, Seyeon and
Khanna, Rahul and
Ren, Xiang},
booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
doi = {10.18653/v1/2020.emnlp-main.557},
pages = {6862--6868},
publisher = {Association for Computational Linguistics},
title = {{B}irds have four legs?! {N}umer{S}ense: {P}robing {N}umerical {C}ommonsense {K}nowledge of {P}re-{T}rained {L}anguage {M}odels},
url = {https://aclanthology.org/2020.emnlp-main.557},
year = {2020}
}
""" |