'''
This script generates the most relevant papers for a given title.
    - Search for as many references as possible: for 10~15 keywords, collect 10 references each.
    - Sort the results from most relevant to least relevant.
    - Return the most relevant references that fit within the token limit.

Note: the auto-draft function does not call this script directly; its logic has already been integrated there.
'''

import os
import json
from utils.references import References
from section_generator import keywords_generation  # section_generation_bg, figures_generation, section_generation are not needed here
import itertools
from gradio_client import Client


def generate_raw_references(title, description="",
                            bib_refs=None, tldr=False, max_kw_refs=10,
                            save_to="ref.bib"):
    # load pre-provided references
    ref = References(title, bib_refs)

    # generate multiple keywords for searching
    input_dict = {"title": title, "description": description}
    keywords, usage = keywords_generation(input_dict)
    keywords = list(keywords)
    # also search for every pairwise combination of the generated keywords
    comb_keywords = list(itertools.combinations(keywords, 2))
    for comb_keyword in comb_keywords:
        keywords.append(" ".join(comb_keyword))
    # map each keyword to the maximum number of references to collect for it
    keywords = {keyword: max_kw_refs for keyword in keywords}
    print(f"keywords: {keywords}\n\n")

    # search for and collect papers for every keyword
    ref.collect_papers(keywords, tldr=tldr)
    paper_json = ref.to_json()

    # save the collected references to disk
    with open(save_to, "w") as f:
        json.dump(paper_json, f)

    return save_to, ref

def generate_top_k_references(title, description="",
                            bib_refs=None, tldr=False, max_kw_refs=10,  save_to="ref.bib", top_k=5):
    json_path, ref_raw = generate_raw_references(title, description, bib_refs, tldr, max_kw_refs,  save_to)
    json_content = ref_raw.to_json()  # note: unused below; the file at json_path is what is sent to the Space
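    # The Hugging Face Space called below is assumed (based on its name and this call
    # pattern) to compute SPECTER embeddings for the collected papers and to return a
    # filepath to a JSON file containing the top_k most relevant entries.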

    client = Client("https://shaocongma-evaluate-specter-embeddings.hf.space/")
    result = client.predict(
        title,  # str  in 'Title' Textbox component
        json_path,  # str (filepath or URL to file) in 'Papers JSON (as string)' File component
        top_k,  # int | float (numeric value between 1 and 50) in 'Top-k Relevant Papers' Slider component
        api_name="/get_k_relevant_papers"
    )
    with open(result) as f:
        result = json.load(f)
    return result
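
# A minimal usage sketch for generate_top_k_references (assumes an OPENAI_API_KEY is set
# and that the Hugging Face Space above is reachable):
#
#   top_papers = generate_top_k_references("An example paper title", top_k=5)
#   print(top_papers)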


if __name__ == "__main__":
    import openai
    openai.api_key = os.getenv("OPENAI_API_KEY")

    title = "Using interpretable boosting algorithms for modeling environmental and agricultural data"
    description = ""
    save_to = "paper.json"
    save_to, ref = generate_raw_references(title, description, save_to=save_to)

    print("`paper.json` has been generated. Now evaluating its similarity...")

    k = 5
    client = Client("https://shaocongma-evaluate-specter-embeddings.hf.space/")
    result = client.predict(
        title,  # str  in 'Title' Textbox component
        save_to,  # str (filepath or URL to file) in 'Papers JSON (as string)' File component
        k,  # int | float (numeric value between 1 and 50) in 'Top-k Relevant Papers' Slider component
        api_name="/get_k_relevant_papers"
    )

    with open(result) as f:
        result = json.load(f)

    print(result)

    save_to = "paper2.json"
    with open(save_to, "w") as f:
        json.dump(result, f)