File size: 12,988 Bytes
826babd
 
 
 
 
e0a3af3
826babd
 
 
 
 
 
 
 
4d24f6b
826babd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6daa349
826babd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d24f6b
826babd
 
 
 
 
 
 
 
e0a3af3
826babd
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
#from haystack.components.generators import HuggingFaceTGIGenerator
from llama_index.llms import HuggingFaceInferenceAPI
from llama_index.llms import ChatMessage, MessageRole
from llama_index.prompts import ChatPromptTemplate
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext #, LLMPredictor, StorageContext, load_index_from_storage
import gradio as gr
#import sys
#import logging
#import torch
#from huggingface_hub import InferenceClient
#import tqdm as notebook_tqdm
import requests
import os
import json

#generator = HuggingFaceTGIGenerator("mistralai/Mixtral-8x7B-Instruct-v0.1")
#generator.warm_up()

def download_file(url, filename):
    """
    Download a file from the specified URL and save it as ``content/<filename>``.

    Nothing is fetched when ``filename`` is empty or when ``content/<filename>``
    already exists; both checks run before any network request is made. The
    ``content`` directory is created when it is missing.

    Args:
        url: Address to fetch with an HTTP GET request.
        filename: Name to store the download under inside ``content``.

    Returns:
        None. Success or the failing HTTP status code is reported via ``print``.
    """
    if filename == '':
        return

    os.makedirs('content', exist_ok=True)
    target = os.path.join('content', filename)

    # Skip the request entirely when the file was downloaded earlier.
    if os.path.exists(target):
        return

    # Stream the body so a large file is never held in memory at once. The
    # context manager releases the connection even if writing fails, and the
    # timeout keeps an unresponsive server from blocking the caller forever.
    with requests.get(url, stream=True, timeout=30) as response:
        if response.status_code != 200:
            print(f"Error: Unable to download file. HTTP status code: {response.status_code}")
            return

        with open(target, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    file.write(chunk)

    print(f"Download complete: {filename}")

#def save_answer(prompt, rag_answer, norag_answer):
#    json_dict = dict()
#    json_dict['prompt'] = prompt
#    json_dict['rag_answer'] = rag_answer
#    json_dict['norag_answer'] = norag_answer
#    
#    file_path = 'saved_answers.json'
#    
#    # Check if the file exists
#    if not os.path.isfile(file_path):
#        with open(file_path, 'w') as f:
#            # Create an empty list in the file to store dictionaries
#            json.dump([], f)
#            f.write('\n')  # Add a newline to separate the list and future entries
#    
#    # Open the file in append mode
#    with open(file_path, 'a+') as f:
#        # Read the existing data
#        f.seek(0)
#        data = json.load(f)
#        
#        # Append the new dictionary to the list
#        data.append(json_dict)
#        
#        # Move the cursor to the beginning of the file
#        f.seek(0)
#        
#        # Write the updated list of dictionaries
#        json.dump(data, f)
#        f.write('\n')  # Add a newline to separate the list and future entries
#
#
#def check_answer(prompt):
#    file_path = 'saved_answers.json'
#
#    if not os.path.isfile(file_path):
#        with open(file_path, 'w') as f:
#            # Create an empty list in the file to store dictionaries
#            json.dump([], f)
#            f.write('\n')  # Add a newline to separate the list and future entries
#    with open('saved_answers.json', 'r') as f:
#        data = json.load(f)
#        for entry in data:
#            if entry['prompt'] == prompt:
#                return entry['rag_answer'], entry['norag_answer']
#    return None, None  # Return None if the prompt is not found


def save_answer(prompt, rag_answer, norag_answer):
    """
    Append one prompt/answer record to ``saved_answers.jsonl``.

    The record is written as a single JSON object on its own line. The file
    is created on first use, and records already in it are left untouched.

    Args:
        prompt: The question that was asked.
        rag_answer: Answer produced with retrieved context.
        norag_answer: Answer produced without retrieved context.

    Returns:
        None.
    """
    file_path = 'saved_answers.jsonl'

    # Create a dictionary for the current answer
    json_dict = {
        'prompt': prompt,
        'rag_answer': rag_answer,
        'norag_answer': norag_answer
    }

    # Append only the new record. Re-writing previously loaded records in
    # append mode would duplicate them on every save, and loading first
    # would fail while the file does not exist yet.
    with open(file_path, 'a', encoding='utf-8') as f:
        f.write(json.dumps(json_dict) + '\n')

def check_answer(prompt):
    """
    Look up previously saved answers for ``prompt`` in ``saved_answers.jsonl``.

    Args:
        prompt: The question to search for; compared for exact equality with
            each stored record's ``'prompt'`` value.

    Returns:
        A ``(rag_answer, norag_answer)`` tuple taken from the first matching
        record, or ``(None, None)`` when the file cannot be read or parsed,
        is empty, or contains no record for ``prompt``.
    """
    file_path = 'saved_answers.jsonl'

    # A missing or unreadable file (OSError) and malformed content
    # (ValueError, which covers JSON and text decoding errors) both simply
    # mean "nothing cached"; other exceptions are left to propagate.
    try:
        existing_data = load_jsonl(file_path)
    except (OSError, ValueError):
        return None, None

    # Find the answer for the given prompt, if it exists
    for entry in existing_data:
        if entry['prompt'] == prompt:
            return entry['rag_answer'], entry['norag_answer']

    # Return None if the prompt is not found
    return None, None

# Helper functions
def load_jsonl(file_path):
    """
    Read a JSON Lines file and return its records as a list.

    Each non-blank line is parsed as one JSON document. Blank or
    whitespace-only lines are skipped instead of raising a decode error.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return [json.loads(line) for line in file if line.strip()]

def write_to_jsonl(file_path, data):
    """
    Append every item of ``data`` to ``file_path``, one JSON document per line.

    The file is opened in append mode, so content already in it is kept and
    the new lines are added after it.
    """
    with open(file_path, 'a+') as sink:
        # Serialize lazily; each record becomes exactly one newline-terminated line.
        sink.writelines(json.dumps(record) + '\n' for record in data)



def generate(prompt, history, rag_only, file_link, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,):
    """
    Answer ``prompt`` with Mixtral, with and (optionally) without document context.

    If ``check_answer`` already holds an answer for this prompt, it is
    returned without querying the model. Otherwise the file behind
    ``file_link`` is downloaded into ``content/``, everything in that
    directory is indexed, and the prompt is run through a query engine whose
    templates include the retrieved context. Unless ``rag_only`` is set, a
    second query is run with templates that leave the context out, and both
    answers are stored with ``save_answer``.

    Args:
        prompt: The user's question.
        history: Chat history passed in by the chat UI; not used here.
        rag_only: When truthy, only the context-based answer is returned.
        file_link: URL of a document to add to ``content/``; the text after
            the last "/" becomes the local file name. An empty or ``None``
            value skips the download and uses what is already in ``content/``.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Passed as a keyword argument to ``as_query_engine``.
        top_p: Converted to ``float`` and passed to ``as_query_engine``.
        repetition_penalty: Passed as a keyword argument to ``as_query_engine``.

    Returns:
        A string with the "Mixtral + RAG Output" section, preceded by the
        "Mixtral Output" section when ``rag_only`` is not set.
    """
    # NOTE(review): the cache is keyed on the prompt alone, so a saved answer
    # is reused even when ``file_link`` points to a different document.
    rag_answer, norag_answer = check_answer(prompt)

    if rag_answer is not None:
        if rag_only:
            return f'* Mixtral + RAG Output:\n{rag_answer}'
        return f'* Mixtral Output:\n{norag_answer}\n\n* Mixtral + RAG Output:\n{rag_answer}'

    mixtral = HuggingFaceInferenceAPI(
        model_name="mistralai/Mixtral-8x7B-Instruct-v0.1"
    )

    service_context = ServiceContext.from_defaults(
        llm=mixtral, embed_model="local:BAAI/bge-small-en-v1.5"
    )

    # Without a link there is nothing to fetch; calling ``split`` on ``None``
    # or requesting an empty URL would only raise.
    if file_link:
        download_file(file_link, file_link.split("/")[-1])

    documents = SimpleDirectoryReader("content/").load_data()

    index = VectorStoreIndex.from_documents(documents, service_context=service_context)

    # Text QA Prompt
    chat_text_qa_msgs = [
        ChatMessage(
            role=MessageRole.SYSTEM,
            content=(
                "Always answer the question, even if the context isn't helpful."
            ),
        ),
        ChatMessage(
            role=MessageRole.USER,
            content=(
                "Context information is below.\n"
                "---------------------\n"
                "{context_str}\n"
                "---------------------\n"
                "Given the context information and not prior knowledge, "
                "answer the question: {query_str}\n"
            ),
        ),
    ]
    text_qa_template = ChatPromptTemplate(chat_text_qa_msgs)

    # Refine Prompt
    chat_refine_msgs = [
        ChatMessage(
            role=MessageRole.SYSTEM,
            content=(
                "Always answer the question, even if the context isn't helpful."
            ),
        ),
        ChatMessage(
            role=MessageRole.USER,
            content=(
                "We have the opportunity to refine the original answer "
                "(only if needed) with some more context below.\n"
                "------------\n"
                "{context_msg}\n"
                "------------\n"
                "Given the new context, refine the original answer to better "
                "answer the question: {query_str}. "
                "If the context isn't useful, output the original answer again.\n"
                "Original Answer: {existing_answer}"
            ),
        ),
    ]
    refine_template = ChatPromptTemplate(chat_refine_msgs)

    # Clamp the temperature to a small positive minimum.
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    # NOTE(review): the sampling options are handed to ``as_query_engine``;
    # whether they reach the LLM call is not visible here -- confirm.
    stream = index.as_query_engine(
        text_qa_template=text_qa_template,
        refine_template=refine_template,
        similarity_top_k=6,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
    ).query(prompt)
    print(str(stream))

    output_rag = str(stream)

    if rag_only:
        return f'* Mixtral + RAG Output:\n{output_rag}'

    # No-RAG comparison: same query engine, but the templates do not include
    # the retrieved context.
    chat_text_qa_msgs_nr = [
        ChatMessage(
            role=MessageRole.SYSTEM,
            content=(
                "Always answer the question"
            ),
        ),
        ChatMessage(
            role=MessageRole.USER,
            content=(
                "answer the question: {query_str}\n"
            ),
        ),
    ]
    text_qa_template_nr = ChatPromptTemplate(chat_text_qa_msgs_nr)

    # Refine Prompt
    chat_refine_msgs_nr = [
        ChatMessage(
            role=MessageRole.SYSTEM,
            content=(
                "Always answer the question"
            ),
        ),
        ChatMessage(
            role=MessageRole.USER,
            content=(
                "answer the question: {query_str}. "
                "If the context isn't useful, output the original answer again.\n"
                "Original Answer: {existing_answer}"
            ),
        ),
    ]
    refine_template_nr = ChatPromptTemplate(chat_refine_msgs_nr)

    stream_nr = index.as_query_engine(
        text_qa_template=text_qa_template_nr,
        refine_template=refine_template_nr,
        similarity_top_k=6,
    ).query(prompt)

    output_norag = str(stream_nr)
    save_answer(prompt, output_rag, output_norag)

    return f'* Mixtral Output:\n{output_norag}\n\n* Mixtral + RAG Output:\n{output_rag}'

def upload_file(files):
    """Return the ``name`` attribute of every item in ``files``, in order."""
    file_paths = []
    for uploaded in files:
        file_paths.append(uploaded.name)
    return file_paths

# Extra UI controls handed to gr.ChatInterface below. Their order mirrors
# generate()'s parameters after (prompt, history): rag_only, file_link,
# temperature, max_new_tokens, top_p, repetition_penalty.
# NOTE(review): the slider defaults for max new tokens (1024), top-p (0.90)
# and repetition penalty (1.2) differ from generate()'s own keyword defaults
# (256, 0.95, 1.0).
additional_inputs=[
    # rag_only
    gr.Checkbox(
        label="RAG Only",
        interactive=True,
        value= False
    ),
    # file_link
    gr.Textbox(
        label="File Link",
        max_lines=1,
        interactive=True,
        value= "https://arxiv.org/pdf/2401.10020.pdf"
    ),
    # temperature
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    # max_new_tokens
    gr.Slider(
        label="Max new tokens",
        value=1024,
        minimum=0,
        maximum=2048,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    # top_p
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    # repetition_penalty
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    )
]

# Example rows for the chat UI: the prompt followed by one None placeholder
# for each of the six entries in additional_inputs.
examples = [
    [question, None, None, None, None, None, None]
    for question in (
        "What is a trustworthy digital repository, where can you find this information?",
        "What are things a repository must have?",
        "What principles should record creators follow?",
        "Write a very short summary of Data Sanitation Techniques by Edgar Dale, and write a citation in APA style.",
        "Can you explain how the QuickSort algorithm works and provide a Python implementation?",
        "What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?",
    )
]

# Build the chat UI around generate(), enable queuing and start the server.
# This statement sits at module top level with no ``__main__`` guard, so it
# runs whenever the file is executed or imported.
gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    title="RAG Demo",
    examples=examples,
    #concurrency_limit=20,
).queue().launch(show_api=False,debug=True,share=True)

#iface = gr.Interface(fn=generate, inputs=["text"], outputs=["text", "text"],
#                     additional_inputs=additional_inputs, title="RAG Demo", examples=examples)
#iface.launch()