File size: 2,351 Bytes
5e89046
b484c84
5e89046
b55efad
 
 
 
5e89046
 
 
 
b484c84
5e89046
b55efad
 
 
 
 
 
b484c84
b55efad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e89046
 
 
 
 
 
b21f32e
 
5e89046
b21f32e
5e89046
ac3718d
b21f32e
 
5e89046
 
 
b21f32e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import gradio as gr
import spaces
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hub repository id shared by the tokenizer, the model weights and the
# inference client below.
MODEL_ID = "Svngoku/c4ai-command-r7b-12-2024-4bit"

# Both objects are created once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Inference client for the same repository.
# NOTE(review): not referenced by the rest of the visible code — confirm it is
# still needed.
client = InferenceClient(MODEL_ID)

def wrap_text_output(text):
    """Return `text` surrounded by the fixed turn-marker strings.

    The result is the system/user turn markers, then `text` formatted as a
    string, then the marker that opens the chatbot turn.
    """
    turn_prefix = "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>"
    turn_suffix = "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
    return f"{turn_prefix}{text}{turn_suffix}"



def _parse_documents(documents_text):
    """Turn "heading: body" lines into the document dicts given to the chat template.

    Blank lines are skipped. A line without a colon is reported on stdout and
    skipped, so one malformed line does not discard the other documents.
    """
    documents = []
    # The textbox value may be empty or None; both mean "no documents".
    for doc in (documents_text or "").split("\n"):
        if not doc.strip():
            continue
        # Split on the first colon only so the body may itself contain colons.
        heading, separator, body = doc.partition(":")
        if not separator:
            print(f"Invalid document format: {doc}")
            continue
        documents.append({"heading": heading.strip(), "body": body.strip()})
    return documents


@spaces.GPU
def generate_response(message, history, documents_text):
    """Generate the model's reply to `message`, using the supplied documents as context.

    Args:
        message: The latest user message.
        history: Earlier turns as a list of role/content dicts (the chat
            interface in this file is created with type="messages"). None is
            treated as an empty history.
        documents_text: Newline-separated "heading: body" entries; may be
            empty or None.

    Returns:
        The decoded text of the newly generated tokens only, with special
        tokens removed and surrounding whitespace stripped.
    """
    conversation = list(history or []) + [{"role": "user", "content": message}]
    documents = _parse_documents(documents_text)

    # Render the prompt as text; tensors are produced by the tokenizer call
    # below (return_tensors has no effect when tokenize=False).
    input_prompt = tokenizer.apply_chat_template(
        conversation=conversation,
        documents=documents,
        tokenize=False,
        add_generation_prompt=True,
    )
    input_ids = tokenizer(input_prompt, return_tensors="pt").input_ids
    input_ids = input_ids.to(model.device)
    gen_tokens = model.generate(
        input_ids, max_new_tokens=2048, do_sample=True, temperature=0.3
    )

    # generate() returns the prompt followed by the continuation. Decoding the
    # whole sequence with skip_special_tokens=True removes the chatbot marker,
    # so splitting on it cannot isolate the answer; slice off the prompt
    # tokens by length instead.
    new_tokens = gen_tokens[0][input_ids.shape[-1]:]
    chatbot_response = tokenizer.decode(new_tokens, skip_special_tokens=True)

    return chatbot_response.strip()


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Free-text box shown alongside the chat; its value is handed to
# generate_response as the third argument (documents_text).
documents_box = gr.Textbox(
    lines=5,
    placeholder="Enter documents (heading: body) separated by new lines...",
)

# Chat UI that routes every user message through generate_response.
demo = gr.ChatInterface(
    fn=generate_response,
    type="messages",
    additional_inputs=[documents_box],
    theme="ocean",
    title="Simple Chat with RAG",
    description="Ask a question and provide relevant documents for context",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()