import gradio as gr
import torch
from transformers import AutoTokenizer
from awq import AutoAWQForCausalLM
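# NOTE: AutoAWQForCausalLM is provided by the `autoawq` package, and the
# .cuda() call below assumes a CUDA-capable GPU is available.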

# Load the AWQ-quantized model and its tokenizer from the Hugging Face Hub
model_path = "bragour/Camel-7b-chat-awq"
model = AutoAWQForCausalLM.from_quantized(
    model_path,
    fuse_layers=True,
    trust_remote_code=False,
    safetensors=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=False)

def respond(message):
    # Wrap the user message in the model's [INST] prompt template
    formatted_prompt = f"<s>[INST]{message}[/INST]"
    tokens = tokenizer(formatted_prompt, return_tensors="pt").input_ids.cuda()
    # Generate a response locally with greedy decoding (no sampling)
    result = model.generate(
        tokens,
        do_sample=False,
        max_new_tokens=200,
    )
    # Decode only the newly generated tokens; generate() returns the prompt
    # tokens followed by the completion, so slice off the prompt first
    response = tokenizer.decode(result[0][tokens.shape[1]:], skip_special_tokens=True)
    return response

# Define the Gradio interface: one text box in, one text box out
demo = gr.Interface(
    fn=respond,
    inputs="text",
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()
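
# On a Hugging Face Space this file runs as app.py; run locally with
# `python app.py` (Gradio serves on http://127.0.0.1:7860 by default).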