import gradio as gr
import torch
from peft import prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM
model_path = "inception-mbzuai/jais-13b-chat"
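# Jais chat prompt templates; {Question} is substituted with the user's input.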
prompt_eng = "### Instruction: \n\nComplete the conversation below between [|Human|] and [|AI|]:\n### Input: [|Human|] {Question}\n### Response: [|AI|]"
prompt_ar = "### Instruction: \n\nΨ£ΩΩ
Ω Ψ§ΩΩ
ΨΨ§Ψ―Ψ«Ψ© Ψ£Ψ―ΩΨ§Ω Ψ¨ΩΩ [|Human|] Ω [|AI|]:\n### Input: [|Human|] {Question}\n### Response: [|AI|]"
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_path)
#model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True)
#model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True)
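# Load the model in 8-bit so the 13B checkpoint fits in limited GPU memory
# (requires the bitsandbytes package). Note: prepare_model_for_kbit_training
# is a PEFT helper normally used before k-bit fine-tuning; it is not strictly
# needed for inference-only use.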
model = AutoModelForCausalLM.from_pretrained("inception-mbzuai/jais-13b-chat", load_in_8bit=True, device_map="auto", trust_remote_code=True)
model = prepare_model_for_kbit_training(model)
def get_response(text, tokenizer=tokenizer, model=model):
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    inputs = input_ids.to(device)
    input_len = inputs.shape[-1]
    generate_ids = model.generate(
        inputs,
        top_p=0.9,
        temperature=0.3,
        max_length=2048 - input_len,
        min_length=input_len + 4,
        repetition_penalty=1.2,
        do_sample=True,
    )
    response = tokenizer.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0]
    # split() returns a list; keep only the text after the response marker
    response = response.split("### Response: [|AI|]")[-1].strip()
    return response
def greet(ques):
    # input() blocks on stdin and fails in a hosted Space; take the question
    # as a function argument instead so Gradio can pass it in.
    text = prompt_ar.format_map({'Question': ques})
    return get_response(text)
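
# The script imports gradio but never builds an interface, which is the likely
# cause of the Space's runtime error. A minimal sketch of the missing wiring;
# the labels and title below are assumptions, not from the original file.
demo = gr.Interface(
    fn=greet,
    inputs=gr.Textbox(label="Question"),
    outputs=gr.Textbox(label="Answer"),
    title="jais-13b-chat",
)
demo.launch()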