import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import snapshot_download

# Run on CPU
device = "cpu"

# Download the model weights to a local directory
repo_id = "amgadhasan/phi-2"
model_path = snapshot_download(repo_id=repo_id, repo_type="model", local_dir="./phi-2", use_auth_token=False)

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Set default dtype to float32 for compatibility with CPU
torch.set_default_dtype(torch.float32)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True)

def generate(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=200)
    completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return completion

def ask_question(user_question):
    if user_question.lower() == "quit":
        return "Session ended. Goodbye!"
    # Explicitly frame the prompt so the model produces an academic answer.
    prompt = f"Academic response to the question about basic science subjects: {user_question}"
    return generate(prompt)

iface = gr.Interface(fn=ask_question, inputs="text", outputs="text")
iface.launch(share=True)
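Once launched, the app prints a local URL and, because share=True, a temporary public share link. As a minimal sketch of querying the running app programmatically, assuming the gradio_client package is installed and the app is reachable at Gradio's default local address, you can call the Interface's default "/predict" endpoint:

from gradio_client import Client

# Point the client at the running app; swap in the printed share URL
# if calling from another machine (the local address is an assumption).
client = Client("http://127.0.0.1:7860")

# "/predict" is Gradio's default endpoint name for a single-function Interface.
result = client.predict("Why is the sky blue?", api_name="/predict")
print(result)

This is handy for smoke-testing the Space from a script without opening the browser UI.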