from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel
from fastapi import FastAPI
import os
from transformers import GPT2LMHeadModel, GPT2Tokenizer, AutoModelForCausalLM, AutoTokenizer
import torch
app = FastAPI()
name = "meta-llama/Llama-2-7b-chat-hf"
customGen = False
gpt2based = False
# microsoft/DialoGPT-small
# microsoft/DialoGPT-medium
# microsoft/DialoGPT-large
# mistralai/Mixtral-8x7B-Instruct-v0.1
# Load the selected Hugging Face model and tokenizer
model = AutoModelForCausalLM.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)

# The GPT-2 specific classes can only load GPT-2 checkpoints, so only create them
# when the GPT-2 code path is enabled
if gpt2based:
    gpt2model = GPT2LMHeadModel.from_pretrained(name)
    gpt2tokenizer = GPT2Tokenizer.from_pretrained(name)
class req(BaseModel):
    prompt: str
    length: int
@app.get("/")
def read_root():
return FileResponse(path="templates/index.html", media_type="text/html")
@app.post("/api")
def read_root(data: req):
print("Prompt:", data.prompt)
print("Length:", data.length)
if (name == "microsoft/DialoGPT-small" or name == "microsoft/DialoGPT-medium" or name == "microsoft/DialoGPT-large") and customGen == True:
# tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
# model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
step = 1
# encode the new user input, add the eos_token and return a tensor in Pytorch
new_user_input_ids = tokenizer.encode(data.prompt + tokenizer.eos_token, return_tensors='pt')
# append the new user input tokens to the chat history
bot_input_ids = torch.cat(new_user_input_ids, dim=-1) if step > 0 else new_user_input_ids
# generated a response while limiting the total chat history to 1000 tokens,
chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
generated_text = tokenizer.decode(chat_history_ids[:, :][0], skip_special_tokens=True)
answer_data = { "answer": generated_text }
print("Answer:", generated_text)
return answer_data
    else:
        if gpt2based:
            input_text = data.prompt
            # Tokenize the input text
            input_ids = gpt2tokenizer.encode(input_text, return_tensors="pt")
            # Generate output using the GPT-2 model
            output_ids = gpt2model.generate(input_ids, max_length=data.length, num_beams=5, no_repeat_ngram_size=2)
            generated_text = gpt2tokenizer.decode(output_ids[0], skip_special_tokens=True)
            answer_data = {"answer": generated_text}
            print("Answer:", generated_text)
            return answer_data
        else:
            input_text = data.prompt
            # Tokenize the input text
            input_ids = tokenizer.encode(input_text, return_tensors="pt")
            # Generate output using the model
            output_ids = model.generate(input_ids, max_length=data.length, num_beams=5, no_repeat_ngram_size=2)
            generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
            answer_data = {"answer": generated_text}
            print("Answer:", generated_text)
            return answer_data
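
# Example usage (a minimal sketch, not part of the app itself): assuming this file is
# saved as app.py and served locally with uvicorn on port 8000, the /api endpoint can
# be queried as shown below. The file name, host, and port are assumptions, not
# anything defined above.
#
#   uvicorn app:app --host 0.0.0.0 --port 8000
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/api",
#       json={"prompt": "Hello, how are you?", "length": 64},
#   )
#   print(resp.json()["answer"])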