from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel
from fastapi import FastAPI

from transformers import GPT2LMHeadModel, GPT2Tokenizer, AutoModelForCausalLM, AutoTokenizer
import torch

app = FastAPI()
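# Note: StaticFiles is imported above but never mounted. If the page served
# from templates/index.html needs static assets, a mount such as the following
# could be added (the "static" directory name is an assumption, not part of
# the original code):
# app.mount("/static", StaticFiles(directory="static"), name="static")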
name = "meta-llama/Llama-2-7b-chat-hf"
customGen = False
gpt2based = False

# microsoft/DialoGPT-small
# microsoft/DialoGPT-medium
# microsoft/DialoGPT-large

# mistralai/Mixtral-8x7B-Instruct-v0.1

# Load the model and tokenizer selected above via the Auto classes
model = AutoModelForCausalLM.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)

# The GPT-2-specific classes can only load GPT-2-based checkpoints, so they
# are skipped when a non-GPT-2 model (e.g. Llama-2) is selected
gpt2model = GPT2LMHeadModel.from_pretrained(name) if gpt2based else None
gpt2tokenizer = GPT2Tokenizer.from_pretrained(name) if gpt2based else None
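
# Note (assumption, not in the original code): Llama-2-7b in full precision
# needs roughly 28 GB of memory. On a GPU host with `accelerate` installed,
# half precision and automatic device placement could be requested instead:
# model = AutoModelForCausalLM.from_pretrained(name, torch_dtype=torch.float16, device_map="auto")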

class req(BaseModel):
  prompt: str
  length: int

@app.get("/")
def read_root():
  return FileResponse(path="templates/index.html", media_type="text/html")

@app.post("/api")
def generate(data: req):
  print("Prompt:", data.prompt)
  print("Length:", data.length)
  
  if name in ("microsoft/DialoGPT-small", "microsoft/DialoGPT-medium", "microsoft/DialoGPT-large") and customGen:
    # tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
    # model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
    
    # encode the new user input, add the eos_token and return a tensor in PyTorch
    new_user_input_ids = tokenizer.encode(data.prompt + tokenizer.eos_token, return_tensors='pt')

    # no chat history is kept between requests, so the bot input is just the
    # encoded user prompt
    bot_input_ids = new_user_input_ids

    # generate a response while limiting the total chat history to 1000 tokens
    chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)

    # decode only the newly generated tokens (everything after the prompt)
    generated_text = tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
    answer_data = { "answer": generated_text }
    print("Answer:", generated_text)
    
    return answer_data
  else:
    if gpt2based:
      input_text = data.prompt
    
      # Tokenize the input text
      input_ids = gpt2tokenizer.encode(input_text, return_tensors="pt")
      
      # Generate output using the model
      output_ids = gpt2model.generate(input_ids, max_length=data.length, num_beams=5, no_repeat_ngram_size=2)
      generated_text = gpt2tokenizer.decode(output_ids[0], skip_special_tokens=True)
      
      answer_data = { "answer": generated_text }
      print("Answer:", generated_text)
      
      return answer_data
    else:
      input_text = data.prompt
    
      # Tokenize the input text
      input_ids = tokenizer.encode(input_text, return_tensors="pt")
      
      # Generate output using the model
      output_ids = model.generate(input_ids, max_length=data.length, num_beams=5, no_repeat_ngram_size=2)
      generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
      
      answer_data = { "answer": generated_text }
      print("Answer:", generated_text)
      
      return answer_data
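
# A minimal way to run this app locally (assuming this file is saved as
# main.py and uvicorn is installed):
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# Example request against the /api endpoint (the prompt and length values are
# illustrative only):
#   curl -X POST http://localhost:8000/api \
#     -H "Content-Type: application/json" \
#     -d '{"prompt": "Hello, how are you?", "length": 64}'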