import threading
import http.server
import socketserver
import os
import yaml
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import gradio as gr
from utils.upload_file import UploadFile
from utils.chatbot import ChatBot
from utils.ui_settings import UISettings
from utils.load_config import LoadConfig
from pyprojroot import here
# Load the app config
with open(here("configs/app_config.yml")) as cfg:
    app_config = yaml.load(cfg, Loader=yaml.FullLoader)
PORT = app_config["serve"]["port"]
DIRECTORY1 = app_config["directories"]["data_directory"]
DIRECTORY2 = app_config["directories"]["data_directory_2"]
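# The keys read above suggest a config layout roughly like the sketch below.
# This is an assumption for illustration only; the real values live in
# configs/app_config.yml and may differ:
#
#   serve:
#     port: 8000
#   directories:
#     data_directory: data/docs
#     data_directory_2: data/docs_2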
# ================================
# Part 1: Reference Serve Code
# ================================
class MultiDirectoryHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
    """Serve files from multiple directories."""

    def translate_path(self, path):
        # Split the URL path to get the first directory name
        parts = path.split('/', 2)
        if len(parts) > 1:
            first_directory = parts[1]
            if first_directory == os.path.basename(DIRECTORY1):
                path = os.path.join(DIRECTORY1, *parts[2:])
            elif first_directory == os.path.basename(DIRECTORY2):
                path = os.path.join(DIRECTORY2, *parts[2:])
            else:
                # Otherwise, check both directories for a bare file name
                file_path1 = os.path.join(DIRECTORY1, first_directory)
                file_path2 = os.path.join(DIRECTORY2, first_directory)
                if os.path.isfile(file_path1):
                    return file_path1
                elif os.path.isfile(file_path2):
                    return file_path2
        return super().translate_path(path)


def start_reference_server():
    with socketserver.TCPServer(("", PORT), MultiDirectoryHTTPRequestHandler) as httpd:
        print(f"Serving at port {PORT}")
        httpd.serve_forever()
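# Example of the resulting URL scheme (file names are hypothetical): with the
# config sketch above, /docs/report.pdf is intended to map onto
# <data_directory>/report.pdf, a bare /report.pdf is looked up in both data
# directories, and anything else falls back to SimpleHTTPRequestHandler's
# default behaviour.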
# ================================
# Part 2: LLM Serve Code
# ================================
APPCFG = LoadConfig()
app = Flask(__name__)
# Load the LLM and tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    APPCFG.llm_engine, token=APPCFG.gemma_token, device=APPCFG.device)
model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path="BioMistral/BioMistral-7B",
                                             token=APPCFG.gemma_token,
                                             torch_dtype=torch.float16,
                                             device_map=APPCFG.device)
app_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)
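# Note: with a plain string input, a "text-generation" pipeline returns a list
# shaped like [{"generated_text": "<prompt + completion>"}], which is why the
# route below slices off the prompt before returning the response.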
@app.route("/generate_text", methods=["POST"])
def generate_text():
    data = request.json
    # Generation parameters from the JSON payload, with defaults
    prompt = data.get("prompt", "")
    max_new_tokens = data.get("max_new_tokens", 1000)
    do_sample = data.get("do_sample", True)
    temperature = data.get("temperature", 0.1)
    top_k = data.get("top_k", 50)
    top_p = data.get("top_p", 0.95)
    # Apply the model's chat template before generation
    tokenized_prompt = app_pipeline.tokenizer.apply_chat_template(
        prompt, tokenize=False, add_generation_prompt=True)
    outputs = app_pipeline(
        tokenized_prompt,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p
    )
    # Return only the newly generated text, stripping the echoed prompt
    return jsonify({"response": outputs[0]["generated_text"][len(tokenized_prompt):]})


def start_llm_server():
    app.run(debug=False, port=8888)
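# A minimal client sketch for the endpoint above, kept as a comment so it does
# not execute when this script runs. The chat-style prompt format is an
# assumption; ChatBot.respond in utils/chatbot.py builds the real payload:
#
#   import requests
#   payload = {
#       "prompt": [{"role": "user", "content": "What is hypertension?"}],
#       "max_new_tokens": 256,
#       "temperature": 0.1,
#   }
#   r = requests.post("http://127.0.0.1:8888/generate_text", json=payload)
#   print(r.json()["response"])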
# ================================
# Part 3: Gradio Chatbot Code
# ================================
def start_gradio_app():
    with gr.Blocks() as demo:
        with gr.Tabs():
            with gr.TabItem("Med-App"):
                # First row: reference sidebar and chatbot display
                with gr.Row() as row_one:
                    with gr.Column(visible=False) as reference_bar:
                        ref_output = gr.Markdown()
                    with gr.Column() as chatbot_output:
                        chatbot = gr.Chatbot(
                            [], elem_id="chatbot", bubble_full_width=False, height=500,
                            avatar_images=("images/test.png", "images/Gemma-logo.png")
                        )
                        chatbot.like(UISettings.feedback, None, None)
                # Second row: text input
                with gr.Row():
                    input_txt = gr.Textbox(
                        lines=4, scale=8, placeholder="Enter text and press enter, or upload PDF files"
                    )
                # Third row: action buttons and RAG source selector
                with gr.Row() as row_two:
                    text_submit_btn = gr.Button(value="Submit text")
                    btn_toggle_sidebar = gr.Button(value="References")
                    upload_btn = gr.UploadButton(
                        "📁 Upload PDF or doc files", file_types=['.pdf', '.doc'], file_count="multiple"
                    )
                    clear_button = gr.ClearButton([input_txt, chatbot])
                    rag_with_dropdown = gr.Dropdown(
                        label="RAG with", choices=["Preprocessed doc", "Upload doc: Process for RAG"], value="Preprocessed doc"
                    )
                # Fourth row: generation parameter sliders
                with gr.Row() as row_four:
                    temperature_bar = gr.Slider(
                        minimum=0.1, maximum=1, value=0.1, step=0.1, label="Temperature",
                        info="Increasing the temperature will make the model answer more creatively."
                    )
                    top_k = gr.Slider(
                        minimum=0.0, maximum=100.0, step=1, label="top_k", value=50,
                        info="A lower value (e.g. 10) will result in more conservative answers."
                    )
                    top_p = gr.Slider(
                        minimum=0.0, maximum=1.0, step=0.01, label="top_p", value=0.95,
                        info="A lower value will generate more focused and conservative text."
                    )
                # Process uploaded files and submitted text
                file_msg = upload_btn.upload(
                    fn=UploadFile.process_uploaded_files, inputs=[upload_btn, chatbot, rag_with_dropdown],
                    outputs=[input_txt, chatbot], queue=False
                )
                txt_msg = input_txt.submit(
                    fn=ChatBot.respond, inputs=[chatbot, input_txt, rag_with_dropdown, temperature_bar, top_k, top_p],
                    outputs=[input_txt, chatbot, ref_output], queue=False
                ).then(lambda: gr.Textbox(interactive=True), None, [input_txt], queue=False)
                text_submit_btn.click(
                    fn=ChatBot.respond, inputs=[chatbot, input_txt, rag_with_dropdown, temperature_bar, top_k, top_p],
                    outputs=[input_txt, chatbot, ref_output], queue=False
                ).then(lambda: gr.Textbox(interactive=True), None, [input_txt], queue=False)
    demo.launch()
# ================================
# Main: Running all services concurrently
# ================================
if __name__ == "__main__":
    # Start all services in separate threads
    reference_server_thread = threading.Thread(target=start_reference_server)
    llm_server_thread = threading.Thread(target=start_llm_server)
    gradio_app_thread = threading.Thread(target=start_gradio_app)
    reference_server_thread.start()
    llm_server_thread.start()
    gradio_app_thread.start()
    # Keep the main thread alive until the services exit
    reference_server_thread.join()
    llm_server_thread.join()
    gradio_app_thread.join()