# Spaces: Paused
# (Page-header text captured with the file; kept as a comment so the module parses.)
from fastapi import FastAPI
from llama_cpp import Llama
from transformers import pipeline
from txtai.embeddings import Embeddings
from txtai.pipeline import Extractor
# NOTE - we configure docs_url to serve the interactive Docs at the root path
# of the app. This way, we can use the docs as a landing page for the app on Spaces.
app = FastAPI(docs_url="/")
# Create embeddings model with content support (currently disabled)
# embeddings = Embeddings({"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True})
# embeddings.load('index')

# Create extractor instance (currently disabled)
# extractor = Extractor(embeddings, "google/flan-t5-base")

# Module-level text-generation pipeline used by generate().
# NOTE(review): the model string names a GGML weights file inside a repo rather
# than a plain "<owner>/<repo>" id. transformers.pipeline presumably cannot
# resolve or load that; `Llama` from llama_cpp is imported but unused and looks
# like the intended loader - confirm before un-pausing the Space.
pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
def generate(text: str):
    """Run ``text`` through the module-level ``pipe`` and return its output.

    Args:
        text: Input passed unchanged to ``pipe``.

    Returns:
        A dict with the single key ``"output"`` holding the
        ``"generated_text"`` entry of the first result returned by ``pipe``.

    NOTE(review): this function carries no route decorator, so it is not
    registered on ``app`` here - confirm whether an endpoint was intended.
    """
    results = pipe(text)
    # Only the first result is reported; any further results are ignored.
    first = results[0]
    return {"output": first["generated_text"]}
def prompt(question):
    """Build the instruction prompt for a context-restricted answer.

    The result has three lines: a fixed instruction, ``Question: <question>``,
    and a trailing ``Context: `` label left open for the caller to append to.

    Args:
        question: Value interpolated after ``Question: `` (formatted with
            the default f-string conversion).

    Returns:
        The prompt string, ending in ``"Context: "`` with no final newline.
    """
    # Assembled from explicit pieces so the line breaks and the trailing space
    # after "Context:" do not depend on source-file whitespace.
    return (
        "Answer the following question using only the context below. "
        "Say 'no answer' when the question can't be answered.\n"
        f"Question: {question}\n"
        "Context: "
    )
def search(query, question=None):
    """Submit one task to ``extractor`` and return its answer field.

    Args:
        query: Second element of the task tuple handed to ``extractor``.
        question: Text wrapped by ``prompt()``; falls back to ``query`` when
            falsy (``None`` or empty).

    Returns:
        Element ``[0][1]`` of the value returned by ``extractor``.

    NOTE(review): ``extractor`` is not bound anywhere in the reviewed portion
    of this file (its construction is commented out), so calling this
    function raises ``NameError`` until that setup is restored.
    """
    # Default question to query if empty
    effective_question = question or query
    # Task tuple is presumably (name, query, question, snippet) as txtai's
    # Extractor expects - confirm against the txtai version in use.
    task = ("answer", query, prompt(effective_question), False)
    return extractor([task])[0][1]
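# Disabled endpoint: it depends on search(), which needs the commented-out extractor.
# NOTE(review): `{answer}` below is a set literal; a dict such as
# {"answer": answer} was probably intended - confirm before re-enabling.
# @app.get("/rag")
# def rag(question: str):
#     # question = "what is the document about?"
#     answer = search(question)
#     # print(question, answer)
#     return {answer}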