Spaces:
Sleeping
Sleeping
File size: 4,152 Bytes
6f5ab24 d0b143b 6f5ab24 d0b143b b884aa9 206bc94 9fb5cd1 206bc94 22e7c8d 206bc94 6f5ab24 22e7c8d 206bc94 d0b143b 22e7c8d d0b143b 22e7c8d 708fcdb d0b143b b884aa9 206bc94 b884aa9 6f5ab24 206bc94 b884aa9 6f5ab24 206bc94 b884aa9 206bc94 6f5ab24 b884aa9 dc294fb b884aa9 98b8089 316ac93 b884aa9 6f5ab24 206bc94 9fb5cd1 d0b143b 1202d82 d0b143b 7623dc6 d0b143b 1202d82 dc294fb 6f5ab24 b884aa9 62841b2 81fddf5 b884aa9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import gradio # Interface handling
import spaces # GPU
import langchain_community.vectorstores # Vectorstore for publications
import langchain_huggingface # Embeddings
import transformers # LLM
# How many publication excerpts are pulled from the vectorstore for each prompt
PUBLICATIONS_TO_RETRIEVE: int = 5
# Prompt skeleton for retrieval-augmented generation. It has two placeholders:
#   {research_excerpts} - the retrieved publication text
#   {query}             - the user's question
# The text ends right after the ANSWER header so the model continues from there.
RAG_TEMPLATE = (
    "You are an AI assistant who enjoys helping users learn about research.\n"
    "Answer the USER_QUERY on additive manufacturing research using the RESEARCH_EXCERPTS.\n"
    "Provide a concise ANSWER based on these excerpts. Avoid listing references.\n"
    "===== RESEARCH_EXCERPTS =====\n"
    "{research_excerpts}\n"
    "===== USER_QUERY =====\n"
    "{query}\n"
    "===== ANSWER =====\n"
)
# Sample questions offered in the chat interface; each entry is a {"text": ...} dict
EXAMPLE_QUERIES = [
    {"text": question}
    for question in (
        "What is multi-material 3D printing?",
        "How is additive manufacturing being applied in aerospace?",
        "Tell me about innovations in metal 3D printing techniques.",
        "What are some sustainable materials for 3D printing?",
        "What are the challenges with support structures in 3D printing?",
        "How is 3D printing impacting the medical field?",
        "What are common applications of additive manufacturing in industry?",
        "What are the benefits and limitations of using polymers in 3D printing?",
        "Tell me about the environmental impacts of additive manufacturing.",
        "What are the primary limitations of current 3D printing technologies?",
        "How are researchers improving the speed of 3D printing processes?",
        "What are best practices for post-processing in additive manufacturing?",
    )
]
# Embedding model used to encode queries against the saved index. It runs on
# the GPU, and the vectors are used as produced (no normalization).
publication_embeddings = langchain_huggingface.HuggingFaceEmbeddings(
    model_name="all-MiniLM-L12-v2",
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": False},
)

# Load the FAISS vectorstore of SFF publications from the local folder.
# NOTE: allow_dangerous_deserialization=True is an explicit opt-in because
# loading unpickles stored data; this is only appropriate while the
# "publication_vectorstore" folder is a trusted artifact.
publication_vectorstore = langchain_community.vectorstores.FAISS.load_local(
    folder_path="publication_vectorstore",
    embeddings=publication_embeddings,
    allow_dangerous_deserialization=True,
)
# Checkpoint shared by the model weights and the tokenizer
LLM_CHECKPOINT = "Qwen/Qwen2.5-7B-Instruct-AWQ"

# Load the causal language model and place it on the GPU
model = transformers.AutoModelForCausalLM.from_pretrained(LLM_CHECKPOINT)
model.to("cuda")

# Matching tokenizer for the same checkpoint
tokenizer = transformers.AutoTokenizer.from_pretrained(LLM_CHECKPOINT)

# Wrap model and tokenizer in a text-generation pipeline; `llm(prompt, ...)`
# is the callable used to produce answers
llm = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device="cuda",
)
def preprocess(query: str) -> str:
    """
    Build the RAG prompt for a user query.

    Runs a similarity search for the PUBLICATIONS_TO_RETRIEVE closest entries
    in the publication vectorstore, wraps each entry's page content in quoted
    ellipses, and substitutes the excerpts and the query into RAG_TEMPLATE.

    Args:
        query (str): The user's query.

    Returns:
        str: The filled-in prompt containing the research excerpts and the query.
    """
    # Retrieve the closest publications for this query
    matches = publication_vectorstore.search(
        query, search_type="similarity", k=PUBLICATIONS_TO_RETRIEVE
    )

    # Quote every hit and separate consecutive excerpts with a blank line
    excerpt_block = "\n\n".join(
        f'"... {match.page_content}..."' for match in matches
    )

    # Drop the excerpts and the original question into the template
    return RAG_TEMPLATE.format(research_excerpts=excerpt_block, query=query)
@spaces.GPU(duration=30)
def reply(message: str, history: list[str]) -> str:
    """
    Answer a chat message using retrieval-augmented generation.

    Args:
        message (str): The user's message or query.
        history (list[str]): The conversation history passed in by the chat
            interface. It is accepted but not used; every message is answered
            on its own.

    Returns:
        str: The generated response with any leading or trailing "=" and
            space characters removed.
    """
    # Generate up to 512 new tokens from the RAG prompt; return_full_text=False
    # requests the completion without the prompt echoed back
    generations = llm(
        preprocess(message),
        max_new_tokens=512,
        return_full_text=False,
    )

    # Take the first candidate's text
    completion = generations[0]["generated_text"]

    # Trim "=" and spaces from both ends (presumably stray pieces of the
    # "=====" section markers used in the prompt)
    return completion.strip("= ")
# Chat display with the label, share button, and copy button hidden, and
# message bubbles that do not span the full width
chat_window = gradio.Chatbot(
    show_label=False,
    show_share_button=False,
    show_copy_button=False,
    bubble_full_width=False,
)

# Chat interface that routes each message to `reply` and offers the example
# queries without caching their outputs
demo = gradio.ChatInterface(
    reply,
    examples=EXAMPLE_QUERIES,
    cache_examples=False,
    chatbot=chat_window,
)

# Start serving the interface in debug mode
demo.launch(debug=True)
|