File size: 6,098 Bytes
3346531
 
 
 
 
 
 
 
ff461f9
 
 
 
 
3346531
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff461f9
3346531
 
 
 
 
 
 
 
 
ff461f9
3346531
 
 
 
 
 
 
ff461f9
3346531
 
 
 
 
ff461f9
3346531
 
 
 
 
 
ff461f9
3346531
 
ff461f9
3346531
 
ff461f9
3346531
ff461f9
3346531
 
 
ff461f9
3346531
 
 
 
 
ff461f9
3346531
 
 
 
 
 
ff461f9
3346531
 
 
 
ff461f9
 
3346531
ff461f9
3346531
 
ff461f9
3346531
ff461f9
3346531
 
 
 
 
 
ff461f9
3346531
 
 
 
ff461f9
 
3346531
 
 
ff461f9
3346531
ff461f9
3346531
 
ff461f9
3346531
ff461f9
3346531
 
ff461f9
3346531
 
 
ff461f9
3346531
 
ff461f9
 
3346531
 
ff461f9
3346531
 
ff461f9
3346531
ff461f9
3346531
 
 
ff461f9
3346531
 
ff461f9
 
3346531
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff461f9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import os

import google.generativeai as genai
import requests
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from bs4 import BeautifulSoup
import gradio as gr

# Configure Gemini API key
# The key is read from the GOOGLE_API_KEY environment variable so that no
# credential lives in source control.
# NOTE(review): a literal key was previously committed on this line; it should
# be treated as leaked and revoked.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    # Warn instead of raising so the module can still be imported (e.g. to
    # inspect the UI); text generation is expected to fail until a key is set.
    print("Warning: GOOGLE_API_KEY is not set; text generation requests will likely fail.")
genai.configure(api_key=GOOGLE_API_KEY)

# Fetch lecture notes and model architectures
def fetch_lecture_notes():
    """Download the configured lecture pages and return their extracted text.

    Returns:
        A list of ``(text, url)`` tuples, one for every page that answered
        with HTTP 200. Pages that return another status code or that raise a
        network error are reported on stdout and skipped.
    """
    lecture_urls = [
        "https://stanford-cs324.github.io/winter2022/lectures/introduction/",
        "https://stanford-cs324.github.io/winter2022/lectures/capabilities/",
        "https://stanford-cs324.github.io/winter2022/lectures/data/",
        "https://stanford-cs324.github.io/winter2022/lectures/modeling/"
    ]
    lecture_texts = []
    for url in lecture_urls:
        try:
            # Without a timeout a single unresponsive host blocks forever.
            response = requests.get(url, timeout=30)
        except requests.RequestException as exc:
            # One unreachable page must not abort the remaining downloads.
            print(f"Failed to fetch content from {url}: {exc}")
            continue
        if response.status_code != 200:
            print(f"Failed to fetch content from {url}, status code: {response.status_code}")
            continue
        print(f"Fetched content from {url}")
        lecture_texts.append((extract_text_from_html(response.text), url))
    return lecture_texts

def fetch_model_architectures():
    """Download the Awesome-LLM milestone-papers page and return its text.

    Returns:
        A ``(text, url)`` tuple. ``text`` is the empty string when the page
        answers with a non-200 status or the request raises a network error.
    """
    url = "https://github.com/Hannibal046/Awesome-LLM#milestone-papers"
    try:
        # Without a timeout an unresponsive host blocks the caller forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Report network failures through the same empty-text result that a
        # bad status code produces, so callers need only one failure path.
        print(f"Failed to fetch model architectures: {exc}")
        return "", url
    if response.status_code == 200:
        print(f"Fetched model architectures, status code: {response.status_code}")
        return extract_text_from_html(response.text), url
    print(f"Failed to fetch model architectures, status code: {response.status_code}")
    return "", url

# Extract text from HTML content
def extract_text_from_html(html_content):
    """Return the text of an HTML document with script/style elements removed.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend, every
    ``<script>`` and ``<style>`` element is detached from the tree, and the
    remaining text fragments are stripped and joined with newlines.
    """
    document = BeautifulSoup(html_content, 'html.parser')
    # Script and style bodies are not page content; remove them before reading text.
    for unwanted in document.find_all(["script", "style"]):
        unwanted.extract()
    return document.get_text(separator="\n", strip=True)

# Generate embeddings using SentenceTransformers
def create_embeddings(texts, model):
    """Encode the text half of each ``(text, source)`` pair with ``model``.

    Args:
        texts: Iterable of two-item pairs; only the first item is encoded.
        model: Object exposing ``encode(list_of_strings)``.

    Returns:
        Whatever ``model.encode`` returns for the list of texts.
    """
    documents = []
    for body, _source in texts:
        documents.append(body)
    return model.encode(documents)

# Initialize FAISS index
def initialize_faiss_index(embeddings):
    """Create a ``faiss.IndexFlatL2`` holding the given embedding vectors.

    Args:
        embeddings: Array whose ``shape[1]`` is the vector dimension
            (assumed to be a 2-D array with one row per vector).

    Returns:
        The populated FAISS index.
    """
    # The index width is taken from the second axis of the input array.
    flat_index = faiss.IndexFlatL2(embeddings.shape[1])
    vectors = embeddings.astype('float32')  # cast to float32 before adding
    flat_index.add(vectors)
    return flat_index

# Handle natural language queries
# Module-level log of (query, generated_text) pairs, appended to on every call.
conversation_history = []

def handle_query(query, faiss_index, embeddings_texts, model):
    """Answer ``query`` using the most similar indexed documents as context.

    Args:
        query: The user's question.
        faiss_index: Index exposing ``search(vectors, k)`` that returns
            ``(distances, indices)``.
        embeddings_texts: Sequence of ``(text, url)`` pairs in the same order
            as the vectors stored in ``faiss_index``.
        model: Embedding model exposing ``encode(list_of_strings)``.

    Returns:
        A ``(generated_text, sources)`` tuple, where ``sources`` lists the
        URLs of the documents that were used as context.
    """
    query_embedding = model.encode([query]).astype('float32')

    # Search FAISS index for up to 3 results. Never request more neighbours
    # than there are documents: missing neighbours come back as -1, which
    # Python indexing would silently map to the *last* document.
    document_count = len(embeddings_texts)
    top_k = min(3, document_count)
    relevant_texts = []
    if top_k > 0:
        _, indices = faiss_index.search(query_embedding, top_k)
        relevant_texts = [
            embeddings_texts[idx]
            for idx in indices[0]
            if 0 <= idx < document_count
        ]

    # Combine relevant texts and truncate if necessary
    combined_text = "\n".join([text for text, _ in relevant_texts])
    max_length = 500  # Adjust as necessary
    if len(combined_text) > max_length:
        combined_text = combined_text[:max_length] + "..."

    # Generate a response using Gemini
    try:
        response = genai.generate_text(
            model="models/text-bison-001",
            prompt=f"Based on the following context:\n\n{combined_text}\n\nAnswer the following question: {query}",
            max_output_tokens=200
        )
        result = response.result if response else None
        # An empty/None result would otherwise propagate to callers that
        # concatenate the answer with further text.
        generated_text = result if result else "No response generated."
    except Exception as e:
        # Best-effort boundary: report the failure and return a fixed message.
        print(f"Error generating text: {e}")
        generated_text = "An error occurred while generating the response."

    # Update conversation history
    conversation_history.append((query, generated_text))

    # Extract sources
    sources = [url for _, url in relevant_texts]

    return generated_text, sources

def generate_concise_response(prompt, context):
    """Ask the text model to answer ``prompt`` given ``context``.

    Args:
        prompt: Instruction placed at the start of the request.
        context: Text inserted after a ``Context:`` label.

    Returns:
        The ``result`` attribute of the generation response, the string
        "No response generated." when the response object is falsy, or a
        fixed error message when any exception is raised.
    """
    try:
        completion = genai.generate_text(
            model="models/text-bison-001",
            prompt=f"{prompt}\n\nContext: {context}\n\nAnswer:",
            max_output_tokens=200
        )
        if not completion:
            return "No response generated."
        return completion.result
    except Exception as err:
        # Best-effort call: log the problem and hand back a placeholder string.
        print(f"Error generating concise response: {err}")
        return "An error occurred while generating the concise response."

# Main function to execute the pipeline
# Holds the retrieval state (documents, embedding model, FAISS index) once it
# has been built, so it is not rebuilt for every chat message.
_pipeline_cache = {}


def _get_pipeline():
    """Return ``(all_texts, embedding_model, faiss_index)``, building it once.

    Returns ``None`` (and caches nothing, so a later call retries) when no
    document with non-empty text could be fetched.
    """
    if "pipeline" not in _pipeline_cache:
        lecture_notes = fetch_lecture_notes()
        model_architectures = fetch_model_architectures()

        # A failed fetch yields an empty text; indexing it would only let an
        # empty document be retrieved and cited as a source.
        all_texts = [doc for doc in lecture_notes + [model_architectures] if doc[0]]
        if not all_texts:
            return None

        # Load the SentenceTransformers model
        embedding_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

        embeddings = create_embeddings(all_texts, embedding_model)

        # Initialize FAISS index
        faiss_index = initialize_faiss_index(np.array(embeddings))

        _pipeline_cache["pipeline"] = (all_texts, embedding_model, faiss_index)
    return _pipeline_cache["pipeline"]


def chatbot(message, history):
    """Gradio chat callback: answer ``message`` and append the source URLs.

    Args:
        message: The latest user message.
        history: Previous chat turns; the first item of each turn is read as
            the user's message (assumed to be a string — TODO confirm against
            the installed Gradio version).

    Returns:
        The generated answer, followed by a "Sources:" section when any
        source documents were retrieved.
    """
    pipeline = _get_pipeline()
    if pipeline is None:
        return "No source documents could be fetched, so the question cannot be answered right now."
    all_texts, embedding_model, faiss_index = pipeline

    response, sources = handle_query(message, faiss_index, all_texts, embedding_model)
    print("Query:", message)
    print("Response:", response)
    total_text = response

    if sources:
        print("Sources:", sources)
        relevant_source = "\n".join(sources)
        total_text += f"\n\nSources:\n{relevant_source}"
    else:
        print("Sources: None of the provided sources were used.")

    print("----")

    # Generate a concise and relevant summary using Gemini
    # The summary is only printed to the console; it is not part of the reply.
    prompt = "Summarize the user queries so far"
    user_queries_summary = " ".join([msg[0] for msg in (history or [])] + [message])
    concise_response = generate_concise_response(prompt, user_queries_summary)
    print("Concise Response:")
    print(concise_response)

    return total_text

# Build the Gradio chat UI around the chatbot callback
# Example prompts offered to the user by the chat interface.
_EXAMPLE_QUESTIONS = [
    "What are some milestone model architectures in LLMs?",
    "Explain the transformer architecture.",
    "Tell me about datasets used to train LLMs.",
    "How are LLM training datasets cleaned and preprocessed?",
    "Summarize the user queries so far"
]

iface = gr.ChatInterface(
    fn=chatbot,
    title="LLM Research Assistant",
    description="Ask questions about LLM architectures, datasets, and training techniques.",
    examples=_EXAMPLE_QUESTIONS,
    retry_btn="Regenerate",
    undo_btn="Undo",
    clear_btn="Clear",
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)