File size: 6,853 Bytes
c02b4bf
 
 
 
6113980
b26b0a3
1c581ef
 
c02b4bf
6113980
 
07026cb
f06025d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138851d
07026cb
 
 
 
138851d
a880370
07026cb
 
 
 
 
 
 
 
2742bc2
02412d9
6113980
 
 
250a81c
6113980
250a81c
6113980
 
d3ae86a
1c581ef
 
c02b4bf
d3ae86a
c02b4bf
 
 
 
1c581ef
c02b4bf
 
1c581ef
c02b4bf
1c581ef
c02b4bf
1c581ef
c02b4bf
7dbd562
fe46f4b
 
7dbd562
 
 
 
fe46f4b
7dbd562
 
 
 
fe46f4b
7dbd562
fe46f4b
 
 
 
 
 
b518cbb
7502cc2
b518cbb
 
 
 
 
 
 
 
 
 
c02b4bf
c78ab4d
4d9ef07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfbc154
fe46f4b
b56ec7e
c02b4bf
 
 
 
 
 
 
 
 
 
 
1c581ef
 
 
 
c02b4bf
1c581ef
 
 
c02b4bf
1c581ef
 
 
 
 
c02b4bf
 
 
 
4dfdae4
4df40cd
c02b4bf
4d9ef07
 
 
 
 
 
c02b4bf
4d9ef07
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import os
import gradio as gr
import json
from huggingface_hub import InferenceClient
import gspread
from google.oauth2 import service_account
from datetime import datetime
import chromadb

# Google Sheets setup
# OAuth scopes requested for the service-account credentials.
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]


def _require_env(name):
    """Return environment variable *name*; raise a clear error when it is unset."""
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value


# key1 holds the private key id; the private key body is stored one line per
# environment variable (key2 ... key28) and reassembled below.
key1 = os.getenv("key1")
# NOTE(review): the original concatenation read key23 but never included it
# in the key body. That omission is preserved here so the assembled key is
# unchanged -- confirm whether key23 is intentionally unused.
_PKEY_LINE_VARS = [f"key{i}" for i in range(2, 29) if i != 23]
pkey = (
    "-----BEGIN PRIVATE KEY-----\n"
    + "\n".join(_require_env(name) for name in _PKEY_LINE_VARS)
    + "\n-----END PRIVATE KEY-----\n"
)

# Service-account info dict passed to google.oauth2.
json_data = {
    "type": "service_account",
    "project_id": "nestolechatbot",
    "private_key_id": key1,
    "private_key": pkey,
    "client_email": "nestoleservice@nestolechatbot.iam.gserviceaccount.com",
    "client_id": "107457262210035412036",
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/nestoleservice%40nestolechatbot.iam.gserviceaccount.com",
    "universe_domain": "googleapis.com",
}
creds = service_account.Credentials.from_service_account_info(json_data, scopes=scope)

# NOTE: `client` is rebound further down in this file to the ChromaDB client;
# `sheet` keeps the worksheet handle needed for logging.
client = gspread.authorize(creds)
sheet = client.open("nestolechatbot").sheet1  # Open the sheet

def save_to_sheet(date, name, message, IP, dev, header):
    """Append one row to the Google Sheet and return a confirmation text.

    The row is written in the column order: date, name, message, IP, dev,
    header. The returned string addresses *name*.
    """
    row = [date, name, message, IP, dev, header]
    sheet.append_row(row)
    confirmation = f"Thanks {name}, your message has been saved!"
    return confirmation

# Vector store setup: use the first directory if it exists on this machine
# (presumably a developer's local checkout), otherwise the /home/user/app one.
path = '/Users/thiloid/Desktop/LSKI/ole_nest/Chatbot/LLM/chromaTS'
path = path if os.path.exists(path) else "/home/user/app/chromaTS"

print(path)
# Rebinds `client` (previously the gspread client) to the ChromaDB client.
client = chromadb.PersistentClient(path=path)
# Startup diagnostics, printed in this order: heartbeat, version, collections.
for probe in (client.heartbeat, client.get_version, client.list_collections):
    print(probe())

from chromadb.utils import embedding_functions

# `default_ef` is instantiated but not referenced anywhere else in this file.
default_ef = embedding_functions.DefaultEmbeddingFunction()
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
)

# Existing collection queried by the chat handler.
collection = client.get_collection(
    name="chromaTS",
    embedding_function=sentence_transformer_ef,
)

# Hosted text-generation client for the instruct model.
inference_client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

def extract_ip_and_device(headers_obj):
    """Extract the forwarded IP and the user agent from raw request headers.

    Args:
        headers_obj: Object exposing ``.raw``, an iterable of header entries.
            Entries of length 2 are unpacked as ``(key, value)``; keys are
            compared against the bytes ``b'x-forwarded-for'`` and
            ``b'user-agent'`` (presumably a Starlette ``Headers`` object --
            verify against the caller).

    Returns:
        A ``(ip_address, device_info)`` tuple of strings; each element is
        ``None`` when the corresponding header is absent. If a header occurs
        more than once, the last occurrence wins.
    """
    ip_address = None
    device_info = None

    # Access the raw headers list
    for header in headers_obj.raw:
        if len(header) != 2:
            print(f"Unexpected header format: {header}")
            continue

        key, value = header

        # Header bytes come from the client and are not guaranteed to be
        # valid UTF-8; replace undecodable bytes instead of raising so a
        # malformed header cannot abort the whole request.
        if key == b'x-forwarded-for':
            ip_address = value.decode('utf-8', errors='replace')
        elif key == b'user-agent':
            device_info = value.decode('utf-8', errors='replace')

    return ip_address, device_info

def format_prompt(message, history):
    """Build an ``[INST]``-tagged prompt from *message* and the last chat turn.

    Only the most recent ``(user_prompt, bot_response)`` pair of *history* is
    included; earlier turns are ignored. The history and the final prompt are
    printed for debugging.

    Args:
        message: Text of the current user message.
        history: Sequence of ``(user_prompt, bot_response)`` pairs; may be
            empty or ``None``.

    Returns:
        The prompt string ending in ``[INST] {message} [/INST]``.
    """
    print("HISTORY")
    print(history)
    prompt = ""
    if history:
        user_prompt, bot_response = history[-1]
        # The chat UI in this file is seeded with a bot-only greeting whose
        # user part is None. Formatting such a turn would put the literal
        # text "None" into the prompt as a user message, so skip it.
        if user_prompt is not None:
            prompt += f"[INST] {user_prompt} [/INST] {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    print("Final P")
    print(prompt)
    return prompt

def response(request: gr.Request, prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0):
    """Stream a retrieval-augmented answer for *prompt* and log the exchange.

    Steps:
      1. Read the request headers and extract IP / user agent for logging.
      2. Query the Chroma ``collection`` with the formatted prompt
         (up to 60 documents).
      3. Prepend a German system instruction, plus the retrieved documents
         when there are any, to the user prompt.
      4. Stream the model output, yielding the accumulated text after every
         token.
      5. Append the exchange to the Google Sheet and yield the final text
         once more.

    Args:
        request: Gradio request whose ``headers`` are logged.
        prompt: Current user message.
        history: Chat history passed through to ``format_prompt``.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Generation length limit forwarded to the model.
        top_p: Nucleus-sampling parameter forwarded to the model.
        repetition_penalty: Repetition penalty forwarded to the model.

    Yields:
        The answer text accumulated so far.
    """
    headers = request.headers
    IP, dev = extract_ip_and_device(headers)
    print(headers)

    # Clamp to a small positive floor, as the original code did.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    search_prompt = format_prompt(prompt, history)
    results = collection.query(
        query_texts=[search_prompt],
        n_results=60,
    )
    documents = results['documents'][0]

    # Default to no database excerpts. Previously `addon` was only assigned
    # when more than one document came back, so zero or one hit raised
    # UnboundLocalError on the line that builds `system`.
    addon = ""
    if documents:
        addon = "Bitte berücksichtige bei deiner Antwort ausschießlich folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n" + "\n".join(documents)
    system = "Du bist ein deutschsprachiges KI-basiertes Studienberater Assistenzsystem, das zu jedem Anliegen möglichst geeignete Studieninformationen empfiehlt." + addon + "\n\nUser-Anliegen:"
    formatted_prompt = format_prompt(system + "\n" + prompt, history)

    stream = inference_client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for chunk in stream:
        output += chunk.token.text
        yield output

    # Log the finished exchange, then emit the final text once more so the
    # generator yields at least once even when the stream was empty.
    now = str(datetime.now())
    save_to_sheet(now, prompt, output, IP, dev, str(headers))
    yield output
    
# Build the chat UI: the chat window is seeded with a bot-only greeting
# (user part None), and `response` handles every incoming message.
welcome_turn = [None, "Herzlich willkommen! Ich bin Chätti ein KI-basiertes Studienassistenzsystem, das für jede Anfrage die am besten Studieninformationen empfiehlt.<br>Erzähle mir, was du gerne tust!"]
chat_window = gr.Chatbot(value=[welcome_turn], render_markdown=True)
demo = gr.ChatInterface(
    response,
    chatbot=chat_window,
    title="German Studyhelper Chätti",
)
# Enable request queuing, then start the server with a public share link.
demo.queue().launch(share=True)

# Printed once launch() has returned.
print("Interface up and running!")