AFischer1985 committed
Commit 2069576
1 Parent(s): 6302da5

Initial commit

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
db/c3dc4fcc-b575-406f-987f-4e2d9d282883/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b847b433ffbe825c9b7a325217a52214c01b32f0da3060ff91c73ce7682de0a7
+ size 16060000
db/c3dc4fcc-b575-406f-987f-4e2d9d282883/header.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7eb3d22f8e3406cb187c96ba787c1946b3b083c965d1dc81c68cbd6ed33663f4
+ size 100
db/c3dc4fcc-b575-406f-987f-4e2d9d282883/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45ec30953f1f095a65b18e2e9e27ed72bb53c094178bdd2d2f1296c6fd2ea19e
+ size 126897
db/c3dc4fcc-b575-406f-987f-4e2d9d282883/length.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cf428039177ac0281bb54df22b4eb70a4f986d07422a428b25b7ff53fbaa8a44
+ size 20000
db/c3dc4fcc-b575-406f-987f-4e2d9d282883/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4a7bf5913abf03fc400fbd706f94f59f83192432c334a3ed229a7a3111593106
+ size 42780
db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8a03963c51eeaca2bd9af237fadc4d990c85c8ceacf5cb9423278c972b993bd2
+ size 49680384
run.py ADDED
@@ -0,0 +1,101 @@
+ #############################################################################
+ # Title: BERUFENET.AI
+ # Author: Andreas Fischer
+ # Date: January 4th, 2024
+ # Last update: February 8th, 2024
+ #############################################################################
+
+ import os  # required for the path check below
+
+ # Use the local development path if it exists, otherwise the path inside the HF Space
+ dbPath="/home/af/Schreibtisch/Code/gradio/BERUFENET/db"
+ if not os.path.exists(dbPath): dbPath="/home/user/app/db"
+
+ print(dbPath)
+
+ # Chroma-DB
+ #-----------
+
+ import chromadb
+ #client = chromadb.Client()
+ path=dbPath
+ client = chromadb.PersistentClient(path=path)
+ print(client.heartbeat())
+ print(client.get_version())
+ print(client.list_collections())
+ from chromadb.utils import embedding_functions
+ default_ef = embedding_functions.DefaultEmbeddingFunction()
+ sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer")
+ #instructor_ef = embedding_functions.InstructorEmbeddingFunction(model_name="hkunlp/instructor-large", device="cuda")
+ print(str(client.list_collections()))
+
+ # Load the prebuilt collection shipped in db/
+ global collection
+ if("name=BerufenetDB1" in str(client.list_collections())):
+     print("BerufenetDB1 found!")
+     collection = client.get_collection(name="BerufenetDB1", embedding_function=sentence_transformer_ef)
+
+ print("Database ready!")
+ print(collection.count())
+
+
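+ # Sketch (hypothetical, not part of the original flow): if "BerufenetDB1" were ever absent
+ # from db/, `collection` would stay undefined and collection.count() would raise a NameError.
+ # A defensive variant could fall back to creating an empty collection instead, e.g.:
+ #   collection = client.get_or_create_collection(name="BerufenetDB1", embedding_function=sentence_transformer_ef)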
+ # Model
+ #-------
+
+ from huggingface_hub import InferenceClient
+ import gradio as gr
+
+ # Note: this re-binds `client` from the Chroma client to the Hugging Face InferenceClient;
+ # the vector store is only accessed through `collection` from here on.
+ client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+
+
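+ # The InferenceClient above targets the hosted Hugging Face Inference API for Mixtral-8x7B-Instruct;
+ # client.text_generation(..., stream=True, details=True), used in response() below, yields
+ # token-level chunks whose text is read via .token.text.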
+ # Gradio-GUI
+ #------------
+
+ import gradio as gr
+ import json
+
+ def format_prompt(message, history):
+     prompt = "" #"<s>"
+     #for user_prompt, bot_response in history:
+     #    prompt += f"[INST] {user_prompt} [/INST]"
+     #    prompt += f" {bot_response}</s> "
+     prompt += f"[INST] {message} [/INST]"
+     return prompt
+
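+ # Illustration: format_prompt("Ich arbeite gern im Freien", []) returns
+ # "[INST] Ich arbeite gern im Freien [/INST]"; [INST]/[/INST] are the Mixtral-Instruct
+ # turn markers, and the chat history is currently ignored (see the commented-out loop above).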
+ def response(
+     prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
+ ):
+     temperature = float(temperature)
+     if temperature < 1e-2: temperature = 1e-2
+     top_p = float(top_p)
+     generate_kwargs = dict(
+         temperature=temperature,
+         max_new_tokens=max_new_tokens,
+         top_p=top_p,
+         repetition_penalty=repetition_penalty,
+         do_sample=True,
+         seed=42,  # fixed seed for reproducible sampling
+     )
+     addon=""
+     # Retrieve the five most similar BERUFENET entries for the user prompt
+     results=collection.query(
+         query_texts=[prompt],
+         n_results=5,
+         #where={"source": "google-docs"}
+         #where_document={"$contains":"search_string"}
+     )
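+     # collection.query returns a dict with parallel lists per query text:
+     # results['documents'][0], results['distances'][0] and results['metadatas'][0]
+     # each hold up to n_results entries for the single prompt queried above.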
+     dists=["<br><small>(relevance: "+str(round((1-d)*100)/100)+";" for d in results['distances'][0]]
+     sources=["source: "+s["source"]+")</small>" for s in results['metadatas'][0]]
+     results=results['documents'][0]
+     combination = zip(results,dists,sources)
+     combination = [' '.join(triplets) for triplets in combination]
+     print(str(prompt)+"\n\n"+str(combination))
+     if(len(results)>1):
+         # German instruction: consider the following database excerpts only if relevant,
+         # answer briefly and precisely, and silently ignore excerpts that do not fit
+         addon=" Bitte berücksichtige bei deiner Antwort ggf. folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n"+"\n".join(results)
+     # German system prompt: "You are a German-speaking AI-based assistance system that recommends the most suitable occupations for any request."
+     system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete Berufe empfiehlt."+addon+"\n\nUser-Anliegen:"
+     formatted_prompt = format_prompt(system+"\n"+prompt, history)
+     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+     output = ""
+     for response in stream:
+         output += response.token.text
+         yield output
+     output=output+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
+     yield output
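+ # Note: the "relevance" shown above is 1 - distance (rounded to two decimals), which reads
+ # naturally for cosine-style distances in [0, 1]; with other distance metrics the value can
+ # fall outside that range. The retrieved snippets are injected into the German system prompt
+ # only when more than one document comes back (len(results) > 1).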
+
+ # German welcome message: "Welcome! I am an AI-based assistance system that recommends the best-matching occupations for every request. Tell me what you like to do!"
+ gr.ChatInterface(response, chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten passenden Berufe empfiehlt.<br>Erzähle mir, was du gerne tust!"]],render_markdown=True),title="German BERUFENET-RAG-Interface to the Hugging Face Hub").queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
+ print("Interface up and running!")