Korron committed on
Commit 526702f
1 Parent(s): 853ccba

base model

Files changed (3)
  1. app.py +24 -29
  2. app_old.py +28 -23
  3. rag.py +2 -2
app.py CHANGED
@@ -1,16 +1,30 @@
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from inference import get_bot_response
+from rag import get_context
+from config import config
 from huggingface_hub import InferenceClient
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 model_name = "mistralai/Mistral-7B-Instruct-v0.2"
-client = InferenceClient(model_name)
 
 
-print("test")
+client = InferenceClient(model_name)
+print("tokenizer start loading")
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+print("tokenizer loaded")
+print("model start loading")
+model = AutoModelForCausalLM.from_pretrained(model_name,
+                                             device_map="auto",
+                                             trust_remote_code=False,
+                                             revision="main")
+print("model loaded")
+
+# model = AutoModelForCausalLM.from_pretrained(config["model_checkpoint"],
+#                                              device_map="auto",
+#                                              trust_remote_code=False,
+#                                              revision="main")
+
 
 def respond(
     message,
@@ -22,31 +36,12 @@ def respond(
 ):
     messages = [{"role": "system", "content": system_message}]
 
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
+    request = message
+    context = get_context(request, config["top_k"])
+    response = get_bot_response(request, context, model, tokenizer)
 
-        response += token
-        yield response
+    return response
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
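The rewritten app.py now answers each message by retrieving context with get_context and handing it, together with the request, to get_bot_response from inference.py. That helper is not part of this commit, so the following is only a minimal sketch of what it might look like, assuming it wraps the retrieved context and the question in Mistral's chat template and generates locally; the prompt wording, the signature beyond what the diff shows, and the generation settings are placeholders, not the repository's actual code.

import torch

def get_bot_response(request, context, model, tokenizer, max_new_tokens=512):
    # Hypothetical sketch: the real inference.py is not shown in this commit.
    # Fold the retrieved RAG context and the user request into one prompt.
    prompt = (
        "Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {request}"
    )
    messages = [{"role": "user", "content": prompt}]

    # Mistral-7B-Instruct ships a chat template, so let the tokenizer apply it.
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Return only the newly generated tokens, without the echoed prompt.
    return tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)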
app_old.py CHANGED
@@ -1,30 +1,16 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from inference import get_bot_response
-from rag import get_context
-from config import config
 from huggingface_hub import InferenceClient
 
+"""
+For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+"""
+# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 model_name = "mistralai/Mistral-7B-Instruct-v0.2"
-
-
 client = InferenceClient(model_name)
 
-tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
-
-print("model start loading")
-model = AutoModelForCausalLM.from_pretrained(model_name,
-                                             device_map="auto",
-                                             trust_remote_code=False,
-                                             revision="main")
-print("model loaded")
-
-# model = AutoModelForCausalLM.from_pretrained(config["model_checkpoint"],
-#                                              device_map="auto",
-#                                              trust_remote_code=False,
-#                                              revision="main")
 
+print("test")
 
 def respond(
     message,
@@ -36,12 +22,31 @@ def respond(
 ):
     messages = [{"role": "system", "content": system_message}]
 
-    request = message
-    context = get_context(request, config["top_k"])
-    response = get_bot_response(request, context, model, tokenizer)
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+
+    messages.append({"role": "user", "content": message})
+
+    response = ""
+
+    for message in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = message.choices[0].delta.content
 
-    return response
+        response += token
+        yield response
 
+"""
+For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
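One practical consequence of the move: the streaming baseline now kept in app_old.py yields partial strings from respond, while the new app.py returns a single string, and gr.ChatInterface accepts both callback styles. A small illustration of the two shapes (the function names below are only for illustration, not code from this repository):

# gr.ChatInterface treats a generator as a streaming reply and a plain
# return value as a single, complete reply.

def respond_streaming(message, history):
    for partial in ("Hel", "Hello", "Hello!"):
        yield partial      # the chat bubble updates as each partial arrives

def respond_blocking(message, history):
    return "Hello!"        # the chat bubble appears once the reply is ready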
rag.py CHANGED
@@ -5,7 +5,7 @@ from llama_index.core.query_engine import RetrieverQueryEngine
 from llama_index.core.postprocessor import SimilarityPostprocessor
 
 from config import config
-
+print("rag start loading")
 Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
 
 Settings.llm = None
@@ -14,7 +14,7 @@ Settings.chunk_overlap = config["chunk_overlap"]
 
 documents = SimpleDirectoryReader("rag_data").load_data()
 index = VectorStoreIndex.from_documents(documents)
-
+print("rag loaded")
 def get_context(query, top_k = 3):
     retriever = VectorIndexRetriever(
         index=index,
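get_context is cut off in this diff right after the VectorIndexRetriever constructor. Given the imports already present in rag.py (RetrieverQueryEngine, SimilarityPostprocessor), a plausible continuation looks like the sketch below; the similarity_top_k wiring, the cutoff value, and the way chunks are joined are assumptions, not the file's actual contents.

def get_context(query, top_k=3):
    # Sketch only: everything past the first retriever argument is assumed.
    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=top_k,   # presumably where config["top_k"] ends up
    )
    query_engine = RetrieverQueryEngine(
        retriever=retriever,
        node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.5)],
    )
    result = query_engine.query(query)

    # Join the retrieved chunks into one context string for the prompt.
    return "\n\n".join(node.node.get_content() for node in result.source_nodes)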