FinancialSupport committed on
Commit
80d1253
1 Parent(s): 511ab6a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -30
app.py CHANGED
@@ -4,9 +4,9 @@ import copy
4
  import time
5
  import llama_cpp
6
  from llama_cpp import Llama
7
- from huggingface_hub import hf_hub_download
8
 
9
- llm = Llama(
10
  model_path=hf_hub_download(
11
  repo_id="FinancialSupport/saiga-7b-gguf",
12
  filename="saiga-7b.Q4_K_M.gguf",
@@ -14,11 +14,33 @@ llm = Llama(
14
  n_ctx=4086,
15
  )
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  history = []
18
 
19
  def generate_text(message, history):
20
  temp = ""
21
- input_prompt = "Conversazione tra umano ed un assistente AI di nome saiaga-7b\n"
22
  for interaction in history:
23
  input_prompt += "[|Umano|] " + interaction[0] + "\n"
24
  input_prompt += "[|Assistente|]" + interaction[1]
@@ -27,19 +49,28 @@ def generate_text(message, history):
27
 
28
  print(input_prompt)
29
 
30
- output = llm(
31
- input_prompt,
32
- temperature=0.15,
33
- top_p=0.1,
34
- top_k=40,
35
- repeat_penalty=1.1,
36
- max_tokens=1024,
37
- stop=[
38
- "[|Umano|]",
39
- "[|Assistente|]",
40
- ],
41
- stream=True,
42
- )
 
 
 
 
 
 
 
 
 
43
  for out in output:
44
  stream = copy.deepcopy(out)
45
  temp += stream["choices"][0]["text"]
@@ -48,19 +79,37 @@ def generate_text(message, history):
48
  history = ["init", input_prompt]
49
 
50
 
51
- demo = gr.ChatInterface(
52
- generate_text,
53
- title="saiga-7b running on CPU (quantized Q4_K)",
54
- description="This is a quantized version of saiga-7b running on CPU (very slow). It is less powerful than the original version, but it can even run on the free tier of huggingface.",
55
- examples=[
56
- "Dammi 3 idee di ricette che posso fare con i pistacchi",
57
- "Prepara un piano di esercizi da poter fare a casa",
58
- "Scrivi una poesia sulla nuova AI chiamata cerbero-7b"
59
- ],
60
- cache_examples=False,
61
- retry_btn=None,
62
- undo_btn="Delete Previous",
63
- clear_btn="Clear",
64
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  demo.queue(concurrency_count=1, max_size=5)
66
  demo.launch()
 
4
  import time
5
  import llama_cpp
6
  from llama_cpp import Llama
7
+ from huggingface_hub import hf_hub_download
8
 
9
# Both quantized checkpoints are downloaded from the same Hugging Face repo.
MODEL_REPO_ID = "FinancialSupport/saiga-7b-gguf"

# Value passed to Llama as n_ctx for both models.
# NOTE(review): 4086 may be a typo for 4096 -- confirm before changing.
CONTEXT_TOKENS = 4086


def _load_gguf(filename):
    """Download ``filename`` from MODEL_REPO_ID and open it as a Llama model.

    Args:
        filename: Name of the GGUF file inside the repository.

    Returns:
        A ``Llama`` instance whose ``model_path`` is whatever
        ``hf_hub_download`` returned for that file.
    """
    local_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=filename)
    return Llama(model_path=local_path, n_ctx=CONTEXT_TOKENS)


# Base model, then the variant described in the UI as having the Dante LoRA.
saiga = _load_gguf("saiga-7b.Q4_K_M.gguf")
dante = _load_gguf("saigaDante-7b.Q4_K_M.gguf")
24
+
25
# Sampling options shared by both models and forwarded as keyword arguments on
# every completion call. The prompt is intentionally NOT stored here: it only
# exists once a request arrives (referencing it at import time raised a
# NameError), and the model expects it as its own ``prompt`` argument.
karg = {
    'temperature': 0.15,
    'top_p': 0.1,
    'top_k': 40,
    'repeat_penalty': 1.1,
    'max_tokens': 1024,
    # Stop as soon as the model starts writing the next speaker tag.
    'stop': [
        "[|Umano|]",
        "[|Assistente|]",
    ],
    'stream': True,
}

history = []


def build_prompt(message, history):
    """Return the full text prompt for ``message`` given the prior turns.

    Args:
        message: The new user message (a string).
        history: Iterable of past turns; for each turn, element 0 is used as
            the user text and element 1 as the assistant reply.

    Returns:
        The header line, every past turn wrapped in the ``[|Umano|]`` /
        ``[|Assistente|]`` tags, then the new message followed by an open
        ``[|Assistente|]`` tag for the model to complete.
    """
    parts = ["Conversazione tra umano ed un assistente AI di nome saiga-7b\n"]
    for interaction in history:
        parts.append("[|Umano|] " + interaction[0] + "\n")
        parts.append("[|Assistente|]" + interaction[1])
    parts.append("[|Umano|] " + message + "\n[|Assistente|]")
    return "".join(parts)


def stream_reply(model, message, history):
    """Yield the growing reply text produced by ``model`` for ``message``.

    Args:
        model: Callable invoked as ``model(prompt, **karg)``; it must return
            an iterable of chunks shaped like
            ``{"choices": [{"text": ...}]}``.
        message: The new user message.
        history: Past turns, in the form accepted by ``build_prompt``.

    Yields:
        The text accumulated so far, once per chunk received.
    """
    input_prompt = build_prompt(message, history)
    print(input_prompt)

    temp = ""
    # Only the text field is read, so the chunk does not need to be copied.
    for out in model(input_prompt, **karg):
        temp += out["choices"][0]["text"]
        yield temp


def generate_text(message, history):
    """Stream a reply to ``message`` from the ``saiga`` model."""
    yield from stream_reply(saiga, message, history)


def generate_text_Dante(message, history):
    """Stream a reply to ``message`` from the ``dante`` model."""
    yield from stream_reply(dante, message, history)
80
 
81
 
82
# Example prompts offered under each chat tab.
saiga_examples = [
    "Dammi 3 idee di ricette che posso fare con i pistacchi",
    "Prepara un piano di esercizi da poter fare a casa",
    "Scrivi una poesia sulla nuova AI chiamata cerbero-7b",
]
dante_examples = [
    # If you find a valid translation example, add it here!
    "Traduci in volgare fiorentino: tanto va la gatta al lardo che ci lascia lo zampino",
    "Traduci in volgare fiorentino: come preparo la pasta alla carbonara?",
    "Traduci in volgare fiorentino: raccontami una fiaba su Firenze",
]

# Button configuration that is identical for both chat interfaces.
chat_buttons = {
    "retry_btn": None,
    "undo_btn": "Delete Previous",
    "clear_btn": "Clear",
}

# One page with a tab per model; each tab hosts its own chat interface.
with gr.Blocks() as demo:
    with gr.Tab('saiga'):
        gr.ChatInterface(
            generate_text,
            title="saiga-7b running on CPU (quantized Q4_K)",
            description="This is a quantized version of saiga-7b running on CPU (very slow). It is less powerful than the original version, but it can even run on the free tier of huggingface.",
            examples=saiga_examples,
            # Example caching is enabled for this tab only.
            cache_examples=True,
            **chat_buttons,
        )
    with gr.Tab('Dante'):
        gr.ChatInterface(
            generate_text_Dante,
            title="saigaDante-7b running on CPU (quantized Q4_K)",
            description="This is a quantized version of saiga-7b with Dante LoRA attached running on CPU (very slow).",
            examples=dante_examples,
            cache_examples=False,
            **chat_buttons,
        )

# Queue settings are applied before the app is launched.
demo.queue(concurrency_count=1, max_size=5)
demo.launch()