PawinC committed on
Commit
43bf3d6
·
verified ·
1 Parent(s): 2ba1842

Reduce number of models to 2

Browse files
Files changed (1) hide show
  1. app/main.py +22 -22
app/main.py CHANGED
@@ -15,7 +15,7 @@ from typing import Optional
15
  print("Loading model...")
16
  SAllm = Llama(model_path="/models/final-gemma2b_SA-Q8_0.gguf", use_mmap=False, use_mlock=True)
17
  FIllm = Llama(model_path="/models/final-gemma7b_FI-Q8_0.gguf", use_mmap=False, use_mlock=True)
18
- WIllm = Llama(model_path="/models/final-GemmaWild7b-Q8_0.gguf", use_mmap=False, use_mlock=True)
19
  # n_gpu_layers=28, # Uncomment to use GPU acceleration
20
  # seed=1337, # Uncomment to set a specific seed
21
  # n_ctx=2048, # Uncomment to increase the context window
@@ -45,7 +45,7 @@ def check_sentiment(text):
45
  print("Testing model...")
46
  assert "positive" in check_sentiment("ดอกไม้ร้านนี้สวยจัง")
47
  assert ask_llm(FIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
48
- assert ask_llm(WIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
49
  print("Ready.")
50
 
51
 
@@ -127,23 +127,23 @@ async def ask_gemmaFinanceTH(
127
  return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
128
 
129
 
130
- @app.post('/questions/open-ended')
131
- async def ask_gemmaWild(
132
- prompt: str = Body(..., embed=True, example="Why is ice cream so delicious?"),
133
- temperature: float = Body(0.5, embed=True),
134
- max_new_tokens: int = Body(200, embed=True)
135
- ) -> QuestionResponse:
136
- """
137
- Ask a finetuned Gemma an open-ended question..
138
- NOTICE: IT MAY PRODUCE RANDOM/INACCURATE ANSWERS. PLEASE SEEK PROFESSIONAL ADVICE BEFORE DOING ANYTHING SERIOUS.
139
- """
140
- if prompt:
141
- try:
142
- print(f'Asking GemmaWild with the question "{prompt}"')
143
- result = ask_llm(WIllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
144
- print(f"Result: {result}")
145
- return QuestionResponse(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
146
- except Exception as e:
147
- return HTTPException(500, QuestionResponse(code=500, answer=str(e), question=prompt))
148
- else:
149
- return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
 
15
  print("Loading model...")
16
  SAllm = Llama(model_path="/models/final-gemma2b_SA-Q8_0.gguf", use_mmap=False, use_mlock=True)
17
  FIllm = Llama(model_path="/models/final-gemma7b_FI-Q8_0.gguf", use_mmap=False, use_mlock=True)
18
+ # WIllm = Llama(model_path="/models/final-GemmaWild7b-Q8_0.gguf", use_mmap=False, use_mlock=True)
19
  # n_gpu_layers=28, # Uncomment to use GPU acceleration
20
  # seed=1337, # Uncomment to set a specific seed
21
  # n_ctx=2048, # Uncomment to increase the context window
 
45
  print("Testing model...")
46
  assert "positive" in check_sentiment("ดอกไม้ร้านนี้สวยจัง")
47
  assert ask_llm(FIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
48
+ # assert ask_llm(WIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
49
  print("Ready.")
50
 
51
 
 
127
  return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
128
 
129
 
130
+ # @app.post('/questions/open-ended')
131
+ # async def ask_gemmaWild(
132
+ # prompt: str = Body(..., embed=True, example="Why is ice cream so delicious?"),
133
+ # temperature: float = Body(0.5, embed=True),
134
+ # max_new_tokens: int = Body(200, embed=True)
135
+ # ) -> QuestionResponse:
136
+ # """
137
+ # Ask a finetuned Gemma an open-ended question..
138
+ # NOTICE: IT MAY PRODUCE RANDOM/INACCURATE ANSWERS. PLEASE SEEK PROFESSIONAL ADVICE BEFORE DOING ANYTHING SERIOUS.
139
+ # """
140
+ # if prompt:
141
+ # try:
142
+ # print(f'Asking GemmaWild with the question "{prompt}"')
143
+ # result = ask_llm(WIllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
144
+ # print(f"Result: {result}")
145
+ # return QuestionResponse(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
146
+ # except Exception as e:
147
+ # return HTTPException(500, QuestionResponse(code=500, answer=str(e), question=prompt))
148
+ # else:
149
+ # return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))