bambadij committed
Commit 9948d06 · 1 Parent(s): 5ff6e68
Files changed (1):
  1. app.py (+20 -20)
app.py CHANGED
@@ -81,29 +81,29 @@ class RequestModel(BaseModel):
 @app.post("/generate/")
 async def generate_text(request: RequestModel):
     # Create the request for the NVIDIA API
-    completion = client.chat.completions.create(
-        model="meta/llama-3.1-8b-instruct",
-        messages=[{"role": "user", "content": default_prompt + request.text}],
-        temperature=0.2,
-        top_p=0.7,
-        max_tokens=1024,
-        stream=True
-    )
-    # response = ollama.chat(
-    #     model="llama3",
-    #     messages=[
-    #         {
-    #             "role": "user",
-    #             "content": default_prompt + request.text,
-    #         },
-    #     ],
+    # completion = client.chat.completions.create(
+    #     model="meta/llama-3.1-8b-instruct",
+    #     messages=[{"role": "user", "content": default_prompt + request.text}],
+    #     temperature=0.2,
+    #     top_p=0.7,
+    #     max_tokens=1024,
+    #     stream=True
     # )
+    response = ollama.chat(
+        model="llama3",
+        messages=[
+            {
+                "role": "user",
+                "content": default_prompt + request.text,
+            },
+        ],
+    )
     # Generate the text in real time
 
-    generated_text = ""
-    for chunk in completion:
-        if chunk.choices[0].delta.content is not None:
-            generated_text += chunk.choices[0].delta.content
+    generated_text = response["message"]["content"]
+    # for chunk in completion:
+    #     if chunk.choices[0].delta.content is not None:
+    #         generated_text += chunk.choices[0].delta.content
 
     return {"generated_text": generated_text}
 if __name__ == "__main__":
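
The net effect of this commit is that generation now goes through a locally running Ollama server instead of the NVIDIA-hosted OpenAI-compatible endpoint, and the streamed completion is replaced by a single blocking call whose full message content is returned. Below is a minimal sketch of what the endpoint looks like after the change; it is not the full app.py. The placeholder default_prompt value and the uvicorn host/port are assumptions, since those lines are outside this hunk, and it presumes the ollama package can reach a local server with the llama3 model pulled.

import ollama
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

# Assumption: the real prompt text is defined earlier in app.py.
default_prompt = "Summarize the following text:\n"

class RequestModel(BaseModel):
    text: str

@app.post("/generate/")
async def generate_text(request: RequestModel):
    # Single blocking call to the local Ollama server (no streaming after this commit).
    response = ollama.chat(
        model="llama3",
        messages=[{"role": "user", "content": default_prompt + request.text}],
    )
    generated_text = response["message"]["content"]
    return {"generated_text": generated_text}

if __name__ == "__main__":
    # Assumption: host and port are not shown in this hunk.
    uvicorn.run(app, host="0.0.0.0", port=8000)

Note that the removed NVIDIA path streamed tokens as they arrived; if streaming matters, ollama.chat also accepts stream=True and then yields chunks whose message content can be concatenated, much like the old loop over completion chunks.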