Spaces:

Marroco93
/

PacmanAI-2

Sleeping

App Files Files Community

Marroco93 committed on Apr 1, 2024

Commit

bc5e3f5

1 Parent(s): 0f34bf3

llama2

Browse files

Files changed (2) hide show

main.py +15 -10
requirements.txt +2 -1

main.py CHANGED Viewed

@@ -5,11 +5,13 @@ from huggingface_hub import InferenceClient
 import uvicorn
 from typing import Generator
 import json  # Asegúrate de que esta línea esté al principio del archivo
 app = FastAPI()
 # Initialize the InferenceClient with your model
-client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 class Item(BaseModel):
     prompt: str
@@ -21,12 +23,16 @@ class Item(BaseModel):
     repetition_penalty: float = 1.0
 def format_prompt(message, history):
-    prompt = "<s>"
     for user_prompt, bot_response in history:
-        prompt += f"[INST] {user_prompt} [/INST]"
-        prompt += f" {bot_response}</s> "
-    prompt += f"[INST] {message} [/INST]"
-    return prompt
 def generate_stream(item: Item) -> Generator[bytes, None, None]:
     formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
@@ -41,17 +47,16 @@ def generate_stream(item: Item) -> Generator[bytes, None, None]:
     # Stream the response from the InferenceClient
     for response in client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True):
-        # This assumes 'details=True' gives you a structure where you can access the text like this
         chunk = {
             "text": response.token.text,
-            "complete": response.generated_text is not None  # Adjust based on how you detect completion
         }
         yield json.dumps(chunk).encode("utf-8") + b"\n"
 @app.post("/generate/")
 async def generate_text(item: Item):
-    # Stream response back to the client
     return StreamingResponse(generate_stream(item), media_type="application/x-ndjson")
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)

 import uvicorn
 from typing import Generator
 import json  # Asegúrate de que esta línea esté al principio del archivo
+import torch
 app = FastAPI()
 # Initialize the InferenceClient with your model
+client = InferenceClient("meta-llama/Llama-2-7b-chat")
 class Item(BaseModel):
     prompt: str
     repetition_penalty: float = 1.0
 def format_prompt(message, history):
+    # Simple structure: alternating lines of dialogue, no special tokens unless specified by the model documentation
+    conversation = ""
     for user_prompt, bot_response in history:
+        conversation += f"User: {user_prompt}\nBot: {bot_response}\n"
+    conversation += f"User: {message}"
+    return conversation
+# No changes needed in the format_prompt function unless the new model requires different prompt formatting
 def generate_stream(item: Item) -> Generator[bytes, None, None]:
     formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
     # Stream the response from the InferenceClient
     for response in client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True):
+        # Check if the 'details' flag and response structure are the same for the new model
         chunk = {
             "text": response.token.text,
+            "complete": response.generated_text is not None
         }
         yield json.dumps(chunk).encode("utf-8") + b"\n"
 @app.post("/generate/")
 async def generate_text(item: Item):
     return StreamingResponse(generate_stream(item), media_type="application/x-ndjson")
 if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 fastapi
 uvicorn
 huggingface_hub
-pydantic

 fastapi
 uvicorn
 huggingface_hub
+pydantic
+torch==2.0.0