Gokulavelan committed
Commit 38d9b9a · 1 Parent(s): aede67d
Files changed (4)
  1. Dockerfile +10 -0
  2. README copy.md +14 -0
  3. main.py +20 -0
  4. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,10 @@
+ FROM python:3.10
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+ RUN pip install -r requirements.txt
+
+ COPY . .
+
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
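To try the image locally, the standard Docker build-and-run workflow applies; the qwen-api tag below is chosen here for illustration and is not part of the commit:

    docker build -t qwen-api .
    docker run -p 7860:7860 qwen-api

Port 7860 matches the CMD line above and is the default port Hugging Face Spaces expects.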
README copy.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: Qwen2.5 7b 4bit
+ emoji: 🐠
+ colorFrom: purple
+ colorTo: red
+ sdk: gradio
+ sdk_version: 5.21.0
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ short_description: qwen for agent
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
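Note that this front matter declares sdk: gradio with app_file: app.py, while the commit adds a FastAPI app in main.py served by uvicorn from the Dockerfile. Per the configuration reference linked above, a front matter matching the Docker setup could instead look like this sketch (app_port mirrors the port in the CMD line):

    ---
    title: Qwen2.5 7b 4bit
    emoji: 🐠
    sdk: docker
    app_port: 7860
    license: apache-2.0
    ---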
main.py ADDED
@@ -0,0 +1,20 @@
+ from fastapi import FastAPI
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+
+ app = FastAPI()
+
+ model_name = "mistralai/Mistral-7B-Instruct-v0.1" # Change to your model
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ @app.get("/")
+ def read_root():
+     return {"message": "Chat API is running!"}
+
+ @app.post("/chat")
+ def chat(prompt: str):
+     inputs = tokenizer(prompt, return_tensors="pt")
+     outputs = model.generate(**inputs, max_new_tokens=100)
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return {"response": response}
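Because the /chat handler declares a bare prompt: str, FastAPI reads it from the query string rather than from a JSON body. A minimal client call, assuming the container built from the Dockerfile is reachable on localhost:7860:

    import requests

    # the plain str parameter is a query parameter, so it goes in params, not json
    resp = requests.post("http://localhost:7860/chat", params={"prompt": "Hello, who are you?"})
    print(resp.json()["response"])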
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ fastapi
+ uvicorn
+ transformers
+ torch
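All four dependencies are unpinned, so rebuilds of the image may pull incompatible releases. A pinned variant would look like the sketch below; the version numbers are illustrative, not taken from the commit:

    fastapi==0.110.0
    uvicorn==0.29.0
    transformers==4.40.0
    torch==2.2.0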