Spaces:
No application file
No application file
🍻 cheers
Browse files
- app.py +28 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from threading import Thread
|
2 |
+
|
3 |
+
from closedai import ClosedAIPipeline
|
4 |
+
from closedai.server import app, data # noqa
|
5 |
+
|
6 |
+
from transformers import LlamaForCausalLM, LlamaTokenizer, TextIteratorStreamer, pipeline
|
7 |
+
|
8 |
+
class LlamaPipeline(ClosedAIPipeline):
    """Streaming text-generation pipeline backed by a LLaMA 7B checkpoint.

    The constructor loads the tokenizer and model, then builds a transformers
    ``text-generation`` pipeline whose output is routed through a
    ``TextIteratorStreamer`` so completions can be yielded piece by piece.

    NOTE(review): a single streamer instance is created in ``__init__`` and
    reused by every ``generate_completion`` call, so overlapping calls would
    read from the same stream. Confirm the server never runs two completions
    concurrently.
    """

    def __init__(self):
        """Load the tokenizer and model, then build the streaming pipeline."""
        checkpoint = "decapoda-research/llama-7b-hf"
        tokenizer = LlamaTokenizer.from_pretrained(checkpoint)
        # "auto" is a device_map value, not a device string: passing
        # device="auto" to pipeline() is rejected when the device is resolved.
        # Placement is therefore requested at load time via device_map, which
        # relies on the accelerate package.
        model = LlamaForCausalLM.from_pretrained(checkpoint, device_map="auto")
        # skip_prompt=True asks the streamer not to echo the prompt text back.
        self.streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
        self.pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            streamer=self.streamer,
        )

    def generate_completion(self, text, **generate_kwargs):
        """Yield the completion for ``text`` incrementally.

        Args:
            text: Prompt passed to the pipeline as ``text_inputs``.
            **generate_kwargs: Extra keyword arguments forwarded unchanged to
                the pipeline call.

        Yields:
            Each chunk of text produced by ``self.streamer``, in order.
        """
        # Generation runs in a background thread so this generator can consume
        # the streamer while the pipeline call is still producing output.
        worker = Thread(
            target=self.pipe.__call__,
            kwargs=dict(text_inputs=text, **generate_kwargs),
        )
        worker.start()
        yield from self.streamer
|
26 |
+
|
27 |
+
# Build the pipeline once at import time.
# NOTE(review): this rebinds the module-level name `pipeline`, shadowing the
# `pipeline` factory imported from transformers. LlamaPipeline.__init__ looks
# that name up at call time, so constructing a second LlamaPipeline after this
# line would no longer reach the transformers factory.
pipeline = LlamaPipeline()
|
28 |
+
# Register the instance under the "pipeline" key of the `data` mapping
# imported from closedai.server.
data["pipeline"] = pipeline
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
git+https://github.com/nateraw/closedai.git
|
2 |
+
git+https://github.com/huggingface/transformers.git
|
3 |
+
sentencepiece
|
4 |
+
accelerate
|