pbevan11
/

llama-3.1-8b-ocr-correction-merged

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

pbevan11 committed on Jul 30, 2024

Commit

383292f

·

verified ·

1 Parent(s): 2d9dd76

Upload 2 files

Files changed (2) hide show

cog.yaml +12 -0
predict.py +35 -0

cog.yaml ADDED Viewed

	@@ -0,0 +1,12 @@

+# Cog build configuration for the OCR-correction predictor in predict.py.
+build:
+  gpu: true
+  python_version: "3.10"
+  system_packages:
+    - "libgl1-mesa-glx"
+    - "libglib2.0-0"
+  python_packages:
+    - "torch==2.0.1"
+    # NOTE(review): transformers 4.30.2 was released before Llama 3 -- confirm
+    # that the checkpoint and tokenizer referenced by predict.py load with it.
+    - "transformers==4.30.2"
+    - "peft==0.4.0"
+    # predict.py loads the model with load_in_8bit=True, which transformers
+    # only supports when accelerate and bitsandbytes are installed.
+    # NOTE(review): versions chosen to match the pins above -- confirm.
+    - "accelerate==0.20.3"
+    - "bitsandbytes==0.41.1"
+predict: "predict.py:Predictor"

predict.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import torch
+from peft import AutoModelForCausalLM
+from transformers import AutoTokenizer
+from cog import BasePredictor, Input
+class Predictor(BasePredictor):
+    def setup(self):
+        model_id = 'pbevan11/llama-3-8b-ocr-correction'
+        self.model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
+        self.tokenizer.pad_token = self.tokenizer.eos_token
+    def predict(self, instruction: str = Input(description="Instruction for the model"),
+                inp: str = Input(description="Input text to correct")) -> str:
+        prompt = self.create_prompt(instruction, inp)
+        input_ids = self.tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cuda()
+        out_ids = self.model.generate(input_ids=input_ids, max_new_tokens=5000, do_sample=False)
+        full_output = self.tokenizer.batch_decode(out_ids.detach().cpu().numpy(), skip_special_tokens=True)[0]
+        response_start = full_output.find("### Response:")
+        if response_start != -1:
+            return full_output[response_start + len("### Response:"):]
+        else:
+            return full_output[len(prompt):]
+    def create_prompt(self, instruction, inp):
+        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+### Instruction:
+{instruction}
+### Input:
+{inp}
+### Response:
+"""