ychenNLP
/

GoLLIE-7B-TF

Feature Extraction

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

ychenNLP committed on Jun 17

Commit

a5a3a73

•

1 Parent(s): aa9e512

Update README.md

Files changed (1) hide show

README.md +47 -2

README.md CHANGED Viewed

@@ -30,7 +30,52 @@ Then you can load the model using
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-tokenizer = AutoTokenizer.from_pretrained("HiTZ/GoLLIE-7B")
-model = AutoModelForCausalLM.from_pretrained("HiTZ/GoLLIE-7B", trust_remote_code=True, torch_dtype=torch.bfloat16)
 model.to("cuda")
 ```

 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
+tokenizer = AutoTokenizer.from_pretrained("ychenNLP/GoLLIE-7B-TF")
+model = AutoModelForCausalLM.from_pretrained("ychenNLP/GoLLIE-7B-TF", trust_remote_code=True, torch_dtype=torch.bfloat16)
 model.to("cuda")
+test_input = r'''# The following lines describe the task definition
+@dataclass
+class LLM(Entity):
+    """Large language model names or model names. This is used for deep learning and NLP tasks."""
+    span: str  # Such as: "GPT-3.5", "LLama=7B", "ChatGPT"
+@dataclass
+class Hyperparams(Entity):
+    """Hyperparameter used for training large language  models. Including learning rate, scheduler, architecture"""
+    span: str  # Such as: "layernorm", "cosine scheduler"
+# This is the text to analyze
+text = "GoLLIE-7B-TFが本日リリースされました！ 1つのNVIDIA A100 GPUで推論が可能なサイズです 学習率は1e-4です 訓練にはLoRAが使用されています"
+# This is the English translation of the text
+eng_text = "GoLLIE-7B-TF is released today! * Sized for inference on 1 NVIDIA A100 GPUs * learning rate 1e-4 * LoRA is used for training"
+# Using translation and fusion
+# (1) generate annotation for eng_text
+# (2) generate annotation for text
+# The annotation instances that take place in the eng_text above are listed here
+result = [
+'''
+model_input = tokenizer(test_input, return_tensors="pt")
+print(model_input["input_ids"])
+model_input["input_ids"] = model_input["input_ids"][:, :-1]
+model_input["attention_mask"] = model_input["attention_mask"][:, :-1]
+model_ouput = model.generate(
+    **model_input.to(model.device),
+    max_new_tokens=128,
+    do_sample=False,
+    min_new_tokens=0,
+    num_beams=1,
+    num_return_sequences=1,
+)
+print(tokenizer.batch_decode(model_ouput))
 ```