llmware
/

dragon-yi-6b-v0

Text Generation

Inference Endpoints

Model card Files Files and versions Community

doberst committed on Nov 14, 2023

Commit

0a98de0

•

1 Parent(s): cffbcd1

Upload generation_test_hf_script.py

Files changed (1) hide show

generation_test_hf_script.py +9 -6

generation_test_hf_script.py CHANGED Viewed

@@ -27,15 +27,19 @@ def load_rag_benchmark_tester_ds():
 def run_test(model_name, test_ds):
-    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto",torch_dtype="auto",trust_remote_code=True)
-    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     device = "cuda" if torch.cuda.is_available() else "cpu"
     for i, entries in enumerate(test_ds):
         # prepare prompt packaging used in fine-tuning process
-        new_prompt = "<human>: " + entries["context"] + "\n" + entries["query"] + "\n" + "<bot>:" + "\n"
         inputs = tokenizer(new_prompt, return_tensors="pt")
         start_of_output = len(inputs.input_ids[0])
@@ -63,7 +67,7 @@ def run_test(model_name, test_ds):
         bot = output_only.find("<bot>:")
         if bot > -1:
             output_only = output_only[bot+len("<bot>:"):]
         # end - post-processing
         print("\n")
@@ -78,7 +82,6 @@ if __name__ == "__main__":
     test_ds = load_rag_benchmark_tester_ds()
     model_name = "llmware/dragon-yi-6b-v0"
     output = run_test(model_name,test_ds)

 def run_test(model_name, test_ds):
     device = "cuda" if torch.cuda.is_available() else "cpu"
+    print("update: model will be loaded on device - ", device)
+    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+    model.to(device)
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     for i, entries in enumerate(test_ds):
         # prepare prompt packaging used in fine-tuning process
+        new_prompt = "<human>: " + entries["context"] + "\n" + entries["query"] + "\n" + "<bot>:"
         inputs = tokenizer(new_prompt, return_tensors="pt")
         start_of_output = len(inputs.input_ids[0])
         bot = output_only.find("<bot>:")
         if bot > -1:
             output_only = output_only[bot+len("<bot>:"):]
         # end - post-processing
         print("\n")
     test_ds = load_rag_benchmark_tester_ds()
     model_name = "llmware/dragon-yi-6b-v0"
     output = run_test(model_name,test_ds)