from transformers import LlamaForCausalLM, LlamaTokenizer
from datasets import load_dataset
# Load the multi-task DNA fine-tuning dataset
dna_ft_dataset = load_dataset('dnagpt/dna_multi_task_finetune')
# Keep 10% as the train split for a quick demo; the remaining 90% becomes "test"
data = dna_ft_dataset["train"].train_test_split(train_size=0.1, seed=42)
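# Quick sanity check (illustrative): each record should expose the
# 'instruction', 'input', and 'output' fields used by the prompt builders below.
print(data["train"][0])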
tokenizer = LlamaTokenizer.from_pretrained("dnagpt/llama-dna-sft")
tokenizer.pad_token = tokenizer.eos_token  # LLaMA has no pad token; reuse EOS
model = LlamaForCausalLM.from_pretrained("dnagpt/llama-dna-sft")  # SFT checkpoint
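# Optional sketch, assuming a PyTorch backend: move the model to a GPU when
# one is available and switch to eval mode before running inference.
import torch
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
model.eval()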
# Build the instruction prompt (Alpaca style)
def format_input(entry):
    instruction_text = (
        f"Below is an instruction that describes a task. "
        f"Write a response that appropriately completes the request."
        f"\n\n### Instruction:\n{entry['instruction']}"
    )
    input_text = f"\n\n### Input:\n{entry['input']}" if entry["input"] else ""
    return instruction_text + input_text + "\n\n### Response:\n"
# Build the full training prompt: formatted input plus the desired response
def build_prompt(entry):
    input_data = format_input(entry)
    desired_response = entry['output']
    return input_data + desired_response
example = data["test"][0]
prompt = build_prompt(example)
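# Inspect the assembled Alpaca-style prompt (Instruction / Input / Response sections)
print(prompt)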
def inference(text, model, tokenizer, max_input_tokens=1000, max_output_tokens=1000):
    # Tokenize the prompt, truncating overly long inputs
    input_ids = tokenizer.encode(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_tokens
    )
    # Generate on the same device as the model
    # (max_output_tokens is unused here; outputs are capped at 8 new tokens,
    # since the expected answers are short labels)
    device = model.device
    generated_tokens_with_prompt = model.generate(
        input_ids=input_ids.to(device),
        max_new_tokens=8,
        do_sample=True,
        temperature=0.01,  # near-greedy; temperature is ignored unless do_sample=True
        pad_token_id=tokenizer.eos_token_id
    )
    # Decode, then strip the prompt so only the model's answer remains
    generated_text_with_prompt = tokenizer.decode(generated_tokens_with_prompt[0], skip_special_tokens=True)
    generated_text_answer = generated_text_with_prompt[len(text):]
    return generated_text_answer
input_text = format_input(data["test"][0])
print("input (test):", input_text)
print("real answer:", data["test"][0]["output"])
print("--------------------------\n")
print("model's answer: \n")
print(inference(input_text, model, tokenizer))
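# A minimal exact-match check over a few test examples (illustrative sketch;
# assumes the fine-tuned model emits the expected label verbatim).
n = 10
correct = 0
for i in range(n):
    entry = data["test"][i]
    pred = inference(format_input(entry), model, tokenizer).strip()
    correct += int(pred == entry["output"].strip())
print(f"exact-match accuracy on {n} samples: {correct / n:.2f}")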