---
license: mit
pipeline_tag: text-generation
metrics:
- accuracy
---

# Code

The snippet below loads the fine-tuned model with Unsloth, builds an Alpaca-style prompt, and streams a generated response.

```python
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048  # Choose any! RoPE scaling is supported internally.
dtype = None           # None for auto detection. Float16 for Tesla T4/V100, Bfloat16 for Ampere+.
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "rohansb10/python_code_gen_using_llama-3.1_8b",  # the fine-tuned model
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

# The same Alpaca prompt template that was used during training.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN

def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

inputs = tokenizer(
    [
        alpaca_prompt.format(
            "Develop a function in Python that prints out the Pascal's triangle for a given number of rows.",  # instruction
            "",  # input
            "",  # output - leave this blank for generation!
        )
    ],
    return_tensors = "pt",
).to("cuda")  # move the tokenized prompt to the same device as the model

from transformers import TextStreamer

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)
```
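If you would rather capture the completion as a string instead of streaming it to stdout, a minimal sketch reusing the `model`, `tokenizer`, and `inputs` objects from the snippet above could look like this:

```python
# A minimal sketch: decode the completion instead of streaming it.
# Assumes `model`, `tokenizer`, and `inputs` from the snippet above are in scope.
output_ids = model.generate(**inputs, max_new_tokens=256, use_cache=True)

# Strip the prompt tokens so only the newly generated response remains.
prompt_length = inputs["input_ids"].shape[1]
generated = tokenizer.batch_decode(
    output_ids[:, prompt_length:], skip_special_tokens=True
)[0]
print(generated)
```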
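Note that `formatting_prompts_func` is only needed at training time. As an illustration of how it would typically be applied to an Alpaca-style dataset (the dataset name below is a placeholder, not necessarily the data this model was actually fine-tuned on):

```python
# Hypothetical training-time usage of formatting_prompts_func.
# "yahma/alpaca-cleaned" is an example Alpaca-style dataset, used here only
# for illustration.
from datasets import load_dataset

dataset = load_dataset("yahma/alpaca-cleaned", split="train")
# batched=True so the function receives lists of instructions/inputs/outputs.
dataset = dataset.map(formatting_prompts_func, batched=True)
print(dataset[0]["text"][:200])  # inspect one formatted training example
```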