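"""Interactive prompting against a trained LLM Studio experiment.

Example invocation (the script file name is an assumption; adjust to the actual name):

    python prompt.py -e output/user/<experiment_name> -d cuda:0
"""
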
import os

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"

import argparse

import numpy as np
import torch

from llm_studio.src.datasets.text_utils import get_tokenizer
from llm_studio.src.utils.config_utils import load_config_yaml
from llm_studio.src.utils.modeling_utils import load_checkpoint, set_generation_config
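

# parse_param: applies "--param value" overrides typed at the prompt
# (e.g. "--num_beams 4") to cfg.prediction, casting each value to the
# attribute's existing type.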
def parse_param(cfg, prompt):
    prompt = prompt.replace("--", "")
    parts = prompt.split(" ")
    args = [" ".join(parts[i : i + 2]) for i in range(0, len(parts), 2)]
    for arg in args:
        splitted_arg = arg.split(" ")
        setattr(
            cfg.prediction,
            splitted_arg[0],
            type(getattr(cfg.prediction, splitted_arg[0]))(splitted_arg[1]),
        )
        print(
            f"Permanently changed {splitted_arg[0]} to",
            getattr(cfg.prediction, splitted_arg[0]),
        )
    return cfg


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Sample prompting.")
    parser.add_argument(
        "-e",
        "--experiment",
        type=str,
        required=True,
        help="Name of the experiment output folder",
    )
    parser.add_argument(
        "-d", "--device", type=str, required=False, default="cuda:0", help="Device"
    )
    args, unknown = parser.parse_known_args()

    DEVICE = args.device

    cfg = load_config_yaml(os.path.join(args.experiment, "cfg.yaml"))
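
    # Override a handful of settings so the experiment config can be used
    # for pure interactive inference on a single device.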
    cfg.training.epochs = 0

    cfg.environment._device = DEVICE
    cfg.environment._local_rank = 0

    cfg.tokenizer.padding_quantile = 0

    cfg.environment.mixed_precision = True
    cfg.architecture.gradient_checkpointing = False
    cfg.architecture.pretrained = False

    cfg.prediction.max_length_inference = 256

    if cfg.dataset.text_prompt_start == "":
        cfg.dataset.text_prompt_start = "\n"
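
    # Optional generation settings; uncomment to override the experiment defaults,
    # or change them on the fly at the prompt via "--param value".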
    # cfg.prediction.min_length_inference = 2
    # cfg.prediction.max_length_inference = 256
    # cfg.prediction.repetition_penalty = 1.5
    # cfg.prediction.temperature = 0.3
    # cfg.prediction.num_beams = 2
    # cfg.prediction.do_sample = False
    # cfg.prediction.top_p = 0.9
    # cfg.prediction.top_k = 40
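
    # Build the tokenizer and model exactly as defined by the experiment config,
    # then load the trained weights from the experiment's checkpoint.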
    tokenizer = get_tokenizer(cfg)

    print("Loading model weights...")

    with torch.device(DEVICE):
        model = cfg.architecture.model_class(cfg)
        cfg.architecture.pretrained_weights = os.path.join(
            args.experiment, "checkpoint.pth"
        )
        load_checkpoint(cfg, model, strict=True)

    model = model.to(DEVICE).eval()
    # Enable the KV cache and apply the generation settings from cfg.prediction.
    model.backbone.use_cache = True
    model.backbone = set_generation_config(model.backbone, cfg.prediction)

    print()
    print("=============")
    print(
        "You can change inference parameters on the fly by typing --param value, "
        "such as --num_beams 4. You can also chain them such as --num_beams 4 "
        "--top_k 30."
    )
    print()
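
    # Interactive loop: read a prompt, apply any "--param value" overrides,
    # otherwise encode the prompt, generate, decode, and print the answer.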
    while True:
        prompt = input("Please enter some prompt (type 'exit' to stop): ")

        try:
            if prompt.lower() == "exit":
                break

            if prompt.lower().startswith("--"):
                cfg = parse_param(cfg, prompt)
                model.backbone = set_generation_config(
                    model.backbone, cfg.prediction
                )
                continue

            # Wrap the raw input into the prompt template used during training.
            prompt = cfg.dataset.dataset_class.parse_prompt(cfg, prompt)
            print(prompt)

            inputs = cfg.dataset.dataset_class.encode(
                tokenizer, prompt, cfg.tokenizer.max_length_prompt, "left"
            )
            inputs["prompt_input_ids"] = (
                inputs.pop("input_ids").unsqueeze(0).to(DEVICE)
            )
            inputs["prompt_attention_mask"] = (
                inputs.pop("attention_mask").unsqueeze(0).to(DEVICE)
            )

            # Generate without gradients and under mixed-precision autocast.
            output = {}
            with torch.no_grad():
                with torch.cuda.amp.autocast():
                    output["predicted_answer_ids"] = (
                        model.generate(inputs, cfg).detach().cpu()
                    )

            predicted_text = [
                tokenizer.decode(ids, skip_special_tokens=True)
                for ids in output["predicted_answer_ids"]
            ]
            output["predicted_text"] = np.array(predicted_text)

            output = cfg.dataset.dataset_class.clean_output(output, cfg)

            output = output["predicted_text"][0]
            print(output)
            print()
        except Exception as e:
            print("Error: {}".format(e))
            print("Something went wrong, please try again.")