Hyde LLaMa 2 7B Legal

Model Details

Backbone Model: meta-llama/Llama-2-7b-chat

Input: The model takes text input only.

Output: The model generates text only.
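
As a quick sanity check before the full inference script below, the model can also be driven through the generic transformers text-generation pipeline (a minimal sketch; the prompt wording mirrors the one used in hyde_gen below):

from transformers import pipeline

# Generic text-generation pipeline; device_map="auto" requires the accelerate package.
generator = pipeline("text-generation", model="akkshay/hyde-llama-7b", device_map="auto")
out = generator("Write legal facts about the following topic:\nVW emissions scandal\n", max_new_tokens=100)
print(out[0]["generated_text"])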

Inference

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def hyde_gen(
        topic: str,
        model: object,
        tokenizer: object,
        device: object,
    ):
    prompt = (
        f"Write legal facts about the following topic:\n{topic}\n"
    )
    len_prompt = len(prompt)

    # Tokenize the prompt and move it to the same device as the model.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(device)
    output = model.generate(
        **inputs,
        max_new_tokens=300,
        early_stopping=True,
        do_sample=True,
        top_k=10,
        top_p=0.98,
        no_repeat_ngram_size=3,
        eos_token_id=2,  # Llama 2 </s> token id
        repetition_penalty=1.1,
        num_beams=3,
    )

    # Decode without special tokens so the prompt-length slice lines up,
    # then strip the echoed prompt so only the generated text is returned.
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    return decoded[len_prompt:]

def hyde_infer(input_topic):
    device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

    model_pth = "akkshay/hyde-llama-7b"
    model = AutoModelForCausalLM.from_pretrained(
        model_pth,
        # Put all weights on GPU 0 when CUDA is available; otherwise load on CPU.
        device_map={"": 0} if torch.cuda.is_available() else None,
        # FP16 on GPU; FP32 on CPU, where half precision is poorly supported.
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        low_cpu_mem_usage=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_pth, use_fast=True)

    model.eval()
    model.config.use_cache = True  # enable the KV cache for faster decoding
    tokenizer.pad_token = tokenizer.eos_token
    output = hyde_gen(
                topic=input_topic, 
                model=model, 
                tokenizer=tokenizer, 
                device=device
    )

    return output


if __name__ == "__main__":
    fact = hyde_infer("VW emissions scandal")
    print(fact)
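
Note that the generation call above combines beam search (num_beams=3, early_stopping=True) with sampling (do_sample=True, top_k, top_p), which transformers runs as beam-sample decoding; repetition_penalty and no_repeat_ngram_size further discourage repeated phrases in the generated facts.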

Since Hyde Llama 2 uses the fast tokenizer provided by the HF tokenizers library rather than the sentencepiece package, pass use_fast=True when initializing the tokenizer.
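
As a minimal check, the is_fast attribute (standard in transformers) confirms that the Rust-backed fast tokenizer was loaded:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("akkshay/hyde-llama-7b", use_fast=True)
assert tokenizer.is_fast  # True for the HF tokenizers (Rust) implementation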

Lastly, Apple M1/M2 chips do not support BF16 compute, so use the CPU instead.
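
A minimal device/dtype selection sketch reflecting this advice (the CPU fallback for Apple silicon is an assumption based on the note above):

import torch

if torch.cuda.is_available():
    # NVIDIA GPU: FP16 inference is well supported.
    device, dtype = torch.device("cuda:0"), torch.float16
else:
    # Apple M1/M2 and other CPU-only machines: stay on CPU with FP32,
    # since BF16 is unsupported there per the note above.
    device, dtype = torch.device("cpu"), torch.float32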
