File size: 4,266 Bytes
b585c7f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import pytest
from tests.utils import wrap_test_forked
@pytest.mark.need_tokens
@wrap_test_forked
def test_langchain_simple_h2ogpt():
    """Run the simple LangChain check with the h2oGPT OASST1 12B model and the human_bot prompt type."""
    settings = dict(
        base_model='h2oai/h2ogpt-oasst1-512-12b',
        prompt_type='human_bot',
    )
    run_langchain_simple(**settings)
@pytest.mark.need_tokens
@wrap_test_forked
def test_langchain_simple_vicuna():
    """Run the simple LangChain check with the Wizard-Vicuna 13B model and the instruct_vicuna prompt type."""
    settings = dict(
        base_model='junelee/wizard-vicuna-13b',
        prompt_type='instruct_vicuna',
    )
    run_langchain_simple(**settings)
def run_langchain_simple(base_model='h2oai/h2ogpt-oasst1-512-12b', prompt_type='human_bot'):
    """
    Build a LangChain question-answering chain on top of a Hugging Face causal LM and
    assert that its answer to a fixed question contains one of the expected phrases.

    :param base_model: model name or path handed to the Hugging Face ``from_pretrained`` loaders
    :param prompt_type: prompt_type required for stopping support and correct handling of instruction prompting
    :return: None
    :raises AssertionError: if the generated text contains none of the expected phrases
    """
    import torch
    from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
    from src.h2oai_pipeline import H2OTextGenerationPipeline

    # A model counts as LLaMa-type if either its config or its name mentions "llama";
    # those models are loaded with the LLaMa-specific classes instead of the Auto* ones.
    config = AutoConfig.from_pretrained(base_model, token=True,
                                        trust_remote_code=True,
                                        offload_folder="./")
    llama_type = 'llama' in str(config).lower() or "llama" in base_model.lower()
    if llama_type:
        from transformers import LlamaForCausalLM, LlamaTokenizer
        model_loader = LlamaForCausalLM
        tokenizer_loader = LlamaTokenizer
    else:
        model_loader = AutoModelForCausalLM
        tokenizer_loader = AutoTokenizer

    # NOTE(review): 8-bit loading is requested even when no GPU is detected --
    # confirm the installed quantization backend supports the CPU path.
    load_in_8bit = True
    n_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0
    device = 'cpu' if n_gpus == 0 else 'cuda'
    # With a GPU present, place the whole model on device 0; otherwise let the loader decide.
    device_map = {"": 0} if device == 'cuda' else "auto"

    tokenizer = tokenizer_loader.from_pretrained(base_model, padding_side="left")
    model = model_loader.from_pretrained(base_model, torch_dtype=torch.bfloat16, device_map=device_map,
                                         load_in_8bit=load_in_8bit)

    gen_kwargs = dict(max_new_tokens=512, return_full_text=True, early_stopping=False)
    pipe = H2OTextGenerationPipeline(model=model, tokenizer=tokenizer, prompt_type=prompt_type,
                                     base_model=base_model, **gen_kwargs)
    # below makes it listen only to our prompt removal,
    # not built in prompt removal that is less general and not specific for our model
    pipe.task = "text2text-generation"

    # create llm for LangChain
    from langchain.llms import HuggingFacePipeline
    llm = HuggingFacePipeline(pipeline=pipe)

    # Setup QA
    from langchain import PromptTemplate
    from langchain.chains.question_answering import load_qa_chain

    # NOTE: Instruct-tuned models don't need excessive many-shot examples that waste context space
    template = """
==
{context}
==
{question}"""
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template,
    )
    chain = load_qa_chain(llm, prompt=prompt)

    docs = []  # could have been some Documents from LangChain inputted from some sources
    query = "Give detailed list of reasons for who is smarter, Einstein or Newton."
    chain_kwargs = dict(input_documents=docs, question=query)
    answer = chain(chain_kwargs)
    print(answer)

    output_text = answer['output_text']
    # Match the model family case-insensitively, consistent with the llama detection above.
    if 'vicuna' in base_model.lower():
        expected_phrases = (
            'Both Albert Einstein and Sir Isaac Newton were brilliant scientists',
            'Both Albert Einstein and Sir Isaac Newton are considered two',
        )
    else:
        expected_phrases = (
            'Einstein was a genius who revolutionized physics',
            'Einstein and Newton are two of the most famous scientists in history',
            'Einstein is considered to be the smartest person',
            'Einstein was a brilliant scientist',
        )
    # The answer must mention Newton and contain at least one of the known phrases.
    matched = "Newton" in output_text and any(phrase in output_text for phrase in expected_phrases)
    assert matched, f"Unexpected answer from {base_model}: {output_text!r}"
|