File size: 1,667 Bytes
058640f
b884c59
 
 
 
 
058640f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b884c59
058640f
 
 
 
 
 
 
b884c59
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from gradio_client import Client
# System instructions: restrict the model to the supplied retrieval context only.
# (Runtime text — must stay exactly as the model expects it.)
system_prompt = """You are a helpful assistant, you will use the provided context only to answer user questions.
    Read the given context before answering questions and think step by step. you could get context or question in other language than english.
    Answer only if the question related to the contexts, don't use your own data ..if the question isn't related to the context ,respond with "sorry..no provided context for this question".
    If you can not answer a user question based on provided context only , inform the user.
    Do not use any other information for answering user. Provide a detailed answer to the question."""

# Llama-2 chat-format delimiters: instruction wrapper and system-block wrapper.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

# System prompt wrapped in the <<SYS>> block.
SYSTEM_PROMPT = f"{B_SYS}{system_prompt}{E_SYS}"

# Per-turn template; callers fill in {context} and {question}.
instruction = """
    Context: {context}
    User: {question}"""

# Full prompt: [INST] <<SYS>> ... <</SYS>> Context/User [/INST]
prompt_template = f"{B_INST}{SYSTEM_PROMPT}{instruction}{E_INST}"
def connect_to_llama(query, context,
                     space_url="https://huggingface-projects-llama-2-13b-chat.hf.space/--replicas/5c42d8wx6/"):
    """Send a question plus retrieved context to a hosted Llama-2-13b chat Space.

    Parameters
    ----------
    query : str
        The user's question (may be in any language).
    context : str
        Retrieved context the model must base its answer on.
    space_url : str, optional
        Gradio Space endpoint to connect to. Defaults to the original
        hard-coded replica URL. NOTE(review): '--replicas/...' URLs are
        typically ephemeral — confirm this endpoint is still live.

    Returns
    -------
    Whatever the Space's ``/chat`` endpoint returns (the model's answer).
    """
    client = Client(space_url)
    # Message template — runtime text, kept exactly as originally sent.
    message = """
question:"{}"
context:"{}"
answer:
        """.format(query, context)
    result = client.predict(
        message,           # user message Textbox ('parameter_7' per original note)
        prompt_template,   # 'Optional system prompt' Textbox (module-level template)
        4096,              # 'Max new tokens' slider (numeric value between 0 and 4096)
        0.1,               # presumably temperature — TODO confirm against the Space's /chat signature
        0.05,              # presumably top-k or top-p — TODO confirm; original left it uncommented
        1,                 # 'Top-p (nucleus sampling)' slider (0.0–1) per original note
        1,                 # 'Repetition penalty' slider (1.0–2.0)
        api_name="/chat",
    )
    return result