Spaces:
Running
Running
File size: 2,950 Bytes
efe0924 8910711 efe0924 8910711 efe0924 6a0a9f7 efe0924 80d4e55 efe0924 80d4e55 efe0924 80d4e55 efe0924 8910711 80d4e55 8910711 80d4e55 efe0924 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
"""
Client test.
Run server:
python generate.py --base_model=h2oai/h2ogpt-oig-oasst1-512-6.9b
NOTE: For private models, add --use-auth_token=True
NOTE: --infer_devices=True (default) must be used for multi-GPU in case see failures with cuda:x cuda:y mismatches.
Currently, this will force model to be on a single GPU.
Then run this client as:
python client_test.py
For HF spaces:
HOST="https://h2oai-h2ogpt-chatbot.hf.space" python client_test.py
Result:
Loaded as API: https://h2oai-h2ogpt-chatbot.hf.space ✔
{'instruction_nochat': 'Who are you?', 'iinput_nochat': '', 'response': 'I am h2oGPT, a large language model developed by LAION.'}
For demo:
HOST="https://gpt.h2o.ai" python client_test.py
Result:
Loaded as API: https://gpt.h2o.ai ✔
{'instruction_nochat': 'Who are you?', 'iinput_nochat': '', 'response': 'I am h2oGPT, a chatbot created by LAION.'}
"""
# Global debug toggle: when True, get_client() prints the server's full API schema.
debug = False
import os
# Opt out of Hugging Face Hub telemetry before any HF-related code runs.
os.environ['HF_HUB_DISABLE_TELEMETRY'] = '1'
def get_client():
    """Build a gradio client for the target server.

    The server address comes from the HOST environment variable, falling
    back to the local default of http://localhost:7860.
    """
    from gradio_client import Client

    host = os.getenv('HOST', "http://localhost:7860")
    gradio_client = Client(host)
    if debug:
        # Dump every exposed endpoint — useful when diagnosing API mismatches.
        print(gradio_client.view_api(all_endpoints=True))
    return gradio_client
def test_client_basic():
    """Send a single non-chat prompt to the server and return the parsed reply.

    Returns a dict with the prompt, its (empty) auxiliary input, and the
    model response converted from Markdown to plain text.
    """
    prompt = "Who are you?"
    prompt_input = ''

    # Generation parameters, in the exact positional order /submit_nochat
    # expects. With chat=False only instruction_nochat / iinput_nochat carry
    # the prompt; the first three chat-mode fields are ignored by the server.
    request_args = [
        '',            # instruction (only for chat=True)
        '',            # iinput (only for chat=True)
        '',            # context
        False,         # stream_output: streaming is supported, but keep simple input/output mode
        'human_bot',   # prompt_type
        0.1,           # temperature
        0.75,          # top_p
        40,            # top_k
        1,             # num_beams
        50,            # max_new_tokens
        0,             # min_new_tokens
        False,         # early_stopping
        20,            # max_time
        1.0,           # repetition_penalty
        1,             # num_return_sequences
        True,          # do_sample
        False,         # chat: use the non-chat code path
        prompt,        # instruction_nochat
        prompt_input,  # iinput_nochat
    ]

    client = get_client()
    raw_response = client.predict(
        *tuple(request_args),
        api_name='/submit_nochat',
    )
    result = dict(instruction_nochat=prompt,
                  iinput_nochat=prompt_input,
                  response=md_to_text(raw_response))
    print(result)
    return result
import markdown # pip install markdown
from bs4 import BeautifulSoup # pip install beautifulsoup4
def md_to_text(md):
    """Strip Markdown formatting from *md* and return the plain text.

    Renders the Markdown to HTML first, then extracts the visible text
    with BeautifulSoup.
    """
    rendered_html = markdown.markdown(md)
    parsed = BeautifulSoup(rendered_html, features='html.parser')
    return parsed.get_text()
# Run the basic client smoke test when invoked directly as a script.
if __name__ == '__main__':
    test_client_basic()
|