##### THESE TESTS CAN ONLY RUN LOCALLY WITH THE OLLAMA SERVER RUNNING #####
# import aiohttp
# import json
# import asyncio
#
# async def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     url = f"{api_base}/api/generate"
#     data = {
#         "model": model,
#         "prompt": prompt,
#     }
#     # the session is closed automatically when the with-block exits
#     async with aiohttp.ClientSession() as session:
#         async with session.post(url, json=data) as resp:
#             async for line in resp.content.iter_any():
#                 if not line:
#                     continue
#                 try:
#                     # a single chunk may carry several newline-delimited JSON objects
#                     for chunk in line.decode("utf-8").split("\n"):
#                         if chunk.strip() != "":
#                             j = json.loads(chunk)
#                             if "response" in j:
#                                 print(j["response"])
#                                 yield {
#                                     "role": "assistant",
#                                     "content": j["response"],
#                                 }
#                 except Exception as e:
#                     print(f"Error decoding JSON: {e}")
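# # for reference: the stream is newline-delimited JSON; an illustrative line from
# # /api/generate looks like {"model": "llama2", "created_at": "...", "response": "The", "done": false}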
# async def get_ollama_response_no_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     # consume the stream and concatenate the deltas into one response string
#     generator = get_ollama_response_stream(api_base=api_base, model=model, prompt=prompt)
#     response = ""
#     async for elem in generator:
#         print(elem)
#         response += elem["content"]
#     return response
# result = asyncio.run(get_ollama_response_no_stream())
# print(result)
# # return this generator to the client for streaming requests
# async def get_response():
#     generator = get_ollama_response_stream()
#     async for elem in generator:
#         print(elem)
# asyncio.run(get_response())
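#
# ##### sketch (not part of the original tests): skip gracefully when the local server
# ##### is down; assumes a plain GET to the Ollama root URL returns HTTP 200 while the
# ##### server is running, and ollama_is_running is a hypothetical helper name
# import requests
#
# def ollama_is_running(api_base="http://localhost:11434"):
#     try:
#         return requests.get(api_base, timeout=2).status_code == 200
#     except requests.exceptions.ConnectionError:
#         return False
#
# # usage:
# # if ollama_is_running():
# #     print(asyncio.run(get_ollama_response_no_stream()))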
##### latest implementation of making raw http post requests to local ollama server #####
# import requests
# import json
#
# def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     url = f"{api_base}/api/generate"
#     data = {
#         "model": model,
#         "prompt": prompt,
#     }
#     # the session and response are closed automatically when the with-blocks exit
#     with requests.Session() as session:
#         with session.post(url, json=data, stream=True) as resp:
#             # iter_lines already yields one newline-delimited JSON object at a time
#             for line in resp.iter_lines():
#                 if not line:
#                     continue
#                 try:
#                     j = json.loads(line.decode("utf-8"))
#                     if "response" in j:
#                         completion_obj = {
#                             "role": "assistant",
#                             "content": j["response"],
#                         }
#                         yield {"choices": [{"delta": completion_obj}]}
#                 except Exception as e:
#                     print(f"Error decoding JSON: {e}")
# response = get_ollama_response_stream()
# for chunk in response:
#     print(chunk["choices"][0]["delta"])
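#
# ##### sketch (not part of the original tests): fold the OpenAI-style chunks above
# ##### into a single assistant message; collect_stream is a hypothetical helper name
# def collect_stream(chunks):
#     # each chunk looks like {"choices": [{"delta": {"role": ..., "content": ...}}]}
#     content = "".join(chunk["choices"][0]["delta"]["content"] for chunk in chunks)
#     return {"role": "assistant", "content": content}
#
# # usage:
# # message = collect_stream(get_ollama_response_stream())
# # print(message["content"])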