# NOTE(review): the lines "Spaces:" / "Paused" / "Paused" were page-export
# artifacts (web UI chrome), not Python source; replaced with this comment
# so the module parses.
# Standard library.
import asyncio

# Third-party: openai_async for non-blocking OpenAI calls, nest_asyncio to
# permit nested event loops (e.g. inside Jupyter), torch + transformers for
# token counting.
# NOTE(review): nest_asyncio is imported but nest_asyncio.apply() is never
# called in this file — confirm whether it is applied elsewhere or needed.
import nest_asyncio
import openai_async
import torch
from transformers import AutoTokenizer

# GPT-2 tokenizer used as a proxy for OpenAI model token counts.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
def count_tokens(text):
    """Return the number of GPT-2 tokens in *text*.

    Uses the module-level ``tokenizer``. The original constructed a torch
    tensor just to read its second dimension; ``len()`` on the encoded id
    list is equivalent and avoids the allocation entirely.
    """
    return len(tokenizer.encode(text))
def break_up_file_to_chunks(text, chunk_size=2000, overlap=100):
    """Split *text* into overlapping chunks of token ids.

    Each chunk holds at most *chunk_size* tokens; consecutive chunks share
    *overlap* tokens so context is not lost at chunk boundaries.

    Args:
        text: the raw transcript text to tokenize and split.
        chunk_size: maximum tokens per chunk.
        overlap: tokens shared between consecutive chunks.

    Returns:
        A list of lists of token ids (empty list for empty text).

    Raises:
        ValueError: if ``overlap >= chunk_size`` — the loop step would be
            non-positive, which the original silently passed to ``range``.
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    tokens = tokenizer.encode(text)
    step = chunk_size - overlap
    return [tokens[i:i + chunk_size] for i in range(0, len(tokens), step)]
async def summarize_meeting(prompt, timeout, max_tokens):
    """Send *prompt* to the OpenAI completions endpoint and return the response.

    Args:
        prompt: full prompt text (instruction plus transcript chunk).
        timeout: request timeout in seconds.
        max_tokens: completion token budget.

    Returns:
        The raw ``openai_async`` response object; callers read
        ``response.json()["choices"][0]["text"]``.

    NOTE(review): "gpt-3.5-turbo" is a chat model, but this payload uses the
    legacy completions shape ("prompt" in, "text" out). Confirm the endpoint
    accepts it; otherwise switch to ``openai_async.chat_complete`` with a
    "messages" payload and update the callers' response parsing to match.
    NOTE(review): ``API_KEY`` must be defined elsewhere in the module — it is
    not visible in this section.
    """
    # Sampling configuration: mildly creative, no repetition penalties.
    # (Previously held in locals alongside commented-out dead values.)
    response = await openai_async.complete(
        api_key=API_KEY,
        timeout=timeout,
        payload={
            "model": "gpt-3.5-turbo",
            "prompt": prompt,
            "temperature": 0.5,
            "max_tokens": max_tokens,
            "top_p": 1,
            "frequency_penalty": 0,
            "presence_penalty": 0,
        },
    )
    return response
def _run_coro(coro):
    # Run *coro* on a fresh event loop and ALWAYS close the loop afterwards.
    # The original created a new loop per chunk and never closed any of
    # them, leaking loops (and their selector file descriptors).
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(coro)
    finally:
        loop.close()


def main_summarizer_meet(text, debug=False):
    """Summarize a meeting transcript via map-reduce over token chunks.

    Each chunk is summarized independently, then the per-chunk summaries
    are consolidated with a final API call.

    Args:
        text: full meeting transcript.
        debug: when True, skip all API calls and return a canned string.

    Returns:
        The consolidated summary text.
    """
    if debug:
        return "This is a test summary function"
    chunk_summaries = []
    for chunk in break_up_file_to_chunks(text):
        # Fix: iterate the chunk directly instead of re-indexing chunks[i].
        prompt = f"Summarize this meeting transcript: {tokenizer.decode(chunk)}"
        response = _run_coro(
            summarize_meeting(prompt=prompt, timeout=30, max_tokens=1000)
        )
        chunk_summaries.append(response.json()["choices"][0]["text"].strip())
    # Reduce step. Fix: prompt typo "Consoloidate" -> "Consolidate".
    consolidate_prompt = f"Consolidate these meeting summaries: {chunk_summaries}"
    response = _run_coro(
        summarize_meeting(prompt=consolidate_prompt, timeout=45, max_tokens=1000)
    )
    return response.json()["choices"][0]["text"].strip()
# ----------------------------- | |
def main_summarizer_action_items(text, debug=False):
    """Extract action items (with due dates) from a meeting transcript.

    Each token chunk is sent to the API separately; the per-chunk results
    are joined with newlines.

    Args:
        text: full meeting transcript.
        debug: when True, skip all API calls and return a canned string.

    Returns:
        Newline-joined action-item text from all chunks.
    """
    if debug:
        return "This is a test action items function"
    action_items = []
    for chunk in break_up_file_to_chunks(text):
        # Fix: use the chunk directly rather than re-indexing chunks[i].
        prompt = (
            "Provide a list of action items with a due date "
            f"from the provided meeting transcript text: {tokenizer.decode(chunk)}"
        )
        # Fix: close the per-call event loop; the original leaked one loop
        # per chunk. Also parse response.json() once instead of twice.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            response = loop.run_until_complete(
                summarize_meeting(prompt=prompt, timeout=30, max_tokens=1000)
            )
        finally:
            loop.close()
        payload = response.json()
        action_items.append(payload["choices"][0]["text"].strip())
    return '\n'.join(action_items)