import asyncio
import os

import nest_asyncio
import openai_async
from transformers import AutoTokenizer

# Allow nested event loops (needed if this module runs inside Jupyter/IPython).
nest_asyncio.apply()

# GPT-2's tokenizer is used only to count and split tokens for chunking.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Assumption: the OpenAI API key is supplied via an environment variable.
API_KEY = os.getenv("OPENAI_API_KEY")

def count_tokens(text):
    # len() on the encoded ids gives the token count directly; no tensor needed.
    return len(tokenizer.encode(text))

def break_up_file_to_chunks(text, chunk_size=2000, overlap=100):
    # Slide a chunk_size-token window over the text, stepping by
    # chunk_size - overlap so adjacent chunks share `overlap` tokens of context.
    tokens = tokenizer.encode(text)
    chunks = []
    for i in range(0, len(tokens), chunk_size - overlap):
        chunks.append(tokens[i:i + chunk_size])

    return chunks
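
# Example: with the defaults chunk_size=2000 and overlap=100, a 4,500-token
# transcript yields chunks covering tokens [0, 2000), [1900, 3900), and
# [3800, 4500); each step advances by 1,900 tokens, so neighbouring chunks
# share 100 tokens of context.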

async def summarize_meeting(prompt, timeout, max_tokens):
    temperature = 0.5
    top_p = 1
    frequency_penalty = 0
    presence_penalty = 0

    # Call the OpenAI completions endpoint. Note that "gpt-3.5-turbo" is a
    # chat-only model and is rejected by the completions endpoint that
    # openai_async.complete() wraps, so the instruct variant is used here.
    response = await openai_async.complete(
        api_key=API_KEY,
        timeout=timeout,
        payload={
            "model": "gpt-3.5-turbo-instruct",
            "prompt": prompt,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "top_p": top_p,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty,
        },
    )

    # Return the raw response; callers extract the text via .json().
    return response
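
# If a chat model is preferred instead, openai_async also exposes a
# chat_complete() helper; a sketch of the equivalent call (same parameters,
# with the reply under ["choices"][0]["message"]["content"]):
#
#     response = await openai_async.chat_complete(
#         api_key=API_KEY,
#         timeout=timeout,
#         payload={
#             "model": "gpt-3.5-turbo",
#             "messages": [{"role": "user", "content": prompt}],
#             "temperature": temperature,
#             "max_tokens": max_tokens,
#         },
#     )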

def main_summarizer_meet(text, debug=False):
    if debug:
        return "This is a test summary function"

    prompt_response = []
    prompt_tokens = []

    chunks = break_up_file_to_chunks(text)

    # Summarize each chunk of the transcript independently.
    for chunk in chunks:
        prompt_request = (
            f"Summarize this meeting transcript: {tokenizer.decode(chunk)}"
        )

        response = asyncio.run(
            summarize_meeting(prompt=prompt_request, timeout=30, max_tokens=1000)
        )

        prompt_response.append(response.json()["choices"][0]["text"].strip())
        prompt_tokens.append(response.json()["usage"]["total_tokens"])

    # Consolidate the per-chunk summaries into one final summary.
    summaries = "\n".join(prompt_response)
    prompt_request = f"Consolidate these meeting summaries: {summaries}"

    response = asyncio.run(
        summarize_meeting(prompt=prompt_request, timeout=45, max_tokens=1000)
    )
    return response.json()["choices"][0]["text"].strip()

# -----------------------------

def main_summarizer_action_items(text, debug=False):
    if debug:
        return "This is a test action items function"

    action_response = []
    action_tokens = []

    chunks = break_up_file_to_chunks(text)

    # Extract action items from each chunk independently.
    for chunk in chunks:
        prompt_request = (
            "Provide a list of action items with a due date from the provided "
            f"meeting transcript text: {tokenizer.decode(chunk)}"
        )

        response = asyncio.run(
            summarize_meeting(prompt=prompt_request, timeout=30, max_tokens=1000)
        )

        action_response.append(response.json()["choices"][0]["text"].strip())
        action_tokens.append(response.json()["usage"]["total_tokens"])

    return "\n".join(action_response)
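
# -----------------------------

# Minimal usage sketch; "transcript.txt" is a hypothetical input file, and
# OPENAI_API_KEY is assumed to be set in the environment.
if __name__ == "__main__":
    with open("transcript.txt", encoding="utf-8") as f:
        transcript = f.read()

    print("=== Summary ===")
    print(main_summarizer_meet(transcript))

    print("=== Action Items ===")
    print(main_summarizer_action_items(transcript))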