import pixeltable as pxt
from pixeltable.iterators import DocumentSplitter
from pixeltable.functions import openai
import os
import requests
import tempfile
import gradio as gr

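# Gradio callback: runs the full PDF -> cleaned text -> narration script -> speech
# pipeline on top of Pixeltable tables, views, and computed columns.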
def process_document(pdf_file, api_key, voice_choice, style_choice, chunk_size, temperature, max_tokens, system_prompt, progress=gr.Progress()):
    try:
        os.environ['OPENAI_API_KEY'] = api_key
        
        progress(0.1, desc="Initializing...")
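        # Rebuild the Pixeltable directory so repeated runs start from a clean slate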
        pxt.drop_dir('document_audio', force=True)
        pxt.create_dir('document_audio')
        
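        # Base table: one row per uploaded document plus the chosen voice, style, and prompt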
        docs = pxt.create_table(
            'document_audio.documents',
            {
                'document': pxt.Document,
                'voice': pxt.String,
                'style': pxt.String,
                'mode_prompt': pxt.String
            }
        )
        
        progress(0.2, desc="Processing document...")
        docs.insert([{
            'document': pdf_file.name,
            'voice': voice_choice,
            'style': style_choice,
            'mode_prompt': system_prompt
        }])
        
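        # View that splits the document into token-limited chunks; each chunk becomes one row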
        chunks = pxt.create_view(
            'document_audio.chunks',
            docs,
            iterator=DocumentSplitter.create(
                document=docs.document,
                separators='token_limit',
                limit=chunk_size
            )
        )
        
        progress(0.4, desc="Text processing...")
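        # Assigning an expression to a new column creates a computed column that
        # Pixeltable evaluates for every chunk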
        chunks['content_response'] = openai.chat_completions(
            messages=[
                {
                    'role': 'system',
                    'content': docs.mode_prompt  # Use the mode-specific prompt
                },
                {'role': 'user', 'content': chunks.text}
            ],
            model='gpt-4o-mini-2024-07-18',
            max_tokens=max_tokens,
            temperature=temperature
        )
        
        chunks['content'] = chunks.content_response['choices'][0]['message']['content']

        progress(0.6, desc="Script generation...")
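        # Second LLM pass: rewrite each chunk's cleaned content as a narration-friendly script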
        chunks['script_response'] = openai.chat_completions(
            messages=[
                {
                    'role': 'system',
                    'content': f"""Convert content to audio script.
                    Style: {docs.style}
                    Format:
                    - Clear sentence structures
                    - Natural pauses (...)
                    - Term definitions when needed
                    - Proper transitions"""
                },
                {'role': 'user', 'content': chunks.content}
            ],
            model='gpt-4o-mini-2024-07-18',
            max_tokens=max_tokens,
            temperature=temperature
        )
        chunks['script'] = chunks.script_response['choices'][0]['message']['content']

        progress(0.8, desc="Audio synthesis...")
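        # UDF that calls OpenAI's text-to-speech endpoint; the returned file path is
        # stored in the pxt.Audio column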
        @pxt.udf(return_type=pxt.Audio)
        def generate_audio(script: str, voice: str):
            if not script or not voice:
                return None
            try:
                response = requests.post(
                    "https://api.openai.com/v1/audio/speech",
                    headers={"Authorization": f"Bearer {api_key}"},
                    json={"model": "tts-1", "input": script, "voice": voice}
                )
                if response.status_code == 200:
                    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
                    temp_file.write(response.content)
                    temp_file.close()
                    return temp_file.name
            except Exception as e:
                print(f"Error in audio synthesis: {e}")
            return None

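        # Apply the UDF as a computed column so audio is synthesized per chunk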
        chunks['audio'] = generate_audio(chunks.script, docs.voice)
        
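        # Only the last chunk's audio file is surfaced to the Gradio audio player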
        audio_path = chunks.select(chunks.audio).tail(1)['audio'][0]
        
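        # Collect the content/script pairs for the results table shown in the UI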
        results = chunks.select(
            chunks.content,
            chunks.script
        ).collect()
        
        display_data = [
            [f"Segment {idx + 1}", row['content'], row['script']]
            for idx, row in enumerate(results)
        ]
        
        progress(1.0, desc="Complete")
        return display_data, audio_path, "Processing complete"
        
    except Exception as e:
        return None, None, f"Error: {str(e)}"