import pixeltable as pxt
from pixeltable.iterators import DocumentSplitter
from pixeltable.functions import openai
import os
import requests
import tempfile
import gradio as gr


def process_document(pdf_file, api_key, voice_choice, style_choice, chunk_size, temperature, max_tokens, system_prompt, progress=gr.Progress()):
    try:
        os.environ['OPENAI_API_KEY'] = api_key

        progress(0.1, desc="Initializing...")
        # Start from a clean slate: drop any data from a previous run, then recreate the directory.
        pxt.drop_dir('document_audio', force=True)
        pxt.create_dir('document_audio')

        # Base table holding the uploaded document and the user-selected settings.
        docs = pxt.create_table(
            'document_audio.documents',
            {
                'document': pxt.Document,
                'voice': pxt.String,
                'style': pxt.String,
                'mode_prompt': pxt.String
            }
        )

        progress(0.2, desc="Processing document...")
        docs.insert([{
            'document': pdf_file.name,
            'voice': voice_choice,
            'style': style_choice,
            'mode_prompt': system_prompt
        }])

        # Split the document into token-limited chunks; the view has one row per chunk.
        chunks = pxt.create_view(
            'document_audio.chunks',
            docs,
            iterator=DocumentSplitter.create(
                document=docs.document,
                separators='token_limit',
                limit=chunk_size
            )
        )

        progress(0.4, desc="Text processing...")
        # Computed column: rewrite each chunk according to the user's system prompt.
        chunks['content_response'] = openai.chat_completions(
            messages=[
                {
                    'role': 'system',
                    'content': docs.mode_prompt
                },
                {'role': 'user', 'content': chunks.text}
            ],
            model='gpt-4o-mini-2024-07-18',
            max_tokens=max_tokens,
            temperature=temperature
        )
        chunks['content'] = chunks.content_response['choices'][0]['message']['content']

        progress(0.6, desc="Script generation...")
        # Second pass: turn the processed content into a narration-ready script.
        chunks['script_response'] = openai.chat_completions(
            messages=[
                {
                    'role': 'system',
                    'content': f"""Convert content to audio script.
                    Style: {docs.style}
                    Format:
                    - Clear sentence structures
                    - Natural pauses (...)
                    - Term definitions when needed
                    - Proper transitions"""
                },
                {'role': 'user', 'content': chunks.content}
            ],
            model='gpt-4o-mini-2024-07-18',
            max_tokens=max_tokens,
            temperature=temperature
        )
        chunks['script'] = chunks.script_response['choices'][0]['message']['content']

        progress(0.8, desc="Audio synthesis...")

        # UDF that calls the OpenAI text-to-speech endpoint and returns the path to an MP3 file.
        @pxt.udf(return_type=pxt.Audio)
        def generate_audio(script: str, voice: str):
            if not script or not voice:
                return None
            try:
                response = requests.post(
                    "https://api.openai.com/v1/audio/speech",
                    headers={"Authorization": f"Bearer {api_key}"},
                    json={"model": "tts-1", "input": script, "voice": voice}
                )
                if response.status_code == 200:
                    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
                    temp_file.write(response.content)
                    temp_file.close()
                    return temp_file.name
                return None
            except Exception as e:
                print(f"Error in audio synthesis: {e}")
                return None

        chunks['audio'] = generate_audio(chunks.script, docs.voice)

        # Use the most recently generated audio file for playback in the UI.
        audio_path = chunks.select(chunks.audio).tail(1)['audio'][0]

        results = chunks.select(
            chunks.content,
            chunks.script
        ).collect()

        display_data = [
            [f"Segment {idx + 1}", row['content'], row['script']]
            for idx, row in enumerate(results)
        ]

        progress(1.0, desc="Complete")
        return display_data, audio_path, "Processing complete"

    except Exception as e:
        return None, None, f"Error: {str(e)}"
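

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original listing): one way process_document
# could be wired into a Gradio Blocks UI. The component names, labels, default
# values, and voice/style choices below are illustrative assumptions; the real
# app's interface may differ.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    with gr.Blocks() as demo:
        pdf_input = gr.File(label='PDF document', file_types=['.pdf'])
        api_key_input = gr.Textbox(label='OpenAI API key', type='password')
        voice_input = gr.Dropdown(['alloy', 'echo', 'nova'], value='alloy', label='Voice')
        style_input = gr.Dropdown(['Technical', 'Conversational'], value='Technical', label='Style')
        chunk_size_input = gr.Slider(100, 1000, value=300, step=50, label='Chunk size (tokens)')
        temperature_input = gr.Slider(0.0, 1.0, value=0.7, step=0.1, label='Temperature')
        max_tokens_input = gr.Slider(100, 1000, value=300, step=50, label='Max tokens per response')
        system_prompt_input = gr.Textbox(label='System prompt', value='Summarize the content clearly and concisely.')
        run_button = gr.Button('Generate audio')

        segments_output = gr.Dataframe(headers=['Segment', 'Content', 'Script'], label='Processed segments')
        audio_output = gr.Audio(label='Generated audio')
        status_output = gr.Textbox(label='Status')

        run_button.click(
            process_document,
            inputs=[pdf_input, api_key_input, voice_input, style_input,
                    chunk_size_input, temperature_input, max_tokens_input, system_prompt_input],
            outputs=[segments_output, audio_output, status_output],
        )

    demo.launch()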