Spaces:

ake178178
/

pix2speech

Sleeping

App Files Files Community

pix2speech / app.py

ake178178

Update app.py

ebea9f6 verified 6 months ago

raw

history blame contribute delete

2.07 kB

	import base64
	import io
	import os
	import tempfile

	import numpy as np
	import openai
	import streamlit as st
	# NOTE(review): Python imports are case-sensitive and the gTTS distribution
	# appears to install its module as lowercase `gtts`; confirm whether this
	# line should read `from gtts import gTTS`.
	from gTTs import gTTS
	from PIL import Image

	# Configuration.
	# The OpenAI API key is read from the OPENAI_API_KEY environment variable
	# (for example a Space secret) and must never be committed to the repository.
	# NOTE: a key was previously pasted into a comment at this spot; treat that
	# key as compromised and revoke it.
	IMAGE_SIZE = (300, 300)
	# The request originally went to "gpt-4" without the picture attached, so the
	# model could not see the photo. A vision-capable chat model is required.
	VISION_MODEL = "gpt-4o"
	DESCRIBE_PROMPT = "请从左到右描述图片中的物体。"


	def _image_to_data_url(img):
	    """Return *img* encoded as a base64 PNG ``data:`` URL."""
	    buffer = io.BytesIO()
	    img.save(buffer, format='PNG')
	    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
	    return f"data:image/png;base64,{encoded}"


	def _describe_image(data_url):
	    """Send the picture to the chat model and return its stripped reply.

	    Returns an empty string when the reply carries no text content.
	    """
	    response = openai.ChatCompletion.create(
	        model=VISION_MODEL,
	        messages=[
	            {"role": "system", "content": "你是一位图像描述专家。"},
	            {
	                "role": "user",
	                # The image travels with the prompt as a second content part.
	                "content": [
	                    {"type": "text", "text": DESCRIBE_PROMPT},
	                    {"type": "image_url", "image_url": {"url": data_url}},
	                ],
	            },
	        ],
	    )
	    content = response.choices[0].message['content']
	    return (content or "").strip()


	def _synthesize_speech(text):
	    """Return the MP3 bytes produced by gTTS for *text* (language ``zh``).

	    The audio is written to an in-memory buffer, so no temporary file or
	    file handle has to be cleaned up afterwards.
	    """
	    buffer = io.BytesIO()
	    gTTS(text=text, lang='zh').write_to_fp(buffer)
	    return buffer.getvalue()


	# Feature 1: take a photo with the camera widget.
	st.title("拍照并处理图片")
	picture = st.camera_input("拍照")

	if picture:
	    # Feature 2: resize the captured photo to 300x300 pixels and show it.
	    img = Image.open(picture)
	    img = img.resize(IMAGE_SIZE)
	    st.image(img, caption="Resized Image", use_column_width=True)

	    # Fail with a visible message instead of an API traceback when no key
	    # has been configured.
	    api_key = getattr(openai, "api_key", None) or os.environ.get("OPENAI_API_KEY")
	    if not api_key:
	        st.error("未配置 OPENAI_API_KEY，无法生成文字描述。")
	        st.stop()
	    openai.api_key = api_key

	    # Feature 3: upload the photo and generate a text description.
	    st.write("正在上传照片并生成文字结果...")
	    generated_text = _describe_image(_image_to_data_url(img))
	    st.write("生成的文字结果：")
	    st.write(generated_text)

	    # Feature 4: convert the description to speech and play it.
	    if generated_text:
	        st.write("将文字转换为语音并播放")
	        st.audio(_synthesize_speech(generated_text), format='audio/mp3')
	    else:
	        # There is nothing to speak when the reply is empty.
	        st.warning("模型没有返回文字，已跳过语音合成。")