phamngoctukts committed • Commit adf8038 • Parent(s): 0f9fd16
Update app.py

app.py CHANGED
@@ -30,9 +30,6 @@ class AppState:
     conversation:list = field(default_factory=list)
     recording: bool = False  # add recording attribute
     pause_threshold: float = 1  # add pause_threshold attribute
-    strength: float = 1.0
-    ckpt:list = field(default_factory=list)
-    guidance: float = 8
 
 def run_vad(ori_audio, sr):
     _st = time.time()
@@ -65,7 +62,7 @@ def determine_pause(audio:np.ndarray,sampling_rate:int,state:AppState) -> bool:
     print(f"duration_after_vad: {dur_vad:.3f} s, time_vad: {time_vad:.3f} s")
     return (duration - dur_vad) > state.pause_threshold  # use state.pause_threshold
 
-def process_audio(audio:tuple,state:AppState,image:Image, streng:float,ckpt,guidance):
+def process_audio(audio:tuple,state:AppState,image:Image):
     if state.recording:  # check state.stream
         time.sleep(0.1)
         if state.stream is None:
@@ -74,9 +71,6 @@ def process_audio(audio:tuple,state:AppState,image:Image, streng:float,ckpt,guidance):
         else:
             state.stream = np.concatenate((state.stream, audio[1]))
         state.image_in=image
-        state.strength=streng
-        state.ckpt=ckpt
-        state.guidance=guidance
         pause_detected = determine_pause(state.stream, state.sampling_rate, state)
         state.pause_detected = pause_detected
         if state.pause_detected and state.started_talking:
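Taken together, the hunks above strip the img2img tuning knobs (strength, ckpt, guidance) out of the shared state and out of the audio callback's signature. Below is a minimal sketch of the simplified pieces; the field names all appear in this diff, but the defaults and type hints for the unannotated ones are assumptions, not the exact app.py code.

from dataclasses import dataclass, field
from typing import Optional

import numpy as np
from PIL import Image


@dataclass
class AppState:
    # Field names come from the hunks above; defaults/types for the
    # unannotated ones are assumed for illustration only.
    conversation: list = field(default_factory=list)
    recording: bool = False             # mic stream is active
    pause_threshold: float = 1          # seconds of trailing silence that end a turn
    stream: Optional[np.ndarray] = None        # accumulated audio samples (assumed)
    sampling_rate: int = 0                     # assumed default
    image_in: Optional[Image.Image] = None
    image_out: Optional[Image.Image] = None
    pause_detected: bool = False
    started_talking: bool = False
    painting: bool = False              # True = drawing mode, False = chat mode
    # strength, ckpt and guidance are gone after this commit.


# The streaming callback now only takes the chunk, the state and the optional image:
def process_audio(audio: tuple, state: AppState, image: Image.Image):
    ...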
@@ -146,7 +140,10 @@ def response_audio(state:AppState):
         return state, synthesize_speech("Đã chuyển sang chế độ " + ("vẽ" if state.painting else "nói chuyện"))
     if state.painting is True:
         promptx = prompt_hugingface(textin,"Hugging Face","Qwen/Qwen2.5-72B-Instruct","Medium")
-
+        if state.image_in:
+            img=resize(state.image_in)
+        else:
+            img=None
         state.image_out = render.generate_images(textin, img)
         audio_bytes = synthesize_speech("Bạn thấy tôi vẽ "+textin+" có đẹp không")
         return state, audio_bytes
@@ -173,7 +170,10 @@ def response_text(state:AppState,textin,image:Image, prompt, progress=gr.Progress()):
     if state.painting is True:
         state.conversation.append({"role": "user", "content":"Bạn: " + textin})
         #state.image_out = generate_image(textin, image, streng, ckpt,guidance)
-
+        if image:
+            img=resize(image)
+        else:
+            img=None
         image_out = render.generate_images(textin, img)
         state.image_out = image_out
         audio_bytes = synthesize_speech("Bạn thấy tôi vẽ "+prompt+" có đẹp không")
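Both response paths now build the init image the same way: resize it only when one is present, otherwise hand None to the renderer. A self-contained sketch of that pattern follows; resize here is a stand-in with assumed behaviour (the real helper lives elsewhere in app.py and is not part of this diff), prepare_init_image is a hypothetical wrapper, and render.generate_images is assumed to accept None when there is no init image.

from typing import Optional

from PIL import Image


def resize(img: Image.Image, max_side: int = 768) -> Image.Image:
    """Stand-in for app.py's resize helper (assumed behaviour): shrink the
    longest side to max_side while keeping the aspect ratio."""
    scale = max_side / max(img.size)
    if scale >= 1:
        return img
    w, h = img.size
    return img.resize((int(w * scale), int(h * scale)))


def prepare_init_image(image_in: Optional[Image.Image]) -> Optional[Image.Image]:
    # Hypothetical wrapper mirroring the added guard: resize only when an
    # image was actually captured, otherwise pass None through.
    if image_in:
        return resize(image_in)
    return None


# Usage, mirroring the new lines in response_audio / response_text:
#   img = prepare_init_image(state.image_in)
#   state.image_out = render.generate_images(textin, img)

Keeping None as the "no image" signal leaves both call sites identical; presumably render.generate_images falls back to plain text-to-image in that case.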