phamngoctukts committed on
Commit
adf8038
1 Parent(s): 0f9fd16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -30,9 +30,6 @@ class AppState:
30
  conversation:list = field(default_factory=list)
31
  recording: bool = False # Thêm thuộc tính recording
32
  pause_threshold: float = 1 # Thêm thuộc tính pause_threshold
33
- strength: float = 1.0
34
- ckpt:list = field(default_factory=list)
35
- guidance: float = 8
36
 
37
  def run_vad(ori_audio, sr):
38
  _st = time.time()
@@ -65,7 +62,7 @@ def determine_pause(audio:np.ndarray,sampling_rate:int,state:AppState) -> bool:
65
  print(f"duration_after_vad: {dur_vad:.3f} s, time_vad: {time_vad:.3f} s")
66
  return (duration - dur_vad) > state.pause_threshold # Sử dụng state.pause_threshold
67
 
68
- def process_audio(audio:tuple,state:AppState,image:Image, streng:float,ckpt,guidance):
69
  if state.recording: # Kiểm tra state.stream:
70
  time.sleep(0.1)
71
  if state.stream is None:
@@ -74,9 +71,6 @@ def process_audio(audio:tuple,state:AppState,image:Image, streng:float,ckpt,guid
74
  else:
75
  state.stream = np.concatenate((state.stream, audio[1]))
76
  state.image_in=image
77
- state.strength=streng
78
- state.ckpt=ckpt
79
- state.guidance=guidance
80
  pause_detected = determine_pause(state.stream, state.sampling_rate, state)
81
  state.pause_detected = pause_detected
82
  if state.pause_detected and state.started_talking:
@@ -146,7 +140,10 @@ def response_audio(state:AppState):
146
  return state, synthesize_speech("Đã chuyển sang chế độ " + ("vẽ" if state.painting else "nói chuyện"))
147
  if state.painting is True:
148
  promptx = prompt_hugingface(textin,"Hugging Face","Qwen/Qwen2.5-72B-Instruct","Medium")
149
- img=resize(state.image_in)
 
 
 
150
  state.image_out = render.generate_images(textin, img)
151
  audio_bytes = synthesize_speech("Bạn thấy tôi vẽ "+textin+" có đẹp không")
152
  return state, audio_bytes
@@ -173,7 +170,10 @@ def response_text(state:AppState,textin,image:Image, prompt, progress=gr.Progres
173
  if state.painting is True:
174
  state.conversation.append({"role": "user", "content":"Bạn: " + textin})
175
  #state.image_out = generate_image(textin, image, streng, ckpt,guidance)
176
- img=resize(image)
 
 
 
177
  image_out = render.generate_images(textin, img)
178
  state.image_out = image_out
179
  audio_bytes = synthesize_speech("Bạn thấy tôi vẽ "+prompt+" có đẹp không")
 
30
  conversation:list = field(default_factory=list)
31
  recording: bool = False # Thêm thuộc tính recording
32
  pause_threshold: float = 1 # Thêm thuộc tính pause_threshold
 
 
 
33
 
34
  def run_vad(ori_audio, sr):
35
  _st = time.time()
 
62
  print(f"duration_after_vad: {dur_vad:.3f} s, time_vad: {time_vad:.3f} s")
63
  return (duration - dur_vad) > state.pause_threshold # Sử dụng state.pause_threshold
64
 
65
+ def process_audio(audio:tuple,state:AppState,image:Image):
66
  if state.recording: # Kiểm tra state.stream:
67
  time.sleep(0.1)
68
  if state.stream is None:
 
71
  else:
72
  state.stream = np.concatenate((state.stream, audio[1]))
73
  state.image_in=image
 
 
 
74
  pause_detected = determine_pause(state.stream, state.sampling_rate, state)
75
  state.pause_detected = pause_detected
76
  if state.pause_detected and state.started_talking:
 
140
  return state, synthesize_speech("Đã chuyển sang chế độ " + ("vẽ" if state.painting else "nói chuyện"))
141
  if state.painting is True:
142
  promptx = prompt_hugingface(textin,"Hugging Face","Qwen/Qwen2.5-72B-Instruct","Medium")
143
+ if state.image_in:
144
+ img=resize(state.image_in)
145
+ else:
146
+ img=None
147
  state.image_out = render.generate_images(textin, img)
148
  audio_bytes = synthesize_speech("Bạn thấy tôi vẽ "+textin+" có đẹp không")
149
  return state, audio_bytes
 
170
  if state.painting is True:
171
  state.conversation.append({"role": "user", "content":"Bạn: " + textin})
172
  #state.image_out = generate_image(textin, image, streng, ckpt,guidance)
173
+ if image:
174
+ img=resize(image)
175
+ else:
176
+ img=None
177
  image_out = render.generate_images(textin, img)
178
  state.image_out = image_out
179
  audio_bytes = synthesize_speech("Bạn thấy tôi vẽ "+prompt+" có đẹp không")