Trudy committed on
Commit
f40f415
·
1 Parent(s): 99c245c

added camera env switcher

Browse files
README.md CHANGED
@@ -14,8 +14,6 @@ app_port: 3001
14
 
15
  This repository contains a react-based starter app for using the [Multimodal Live API](https://ai.google.dev/api/multimodal-live) over a websocket. It provides modules for streaming audio playback, recording user media such as from a microphone, webcam or screen capture as well as a unified log view to aid in development of your application.
16
 
17
- [![Multimodal Live API Demo](readme/thumbnail.png)](https://www.youtube.com/watch?v=J_q7JY1XxFE)
18
-
19
  Watch the demo of the Multimodal Live API [here](https://www.youtube.com/watch?v=J_q7JY1XxFE).
20
 
21
  ## Usage
@@ -23,14 +21,21 @@ Watch the demo of the Multimodal Live API [here](https://www.youtube.com/watch?v
23
  To get started, [create a free Gemini API key](https://aistudio.google.com/apikey) and add it to the `.env` file. Then:
24
 
25
  ```
26
- $ npm install && npm start
 
 
 
 
27
  ```
 
 
 
 
 
 
28
 
29
- We have provided several example applications on other branches of this repository:
30
 
31
- - [demos/GenExplainer](https://github.com/google-gemini/multimodal-live-api-web-console/tree/demos/genexplainer)
32
- - [demos/GenWeather](https://github.com/google-gemini/multimodal-live-api-web-console/tree/demos/genweather)
33
- - [demos/GenList](https://github.com/google-gemini/multimodal-live-api-web-console/tree/demos/genlist)
34
 
35
  ## Example
36
 
 
14
 
15
  This repository contains a react-based starter app for using the [Multimodal Live API](https://ai.google.dev/api/multimodal-live) over a websocket. It provides modules for streaming audio playback, recording user media such as from a microphone, webcam or screen capture as well as a unified log view to aid in development of your application.
16
 
 
 
17
  Watch the demo of the Multimodal Live API [here](https://www.youtube.com/watch?v=J_q7JY1XxFE).
18
 
19
  ## Usage
 
21
  To get started, [create a free Gemini API key](https://aistudio.google.com/apikey) and add it to the `.env` file. Then:
22
 
23
  ```
24
+ GEMINI_API_KEY=your_api_key
25
+ ```
26
+
27
+ Since this is a modified version of the [Multimodal Live API Console](https://github.com/google-gemini/multimodal-live-api-web-console), you will need to run the server proxy locally with the frontend built.
28
+
29
  ```
30
+ $ npm run build && npm run start-server
31
+ ```
32
+
33
+ The app should run on `http://localhost:3001`
34
+
35
+ ## Note
36
 
37
+ This is a modified version of the [Multimodal Live API Console](https://github.com/google-gemini/multimodal-live-api-web-console) that uses a server proxy to keep your Gemini API key secure in deployment. The original repository is meant to run entirely as a frontend client.
38
 
 
 
 
39
 
40
  ## Example
41
 
src/components/control-tray/ControlTray.tsx CHANGED
@@ -71,6 +71,8 @@ function ControlTray({
71
  const [inVolume, setInVolume] = useState(0);
72
  const [audioRecorder] = useState(() => new AudioRecorder());
73
  const [muted, setMuted] = useState(false);
 
 
74
  const renderCanvasRef = useRef<HTMLCanvasElement>(null);
75
  const connectButtonRef = useRef<HTMLButtonElement>(null);
76
  const [simulatedVolume, setSimulatedVolume] = useState(0);
@@ -179,6 +181,66 @@ function ControlTray({
179
  };
180
  }, [connected, activeVideoStream, client, videoRef]);
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  //handler for swapping from one video-stream to the next
183
  const changeStreams = (next?: UseMediaStreamResult) => async () => {
184
  if (next) {
@@ -219,13 +281,24 @@ function ControlTray({
219
 
220
  {supportsVideo && (
221
  <>
222
- <MediaStreamButton
223
- isStreaming={screenCapture.isStreaming}
224
- start={changeStreams(screenCapture)}
225
- stop={changeStreams()}
226
- onIcon="cancel_presentation"
227
- offIcon="present_to_all"
228
- />
 
 
 
 
 
 
 
 
 
 
 
229
  <MediaStreamButton
230
  isStreaming={webcam.isStreaming}
231
  start={changeStreams(webcam)}
 
71
  const [inVolume, setInVolume] = useState(0);
72
  const [audioRecorder] = useState(() => new AudioRecorder());
73
  const [muted, setMuted] = useState(false);
74
+ const [videoDevices, setVideoDevices] = useState<MediaDeviceInfo[]>([]);
75
+ const [currentDeviceIndex, setCurrentDeviceIndex] = useState(0);
76
  const renderCanvasRef = useRef<HTMLCanvasElement>(null);
77
  const connectButtonRef = useRef<HTMLButtonElement>(null);
78
  const [simulatedVolume, setSimulatedVolume] = useState(0);
 
181
  };
182
  }, [connected, activeVideoStream, client, videoRef]);
183
 
184
+ // Add effect to track available video devices
185
+ useEffect(() => {
186
+ async function getVideoDevices() {
187
+ console.log('📹 Enumerating video devices...');
188
+ try {
189
+ const devices = await navigator.mediaDevices.enumerateDevices();
190
+ const videoInputs = devices.filter(device => device.kind === 'videoinput');
191
+ console.log('📹 Available video devices:', videoInputs.length);
192
+ videoInputs.forEach((device, index) => {
193
+ console.log(`📹 Device ${index}:`, {
194
+ deviceId: device.deviceId,
195
+ label: device.label
196
+ });
197
+ });
198
+ setVideoDevices(videoInputs);
199
+ } catch (error) {
200
+ console.error('❌ Error enumerating devices:', error);
201
+ }
202
+ }
203
+
204
+ // Get initial device list
205
+ getVideoDevices();
206
+
207
+ // Listen for device changes
208
+ navigator.mediaDevices.addEventListener('devicechange', getVideoDevices);
209
+ return () => {
210
+ navigator.mediaDevices.removeEventListener('devicechange', getVideoDevices);
211
+ };
212
+ }, []);
213
+
214
+ const rotateWebcam = async () => {
215
+ console.log('🔄 Rotating webcam...');
216
+ if (videoDevices.length <= 1) {
217
+ console.log('⚠️ Not enough video devices to rotate');
218
+ return;
219
+ }
220
+
221
+ const nextIndex = (currentDeviceIndex + 1) % videoDevices.length;
222
+ console.log(`🎯 Switching to device index ${nextIndex}`);
223
+
224
+ try {
225
+ const newStream = await navigator.mediaDevices.getUserMedia({
226
+ video: { deviceId: { exact: videoDevices[nextIndex].deviceId } },
227
+ audio: false
228
+ });
229
+
230
+ console.log('✅ Got new video stream');
231
+ setActiveVideoStream(newStream);
232
+ onVideoStreamChange(newStream);
233
+ setCurrentDeviceIndex(nextIndex);
234
+
235
+ // Clean up old stream
236
+ if (activeVideoStream) {
237
+ activeVideoStream.getTracks().forEach(track => track.stop());
238
+ }
239
+ } catch (error) {
240
+ console.error('❌ Error rotating webcam:', error);
241
+ }
242
+ };
243
+
244
  //handler for swapping from one video-stream to the next
245
  const changeStreams = (next?: UseMediaStreamResult) => async () => {
246
  if (next) {
 
281
 
282
  {supportsVideo && (
283
  <>
284
+ {!isIOSDevice && (
285
+ <MediaStreamButton
286
+ isStreaming={screenCapture.isStreaming}
287
+ start={changeStreams(screenCapture)}
288
+ stop={changeStreams()}
289
+ onIcon="cancel_presentation"
290
+ offIcon="present_to_all"
291
+ />
292
+ )}
293
+ {webcam.isStreaming && videoDevices.length > 1 && (
294
+ <button
295
+ className="action-button"
296
+ onClick={rotateWebcam}
297
+ title="Switch camera"
298
+ >
299
+ <span className="material-symbols-outlined">switch_camera</span>
300
+ </button>
301
+ )}
302
  <MediaStreamButton
303
  isStreaming={webcam.isStreaming}
304
  start={changeStreams(webcam)}