seamless-streaming

Running on T4

seamless-streaming/streaming-react-app/src/react-xr/XRRendering.ts

Mark Duppenthaler

Port over VR performance fixes and cleanup

34021fc 11 months ago

11 kB

	import * as THREE from 'three';
	import {OrbitControls} from 'three/examples/jsm/controls/OrbitControls.js';

	import ThreeMeshUI, {Block, Text} from 'three-mesh-ui';

	import FontJSON from '../assets/RobotoMono-Regular-msdf.json?url';
	import FontImage from '../assets/RobotoMono-Regular.png';
	import {TranslationSentences} from '../types/StreamingTypes';
	import supportedCharSet from './supportedCharSet';

	// Augment three-mesh-ui types which aren't implemented.
	// The members declared here are the ones this file calls on Block and Text
	// (add / set / position.set).
	declare module 'three-mesh-ui' {
	interface Block {
	// Attach a child to this block (makeTextBlock nests a Block and a Text this way).
	add(any: any);
	// Apply new options to the block; updateTextBlock calls this on every loop() pass.
	// NOTE(review): BlockOptions is not imported in this file - presumably it resolves
	// to the type exported by the augmented module; confirm.
	set(props: BlockOptions);
	// Position of the block; updateTextBlock moves it with position.set(x, y, z).
	position: {
	x: number;
	y: number;
	z: number;
	set: (x: number, y: number, z: number) => void;
	};
	}
	interface Text {
	// Replace the displayed string; updateTextBlock calls this on every loop() pass.
	set(props: {content: string});
	}
	}

	// Various configuration parameters
	// Text that updatetranslationText prepends to the joined translation sentences.
	const INITIAL_PROMPT = 'Listening...\n';
	// Number of text blocks created by init() and the maximum number of lines laid out per update.
	const NUM_LINES = 3;
	// Maximum characters per line used when wrapping words in chunkTranslationTextIntoLines.
	const CHARS_PER_LINE = 37;
	// Rate at which characters of the translation text are revealed (see updateTextBlocksProps).
	const CHARS_PER_SECOND = 15;

	// Layout values passed to three-mesh-ui / used as block coordinates.
	// NOTE(review): presumably expressed in three.js world units - confirm.
	// Width of a freshly created text block; also used to left-align blocks in updateTextBlock.
	const MAX_WIDTH = 0.89;
	// Width attributed to one character when sizing a block to its content.
	const CHAR_WIDTH = 0.0233;
	// Starting y coordinate for the most recent (bottom) line.
	const Y_COORD_START = -0.38;
	// z coordinate of every text block; blocks are children of the camera.
	const Z_COORD = -1.3;
	// Height of one text block and the vertical step between consecutive lines.
	const LINE_HEIGHT = 0.062;
	// Extra vertical gap inserted where a sentence break ('\n' entry) occurs.
	const BLOCK_SPACING = 0.02;
	// Font size passed to the outer text block.
	const FONT_SIZE = 0.038;

	// Speed of scrolling of text lines: y increment applied per updateTextBlock call
	// until a line reaches its targetY.
	const SCROLL_Y_DELTA = 0.01;

	// Overlay an extra block for padding due to inflexibilities of native padding
	const OFFSET = 0.01;
	const OFFSET_WIDTH = OFFSET * 3;

	// Interval, in milliseconds, at which the cursor blink flag is toggled.
	const CURSOR_BLINK_INTERVAL_MS = 500;

	// Module-wide state of the animated transcript.
	type TranscriptState = {
	// Full text to display: INITIAL_PROMPT followed by the newline-joined sentences.
	translationText: string;
	// Display props for the text blocks, recomputed by updateTextBlocksProps.
	textBlocksProps: TextBlockProps[];
	// How many characters of translationText have been revealed so far.
	lastTranslationStringIndex: number;
	// Initialised to 0; not read or updated by the code visible in this file.
	lastTranslationLineStartIndex: number;
	// Latest result of chunkTranslationTextIntoLines (most recent line first).
	transcriptLines: string[];
	// Timestamp (ms since epoch) from which the next batch of characters is timed.
	lastUpdateTime: number;
	};

	// Display properties of one line of text.
	type TextBlockProps = {
	// The line's text (the blinking cursor is appended separately in updateTextBlock).
	content: string;
	// The end position when animating
	targetY: number;
	// Current scroll position that caps at targetY
	currentY: number;
	// Computed in updateTextBlocksProps (fades with line age); updateTextBlock does not read it.
	textOpacity: number;
	// Opacity applied to the outer container's background.
	backgroundOpacity: number;
	// Index of this line within transcriptLines at the time the props were computed.
	index: number;
	// True for the most recent line, which gets the cursor and is placed at targetY without scrolling.
	isBottomLine: boolean;
	};

	/**
	 * Builds the starting props for `count` text blocks.
	 *
	 * Every entry is an invisible placeholder (empty content, zero opacity) parked
	 * at the bottom-line position.
	 *
	 * @param count Number of placeholder entries to create.
	 * @returns A new array of `count` independent placeholder objects.
	 */
	function initialTextBlockProps(count: number): TextBlockProps[] {
	  // Placeholders are hidden rather than omitted because mesh UI crashes if
	  // elements are added to / removed from the screen.
	  return Array.from({length: count}, () => ({
	    targetY: Y_COORD_START,
	    currentY: Y_COORD_START,
	    index: 0,
	    textOpacity: 0,
	    backgroundOpacity: 0,
	    width: MAX_WIDTH,
	    height: LINE_HEIGHT,
	    content: '',
	    isBottomLine: true,
	  }));
	}

	/**
	 * Creates a pristine transcript state: no text, nothing revealed yet,
	 * NUM_LINES hidden placeholder blocks, and the update clock set to "now".
	 */
	function initialState(): TranscriptState {
	  const placeholderProps = initialTextBlockProps(NUM_LINES);
	  const createdAt = Date.now();

	  return {
	    translationText: '',
	    textBlocksProps: placeholderProps,
	    lastTranslationStringIndex: 0,
	    lastTranslationLineStartIndex: 0,
	    transcriptLines: [],
	    lastUpdateTime: createdAt,
	  };
	}

	// Text/animation state for the transcript; resetState() replaces it wholesale.
	let transcriptState: TranscriptState = initialState();

	// three.js objects, created by init(). They start as null so that the declared
	// `| null` types match the runtime value before init() has been called.
	let scene: THREE.Scene | null = null;
	let camera: THREE.PerspectiveCamera | null = null;
	let renderer: THREE.WebGLRenderer | null = null;
	// NOTE(review): OrbitControls is imported from three/examples rather than taken
	// from the THREE namespace; confirm `THREE.OrbitControls` resolves with the
	// installed typings.
	let controls: THREE.OrbitControls | null = null;

	// Current phase of the blinking cursor; read by updateTextBlock.
	let cursorBlinkOn = false;

	// Flip the cursor phase on a fixed timer. The timer is started as a side effect
	// of loading this module and is never cleared.
	setInterval(() => {
	  cursorBlinkOn = !cursorBlinkOn;
	}, CURSOR_BLINK_INTERVAL_MS);

	// The three-mesh-ui objects that make up one rendered line of text.
	type TextBlock = {
	// Outer block: the one whose backgroundOpacity and position are driven by the props.
	textBlockOuterContainer: Block;
	// Inner block: always created/updated with padding 0 and backgroundOpacity 0; holds the text.
	textBlockInnerContainer: Block;
	// Text node whose content is replaced by updateTextBlock.
	text: Text;
	};
	// Filled by init() with NUM_LINES blocks; entry i is updated from
	// transcriptState.textBlocksProps[i] on every loop() pass.
	const textBlocks: TextBlock[] = [];

	/**
	 * Returns the WebGL renderer created by `init()`.
	 *
	 * @returns The current renderer, or `null` if `init()` has not been called yet.
	 */
	export function getRenderer(): THREE.WebGLRenderer | null {
	  // The module variable may still be unassigned before init(); normalise to
	  // null so the declared return type always holds.
	  return renderer ?? null;
	}

	/**
	 * Creates the scene, camera, XR-enabled renderer, orbit controls and the
	 * NUM_LINES text blocks, then starts the animation loop.
	 *
	 * Safe to call more than once: a previously started animation loop is stopped
	 * and text blocks from the earlier call are discarded, so updates always
	 * target the blocks attached to the current camera.
	 *
	 * @param width Canvas width passed to the renderer and used for the camera aspect ratio.
	 * @param height Canvas height passed to the renderer and used for the camera aspect ratio.
	 * @param parentElement Element that receives the renderer's canvas; when `null`
	 *   the canvas is created but not attached.
	 * @returns The newly created renderer.
	 */
	export function init(
	  width: number,
	  height: number,
	  parentElement: HTMLDivElement | null,
	): THREE.WebGLRenderer {
	  // A renderer from an earlier init() would otherwise keep invoking loop()
	  // alongside the new one.
	  renderer?.setAnimationLoop(null);

	  scene = new THREE.Scene();
	  scene.background = new THREE.Color(0x505050);

	  camera = new THREE.PerspectiveCamera(60, width / height, 0.1, 1000);
	  camera.position.z = 1;

	  renderer = new THREE.WebGLRenderer({
	    antialias: true,
	  });
	  renderer.setPixelRatio(window.devicePixelRatio);
	  renderer.setSize(width, height);
	  renderer.xr.enabled = true;

	  renderer.xr.setReferenceSpaceType('local');

	  parentElement?.appendChild(renderer.domElement);

	  controls = new OrbitControls(camera, renderer.domElement);
	  controls.update();

	  // The camera must be part of the scene for its children (the text blocks) to render.
	  scene.add(camera);

	  // updateTextBlock addresses blocks by index starting at 0, so entries left
	  // over from an earlier init() (attached to the old camera) must be dropped.
	  textBlocks.length = 0;
	  textBlocks.push(
	    ...initialTextBlockProps(NUM_LINES).map((props) => makeTextBlock(props)),
	  );

	  renderer.setAnimationLoop(loop);
	  return renderer;
	}

	/**
	 * Sets the text the transcript should animate towards.
	 *
	 * The stored text is INITIAL_PROMPT followed by the sentences joined with
	 * newlines; the state is only written when that text actually differs.
	 *
	 * @param translationSentences Sentences to display, oldest first.
	 */
	export function updatetranslationText(
	  translationSentences: TranslationSentences,
	): void {
	  const combinedText = `${INITIAL_PROMPT}${translationSentences.join('\n')}`;
	  if (combinedText !== transcriptState.translationText) {
	    transcriptState.translationText = combinedText;
	  }
	}

	/**
	 * Discards the current transcript state and starts over from a freshly
	 * built initial state (empty text, hidden placeholder blocks).
	 */
	export function resetState(): void {
	  const freshState = initialState();
	  transcriptState = freshState;
	}

	/**
	 * Creates the three-mesh-ui objects for one line of text and attaches them to
	 * the camera.
	 *
	 * Hierarchy: camera -> outer container -> inner container -> text.
	 *
	 * @param props Only `content` and `backgroundOpacity` are used at creation time.
	 * @returns Handles to the three created objects.
	 * @throws Error if called before `init()` has created the camera.
	 */
	function makeTextBlock({
	  content,
	  backgroundOpacity,
	}: TextBlockProps): TextBlock {
	  // `camera` is declared nullable; fail with a clear message instead of an
	  // opaque "cannot read properties of null/undefined".
	  if (camera == null) {
	    throw new Error('makeTextBlock called before init(): camera is not set');
	  }

	  const width = MAX_WIDTH;
	  const height = LINE_HEIGHT;

	  const fontProps = {
	    fontSize: FONT_SIZE,
	    textAlign: 'left',
	    // TODO: support more language charsets
	    // This renders using MSDF format supported in WebGL. Renderable characters are defined in the "charset" json
	    // Currently supports most default keyboard inputs but this would exclude many non latin charset based languages.
	    // You can use https://msdf-bmfont.donmccurdy.com/ for easily generating these files
	    fontFamily: FontJSON,
	    fontTexture: FontImage,
	  };

	  // The outer container is slightly wider than the text area (OFFSET_WIDTH)
	  // and carries the font settings.
	  const textBlockOuterContainer = new Block({
	    backgroundOpacity,
	    width: width + OFFSET_WIDTH,
	    height: height,
	    borderRadius: 0,
	    ...fontProps,
	  });

	  const text = new Text({content});
	  const textBlockInnerContainer = new Block({
	    padding: 0,
	    backgroundOpacity: 0,
	    width,
	    height,
	  });

	  // Adding it to the camera makes the UI follow it.
	  camera.add(textBlockOuterContainer);
	  textBlockOuterContainer.add(textBlockInnerContainer);
	  textBlockInnerContainer.add(text);

	  return {
	    textBlockOuterContainer,
	    textBlockInnerContainer,
	    text,
	  };
	}

	/**
	 * Updates the position and text of a text block from its props.
	 *
	 * Sizes the block to its content, scrolls it one step towards `targetY`
	 * (the bottom line is placed at `targetY` directly), and appends a blinking
	 * cursor to the bottom line once all available text has been revealed.
	 *
	 * @param id Index of the block in `textBlocks` and of its props in
	 *   `transcriptState.textBlocksProps`.
	 * @param props Display props for the line.
	 */
	function updateTextBlock(
	  id: number,
	  {content, targetY, currentY, backgroundOpacity, isBottomLine}: TextBlockProps,
	): void {
	  const textBlock = textBlocks[id];
	  const stateProps = transcriptState.textBlocksProps[id];
	  if (textBlock === undefined || stateProps === undefined) {
	    // No rendered block (or no state entry) exists for this index; nothing to update.
	    return;
	  }
	  const {textBlockOuterContainer, textBlockInnerContainer, text} = textBlock;

	  const {lastTranslationStringIndex, translationText} = transcriptState;

	  // Block width is based on the text alone, measured before any cursor is appended.
	  const numChars = content.length;

	  // Add blinking cursor if we don't have any new input to render
	  const showCursor =
	    isBottomLine &&
	    cursorBlinkOn &&
	    lastTranslationStringIndex >= translationText.length;
	  const displayedContent = showCursor ? content + '|' : content;

	  // The bottom line reserves 1.1 character widths for the potential cursor;
	  // lines shorter than 10 characters get one extra character width.
	  const width =
	    (numChars + (isBottomLine ? 1.1 : 0) + (numChars < 10 ? 1 : 0)) *
	    CHAR_WIDTH;
	  const height = LINE_HEIGHT;

	  // Width starts from 0 and goes 1/2 in each direction so offset x
	  const xPosition = width / 2 - MAX_WIDTH / 2 + OFFSET_WIDTH;
	  textBlockOuterContainer.set({
	    backgroundOpacity,
	    width: width + 2 * OFFSET_WIDTH,
	    height: height + OFFSET / 3,
	    borderRadius: 0,
	  });

	  // Scroll up line toward target
	  const y = isBottomLine
	    ? targetY
	    : Math.min(currentY + SCROLL_Y_DELTA, targetY);
	  // Remember the scroll position so the next pass continues from here.
	  stateProps.currentY = y;

	  textBlockOuterContainer.position.set(-OFFSET_WIDTH + xPosition, y, Z_COORD);
	  textBlockInnerContainer.set({
	    padding: 0,
	    backgroundOpacity: 0,
	    width,
	    height,
	  });
	  text.set({content: displayedContent});
	}

	/**
	 * Splits the revealed part of the translation text into display lines.
	 *
	 * Only the first `nextTranslationStringIndex` characters are considered.
	 * Words are packed greedily into lines of at most CHARS_PER_LINE characters.
	 * `<unk>` tokens and characters outside the supported charset are dropped
	 * (the latter are reported with console.error).
	 *
	 * @param translationText Full text, with sentences separated by '\n'.
	 * @param nextTranslationStringIndex Number of leading characters to lay out.
	 * @returns Lines ordered most-recent first (index 0 is the line being typed).
	 *   A '\n' entry marks a sentence boundary, followed by an empty string
	 *   that starts the next sentence's first line.
	 */
	function chunkTranslationTextIntoLines(
	  translationText: string,
	  nextTranslationStringIndex: number,
	): string[] {
	  // Look the charset up once per call instead of once per character.
	  // NOTE(review): assumes supportedCharSet() returns equivalent contents on every call - confirm.
	  const charSet = supportedCharSet();

	  // Ideally we continue where we left off but this is complicated when we have mid-words. Recalculating for now
	  const newSentences = translationText
	    .substring(0, nextTranslationStringIndex)
	    .split('\n');
	  const transcriptLines = [''];
	  newSentences.forEach((newSentence, sentenceIdx) => {
	    for (const word of newSentence.split(/\s+/)) {
	      // Filter out the unknown symbol first (every occurrence), so the
	      // character filter below cannot break the token apart and leave "unk".
	      const withoutUnknown = word.replace(/<unk>/g, '');

	      const filteredWord = [...withoutUnknown]
	        .filter((c) => {
	          if (charSet.has(c)) {
	            return true;
	          }
	          console.error(
	            `Unsupported char ${c} - make sure this is supported in the font family msdf file`,
	          );
	          return false;
	        })
	        .join('');

	      const lastLineSoFar = transcriptLines[0];
	      // +1 accounts for the separating space.
	      const charCount = lastLineSoFar.length + filteredWord.length + 1;

	      if (charCount <= CHARS_PER_LINE) {
	        transcriptLines[0] = lastLineSoFar + ' ' + filteredWord;
	      } else {
	        // Word does not fit: it starts a new (most recent) line.
	        transcriptLines.unshift(filteredWord);
	      }
	    }

	    // Between sentences, insert a boundary marker and start a fresh line.
	    if (sentenceIdx < newSentences.length - 1) {
	      transcriptLines.unshift('\n');
	      transcriptLines.unshift('');
	    }
	  });
	  return transcriptLines;
	}

	/**
	 * Advances the typewriter animation by one step.
	 *
	 * Works out how many more characters may be revealed since the last update
	 * (CHARS_PER_SECOND), re-chunks the revealed text into lines, and rebuilds
	 * `transcriptState.textBlocksProps` for at most NUM_LINES most recent lines.
	 * Returns without changing the props when no new character is due or
	 * available.
	 */
	function updateTextBlocksProps(): void {
	  const {translationText, lastTranslationStringIndex, lastUpdateTime} =
	    transcriptState;

	  const currentTime = Date.now();
	  // NOTE: because this rounds rather than floors, a character is released
	  // once half of a character period has elapsed.
	  const charsToRender = Math.round(
	    ((currentTime - lastUpdateTime) * CHARS_PER_SECOND) / 1000,
	  );

	  if (charsToRender < 1) {
	    // Wait some more until we render more characters
	    return;
	  }

	  const nextTranslationStringIndex = Math.min(
	    lastTranslationStringIndex + charsToRender,
	    translationText.length,
	  );
	  if (nextTranslationStringIndex === lastTranslationStringIndex) {
	    // No new characters to render
	    transcriptState.lastUpdateTime = currentTime;
	    return;
	  }

	  // Ideally we continue where we left off but this is complicated when we have mid-words. Recalculating for now
	  const transcriptLines = chunkTranslationTextIntoLines(
	    translationText,
	    nextTranslationStringIndex,
	  );
	  transcriptState.transcriptLines = transcriptLines;
	  transcriptState.lastTranslationStringIndex = nextTranslationStringIndex;

	  // Compute the new props for each text block
	  const newTextBlocksProps: TextBlockProps[] = [];
	  // We start with the most recent line and increment the y coordinate for older lines.
	  // If it is a new sentence we increment the y coordinate a little more to leave a visible space
	  let y = Y_COORD_START;
	  // Stop as soon as NUM_LINES blocks are filled instead of walking the whole transcript.
	  for (
	    let i = 0;
	    i < transcriptLines.length && newTextBlocksProps.length < NUM_LINES;
	    i++
	  ) {
	    const line = transcriptLines[i];

	    if (line === '\n') {
	      y += BLOCK_SPACING;
	      continue;
	    }

	    const isBottomLine = newTextBlocksProps.length === 0;

	    const textOpacity = 1 - 0.1 * newTextBlocksProps.length;

	    // Carry over the scroll position of the block that previously had this line index.
	    const previousProps = transcriptState.textBlocksProps.find(
	      (props) => props.index === i,
	    );
	    const props: TextBlockProps = {
	      targetY: y + LINE_HEIGHT / 2,
	      // `??` rather than `||`: a stored position of exactly 0 is valid and must be kept.
	      currentY: isBottomLine ? y : previousProps?.currentY ?? y,
	      index: i,
	      textOpacity,
	      backgroundOpacity: 1,
	      content: line,
	      isBottomLine,
	    };
	    newTextBlocksProps.push(props);

	    y += LINE_HEIGHT;
	  }

	  transcriptState.textBlocksProps = newTextBlocksProps;
	  transcriptState.lastUpdateTime = currentTime;
	}

	/**
	 * The main render loop, everything gets rendered here.
	 *
	 * Registered with the renderer by init(). Each pass advances the text
	 * animation, applies the resulting props to the text blocks, lets
	 * three-mesh-ui lay itself out, and renders the scene.
	 */
	function loop(): void {
	  // All four are created together by init(); without them there is nothing to render.
	  if (!scene || !camera || !renderer || !controls) {
	    return;
	  }

	  updateTextBlocksProps();

	  // Props entry i drives text block i.
	  transcriptState.textBlocksProps.forEach((props, i) =>
	    updateTextBlock(i, props),
	  );

	  ThreeMeshUI.update();

	  controls.update();
	  renderer.render(scene, camera);
	}