Spaces:

st0bb3n
/

Cam2Speech

Runtime error

Cam2Speech / app.py

Update app.py

f944d10 over 2 years ago

1.34 kB

	from transformers import ViTFeatureExtractor, ViTForImageClassification
	import gradio as gr
	from datasets import load_dataset
	import torch

	# Load the "cifar100" dataset at import time (runs before the app is built).
	# NOTE(review): in the code visible here `dataset` is only used on the next
	# line — confirm nothing else needs it before keeping this start-up cost.
	dataset = load_dataset("cifar100")
	# "fine_label" column of the "train" split.
	# NOTE(review): this module-level `image` is not read by the visible code;
	# classify() and image2speech() each take their own `image` parameter, which
	# shadows it. Looks like leftover experimentation — verify and consider removing.
	image = dataset["train"]["fine_label"]

	_VIT_CHECKPOINT = 'google/vit-base-patch16-224'
	# Lazily populated (feature_extractor, model) pair; see _load_vit().
	_vit_components = None


	def _load_vit():
	    """Return the ViT (feature_extractor, model) pair, loading it on first use."""
	    global _vit_components
	    if _vit_components is None:
	        # Build both objects before publishing them so a failed load never
	        # leaves a half-initialised cache behind.
	        _vit_components = (
	            ViTFeatureExtractor.from_pretrained(_VIT_CHECKPOINT),
	            ViTForImageClassification.from_pretrained(_VIT_CHECKPOINT),
	        )
	    return _vit_components


	def classify(image):
	    """Return the predicted class label for a single image.

	    The image is preprocessed with the ViT feature extractor, run through
	    the ViT classification model without gradient tracking, and the label
	    with the highest logit is looked up in ``model.config.id2label``.

	    The feature extractor and model are loaded once and reused, instead of
	    calling ``from_pretrained`` on every request.

	    Args:
	        image: The image to classify; forwarded as ``images=`` to the
	            feature extractor.

	    Returns:
	        The ``id2label`` entry for the arg-max class index.
	    """
	    feature_extractor, model = _load_vit()
	    inputs = feature_extractor(images=image, return_tensors="pt")
	    with torch.no_grad():
	        outputs = model(**inputs)
	    # model predicts one of the 1000 ImageNet classes
	    # .item() needs a one-element tensor, so this handles one image per call.
	    predicted_class_idx = outputs.logits.argmax(-1).item()
	    return model.config.id2label[predicted_class_idx]

	def image2speech(image):
	    """Classify ``image`` and synthesise speech for the predicted label.

	    Args:
	        image: The image forwarded unchanged to ``classify``.

	    Returns:
	        A 2-tuple ``(speech, label)``: whatever ``fastspeech`` returns for
	        the label, followed by the label text itself.
	    """
	    label = classify(image)
	    speech = fastspeech(label)
	    return speech, label

	# Text-to-speech model loaded through Gradio; image2speech() calls it with
	# the predicted label.
	fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")

	# UI: one image input, two outputs (synthesised audio and the label text).
	app = gr.Interface(
	    fn=image2speech,
	    inputs="image",
	    outputs=["audio", "text"],
	    title="Image to speech",
	    description="Classifies and image and tell you what is it, intended to help the visually impaired",
	    examples=["remotecontrol.jpg", "calculator.jpg", "cellphone.jpg"],
	    # Example caching is an Interface option; passing it to launch() raises
	    # TypeError. NOTE(review): assumes Gradio >= 3 — confirm the pinned version.
	    cache_examples=True,
	    allow_flagging="never",
	)

	app.launch()