Upload 5 files
- Dockerfile +30 -0
- app.py +93 -0
- index.html +180 -0
- requirements.txt +8 -0
Dockerfile
ADDED
@@ -0,0 +1,30 @@
+FROM python:3.9-slim
+
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements first to leverage Docker cache
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application code
+COPY . .
+
+# Clone and install IndicTransToolkit
+RUN git clone https://github.com/VarunGumma/IndicTransToolkit \
+    && cd IndicTransToolkit \
+    && pip install --editable ./
+
+# Create necessary directories
+RUN mkdir -p templates
+
+# Expose the port the app runs on
+EXPOSE 7860
+
+# Command to run the application
+CMD ["python", "app.py"]
app.py
ADDED
@@ -0,0 +1,93 @@
+from flask import Flask, request, render_template, send_from_directory
+from PIL import Image
+import torch
+from transformers import BlipProcessor, BlipForConditionalGeneration, AutoModelForSeq2SeqLM, AutoTokenizer
+from gtts import gTTS
+import os
+import soundfile as sf
+from transformers import VitsTokenizer, VitsModel, set_seed
+from IndicTransToolkit import IndicProcessor
+
+# Initialize Flask app
+app = Flask(__name__)
+UPLOAD_FOLDER = "./static/uploads/"
+AUDIO_FOLDER = "./static/audio/"
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+os.makedirs(AUDIO_FOLDER, exist_ok=True)
+app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
+app.config["AUDIO_FOLDER"] = AUDIO_FOLDER
+
+# Load models
+blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to("cuda" if torch.cuda.is_available() else "cpu")
+model_name = "ai4bharat/indictrans2-en-indic-1B"
+tokenizer_IT2 = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model_IT2 = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
+model_IT2 = torch.quantization.quantize_dynamic(
+    model_IT2, {torch.nn.Linear}, dtype=torch.qint8
+)
+model_IT2.to("cuda" if torch.cuda.is_available() else "cpu")
+ip = IndicProcessor(inference=True)
+
+# Functions
+def generate_caption(image_path):
+    image = Image.open(image_path).convert("RGB")
+    inputs = blip_processor(image, "image of", return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
+    with torch.no_grad():
+        generated_ids = blip_model.generate(**inputs)
+    return blip_processor.decode(generated_ids[0], skip_special_tokens=True)
+
+def translate_caption(caption, target_languages):
+    src_lang = "eng_Latn"
+    input_sentences = [caption]
+    translations = {}
+
+    for tgt_lang in target_languages:
+        batch = ip.preprocess_batch(input_sentences, src_lang=src_lang, tgt_lang=tgt_lang)
+        inputs = tokenizer_IT2(batch, truncation=True, padding="longest", return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
+        with torch.no_grad():
+            generated_tokens = model_IT2.generate(
+                **inputs, min_length=0, max_length=256, num_beams=5, num_return_sequences=1
+            )
+        with tokenizer_IT2.as_target_tokenizer():
+            translated_tokens = tokenizer_IT2.batch_decode(generated_tokens.detach().cpu().tolist(), skip_special_tokens=True, clean_up_tokenization_spaces=True)
+        translations[tgt_lang] = ip.postprocess_batch(translated_tokens, lang=tgt_lang)[0]
+    return translations
+
+def generate_audio_gtts(text, lang_code, output_file):
+    tts = gTTS(text=text, lang=lang_code)
+    tts.save(output_file)
+    return output_file
+
+@app.route("/", methods=["GET", "POST"])
+def index():
+    if request.method == "POST":
+        image_file = request.files.get("image")
+        if image_file:
+            image_path = os.path.join(app.config["UPLOAD_FOLDER"], image_file.filename)
+            image_file.save(image_path)
+
+            caption = generate_caption(image_path)
+            target_languages = request.form.getlist("languages")
+            translations = translate_caption(caption, target_languages)
+
+            audio_files = {}
+            lang_codes = {
+                "hin_Deva": "hi", "guj_Gujr": "gu", "urd_Arab": "ur", "mar_Deva": "mr"
+            }
+            for lang, translation in translations.items():
+                lang_code = lang_codes.get(lang, "en")
+                audio_file_path = os.path.join(app.config["AUDIO_FOLDER"], f"{lang}.mp3")
+                audio_files[lang] = generate_audio_gtts(translation, lang_code, audio_file_path)
+
+            return render_template(
+                "index.html", image_path=image_path, caption=caption, translations=translations, audio_files=audio_files
+            )
+    return render_template("index.html")
+
+@app.route("/audio/<filename>")
+def audio(filename):
+    return send_from_directory(app.config["AUDIO_FOLDER"], filename)
+
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860)
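For local debugging it can help to drive the helpers in app.py directly, without going through the web form. A minimal sketch, assuming the models download successfully and a test image exists at the hypothetical path sample.jpg (this script is not part of the upload):

# smoke_test.py (hypothetical): exercises the caption -> translate -> TTS pipeline.
# Importing app also loads the BLIP and IndicTrans2 models, which takes a while.
from app import generate_caption, translate_caption, generate_audio_gtts

caption = generate_caption("sample.jpg")  # assumed test image
print("Caption:", caption)

translations = translate_caption(caption, ["hin_Deva", "mar_Deva"])
for lang, text in translations.items():
    print(lang, "->", text)
    # gTTS codes mirroring the lang_codes mapping in app.py
    code = {"hin_Deva": "hi", "mar_Deva": "mr"}[lang]
    generate_audio_gtts(text, code, f"./static/audio/{lang}.mp3")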
index.html
ADDED
@@ -0,0 +1,180 @@
+<!-- <!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Multilingual Assistive Model</title>
+</head>
+<body>
+    <h1>Multilingual Assistive Model</h1>
+    <form action="/" method="POST" enctype="multipart/form-data">
+        <label for="image">Upload an Image:</label>
+        <input type="file" id="image" name="image" required>
+        <br><br>
+        <label>Select Target Languages:</label><br>
+        <input type="checkbox" name="languages" value="hin_Deva"> Hindi<br>
+        <input type="checkbox" name="languages" value="mar_Deva"> Marathi<br>
+        <input type="checkbox" name="languages" value="guj_Gujr"> Gujarati<br>
+        <input type="checkbox" name="languages" value="urd_Arab"> Urdu<br>
+        <button type="submit">Submit</button>
+    </form>
+
+    {% if caption %}
+        <h2>Uploaded Image</h2>
+        <img src="{{ image_path }}" alt="Uploaded Image" style="max-width: 300px;">
+        <h2>Caption: {{ caption }}</h2>
+        <h2>Translations:</h2>
+        <ul>
+            {% for lang, translation in translations.items() %}
+                <li>{{ lang }}: {{ translation }}
+                    <audio controls>
+                        <source src="/audio/{{ lang }}.mp3" type="audio/mpeg">
+                    </audio>
+                </li>
+            {% endfor %}
+        </ul>
+    {% endif %}
+</body>
+</html> -->
+
+
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Multilingual Assistive Model</title>
+    <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
+</head>
+<body class="bg-gray-100 p-8">
+    <div class="max-w-4xl mx-auto">
+        <h1 class="text-3xl font-bold mb-8">Multilingual Assistive Model</h1>
+
+        <div class="bg-white p-6 rounded-lg shadow-lg">
+            <form id="uploadForm" class="space-y-4">
+                <div>
+                    <label class="block text-sm font-medium mb-2">Upload Image</label>
+                    <input type="file" name="image" accept="image/*" required
+                           class="w-full p-2 border rounded">
+                </div>
+
+                <div>
+                    <label class="block text-sm font-medium mb-2">Select Languages</label>
+                    <div class="space-y-2">
+                        <label class="inline-flex items-center">
+                            <input type="checkbox" name="languages[]" value="hin_Deva" checked
+                                   class="form-checkbox">
+                            <span class="ml-2">Hindi</span>
+                        </label>
+                        <br>
+                        <label class="inline-flex items-center">
+                            <input type="checkbox" name="languages[]" value="mar_Deva" checked
+                                   class="form-checkbox">
+                            <span class="ml-2">Marathi</span>
+                        </label>
+                        <br>
+                        <label class="inline-flex items-center">
+                            <input type="checkbox" name="languages[]" value="guj_Gujr"
+                                   class="form-checkbox">
+                            <span class="ml-2">Gujarati</span>
+                        </label>
+                        <br>
+                        <label class="inline-flex items-center">
+                            <input type="checkbox" name="languages[]" value="urd_Arab"
+                                   class="form-checkbox">
+                            <span class="ml-2">Urdu</span>
+                        </label>
+                    </div>
+                </div>
+
+                <button type="submit"
+                        class="w-full bg-blue-500 text-white py-2 px-4 rounded hover:bg-blue-600">
+                    Process Image
+                </button>
+            </form>
+
+            <div id="results" class="mt-8 hidden">
+                <div id="imagePreview" class="mb-4"></div>
+
+                <div class="space-y-4">
+                    <div>
+                        <h2 class="text-xl font-semibold mb-2">Caption:</h2>
+                        <p id="caption" class="text-gray-700"></p>
+                    </div>
+
+                    <div>
+                        <h2 class="text-xl font-semibold mb-2">Translations:</h2>
+                        <div id="translations" class="space-y-2"></div>
+                    </div>
+                </div>
+            </div>
+
+            <div id="loading" class="hidden mt-4">
+                <p class="text-center text-gray-600">Processing... Please wait.</p>
+            </div>
+        </div>
+    </div>
+
+    <script>
+        document.getElementById('uploadForm').addEventListener('submit', async (e) => {
+            e.preventDefault();
+
+            const form = e.target;
+            const formData = new FormData(form);
+
+            // Show loading
+            document.getElementById('loading').classList.remove('hidden');
+            document.getElementById('results').classList.add('hidden');
+
+            try {
+                const response = await fetch('/process', {
+                    method: 'POST',
+                    body: formData
+                });
+
+                const data = await response.json();
+
+                // Display results
+                document.getElementById('caption').textContent = data.caption;
+
+                const translationsDiv = document.getElementById('translations');
+                translationsDiv.innerHTML = '';
+
+                for (const [lang, translation] of Object.entries(data.translations)) {
+                    const div = document.createElement('div');
+                    div.className = 'mb-4';
+                    div.innerHTML = `
+                        <h3 class="font-medium">${lang}:</h3>
+                        <p class="text-gray-700">${translation}</p>
+                        <audio controls src="/audio/${lang}" class="mt-2"></audio>
+                    `;
+                    translationsDiv.appendChild(div);
+                }
+
+                // Show results
+                document.getElementById('results').classList.remove('hidden');
+            } catch (error) {
+                console.error('Error:', error);
+                alert('An error occurred while processing the image.');
+            } finally {
+                document.getElementById('loading').classList.add('hidden');
+            }
+        });
+
+        // Image preview
+        document.querySelector('input[type="file"]').addEventListener('change', (e) => {
+            const file = e.target.files[0];
+            if (file) {
+                const reader = new FileReader();
+                reader.onload = (e) => {
+                    const img = document.createElement('img');
+                    img.src = e.target.result;
+                    img.className = 'max-w-full h-auto rounded';
+                    const previewDiv = document.getElementById('imagePreview');
+                    previewDiv.innerHTML = '';
+                    previewDiv.appendChild(img);
+                };
+                reader.readAsDataURL(file);
+            }
+        });
+    </script>
+</body>
+</html>
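Note the mismatch between this template and app.py: the script posts the form to /process and expects JSON, names its checkboxes languages[], and requests audio from /audio/<lang> without a .mp3 suffix, while app.py only defines "/" (which returns rendered HTML), reads "languages", and saves audio as <lang>.mp3. A minimal sketch of a JSON endpoint that would bridge the two if added to app.py, reusing the helpers defined there (the route itself is an assumption, not part of the upload):

# Hypothetical addition to app.py: a JSON endpoint matching the fetch('/process') call above.
from flask import jsonify

@app.route("/process", methods=["POST"])
def process():
    image_file = request.files.get("image")
    if image_file is None:
        return jsonify({"error": "no image uploaded"}), 400

    image_path = os.path.join(app.config["UPLOAD_FOLDER"], image_file.filename)
    image_file.save(image_path)

    caption = generate_caption(image_path)
    # This template names its checkboxes "languages[]", not "languages".
    target_languages = request.form.getlist("languages[]")
    translations = translate_caption(caption, target_languages)

    lang_codes = {"hin_Deva": "hi", "guj_Gujr": "gu", "urd_Arab": "ur", "mar_Deva": "mr"}
    for lang, text in translations.items():
        # Saved as <lang>.mp3; the hard-coded /audio/${lang} URL in the script above
        # would still need the ".mp3" suffix (or the audio route would need to append it).
        generate_audio_gtts(text, lang_codes.get(lang, "en"),
                            os.path.join(app.config["AUDIO_FOLDER"], f"{lang}.mp3"))

    return jsonify({"caption": caption, "translations": translations})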
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+flask==2.0.1
+Pillow==9.3.0
+torch==2.0.0
+transformers==4.28.0
+gTTS==2.3.1
+soundfile==0.12.1
+numpy==1.24.2
+scipy==1.10.1
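requirements.txt does not list IndicTransToolkit; the Dockerfile installs it from the git repository, so outside the container it must be installed the same way. Once the dependencies are in place, the "/" route can be exercised end to end with Flask's test client. A rough sketch, assuming a local test image at the hypothetical path sample.jpg:

# check_index_route.py (hypothetical): posts an image and one language to "/".
from app import app  # importing app loads the models, which is slow

with app.test_client() as client:
    with open("sample.jpg", "rb") as f:  # assumed test image
        response = client.post(
            "/",
            data={"image": (f, "sample.jpg"), "languages": ["hin_Deva"]},
            content_type="multipart/form-data",
        )
    print(response.status_code)  # expect 200 with the rendered index.html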