VDNT11 committed on
Commit 72a8f7d · verified · 1 Parent(s): d7a1308

Upload 5 files

Files changed (4)
  1. Dockerfile +30 -0
  2. app.py +93 -0
  3. index.html +180 -0
  4. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,30 @@
+ FROM python:3.9-slim
+
+ WORKDIR /app
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     git \
+     build-essential \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy requirements first to leverage Docker cache
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy application code
+ COPY . .
+
+ # Clone and install IndicTransToolkit
+ RUN git clone https://github.com/VarunGumma/IndicTransToolkit \
+     && cd IndicTransToolkit \
+     && pip install --editable ./
+
+ # Create necessary directories
+ RUN mkdir -p templates
+
+ # Expose the port the app runs on
+ EXPOSE 7860
+
+ # Command to run the application
+ CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,93 @@
+ from flask import Flask, request, render_template, send_from_directory
+ from PIL import Image
+ import torch
+ from transformers import BlipProcessor, BlipForConditionalGeneration, AutoModelForSeq2SeqLM, AutoTokenizer
+ from gtts import gTTS
+ import os
+ import soundfile as sf
+ from transformers import VitsTokenizer, VitsModel, set_seed
+ from IndicTransToolkit import IndicProcessor
+
+ # Initialize Flask app
+ app = Flask(__name__)
+ UPLOAD_FOLDER = "./static/uploads/"
+ AUDIO_FOLDER = "./static/audio/"
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+ os.makedirs(AUDIO_FOLDER, exist_ok=True)
+ app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
+ app.config["AUDIO_FOLDER"] = AUDIO_FOLDER
+
+ # Load models
+ blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+ blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to("cuda" if torch.cuda.is_available() else "cpu")
+ model_name = "ai4bharat/indictrans2-en-indic-1B"
+ tokenizer_IT2 = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ model_IT2 = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
+ model_IT2 = torch.quantization.quantize_dynamic(
+     model_IT2, {torch.nn.Linear}, dtype=torch.qint8
+ )
+ model_IT2.to("cuda" if torch.cuda.is_available() else "cpu")
+ ip = IndicProcessor(inference=True)
+
+ # Functions
+ def generate_caption(image_path):
+     image = Image.open(image_path).convert("RGB")
+     inputs = blip_processor(image, "image of", return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
+     with torch.no_grad():
+         generated_ids = blip_model.generate(**inputs)
+     return blip_processor.decode(generated_ids[0], skip_special_tokens=True)
+
+ def translate_caption(caption, target_languages):
+     src_lang = "eng_Latn"
+     input_sentences = [caption]
+     translations = {}
+
+     for tgt_lang in target_languages:
+         batch = ip.preprocess_batch(input_sentences, src_lang=src_lang, tgt_lang=tgt_lang)
+         inputs = tokenizer_IT2(batch, truncation=True, padding="longest", return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
+         with torch.no_grad():
+             generated_tokens = model_IT2.generate(
+                 **inputs, min_length=0, max_length=256, num_beams=5, num_return_sequences=1
+             )
+         with tokenizer_IT2.as_target_tokenizer():
+             translated_tokens = tokenizer_IT2.batch_decode(generated_tokens.detach().cpu().tolist(), skip_special_tokens=True, clean_up_tokenization_spaces=True)
+         translations[tgt_lang] = ip.postprocess_batch(translated_tokens, lang=tgt_lang)[0]
+     return translations
+
+ def generate_audio_gtts(text, lang_code, output_file):
+     tts = gTTS(text=text, lang=lang_code)
+     tts.save(output_file)
+     return output_file
+
+ @app.route("/", methods=["GET", "POST"])
+ def index():
+     if request.method == "POST":
+         image_file = request.files.get("image")
+         if image_file:
+             image_path = os.path.join(app.config["UPLOAD_FOLDER"], image_file.filename)
+             image_file.save(image_path)
+
+             caption = generate_caption(image_path)
+             target_languages = request.form.getlist("languages")
+             translations = translate_caption(caption, target_languages)
+
+             audio_files = {}
+             lang_codes = {
+                 "hin_Deva": "hi", "guj_Gujr": "gu", "urd_Arab": "ur", "mar_Deva": "mr"
+             }
+             for lang, translation in translations.items():
+                 lang_code = lang_codes.get(lang, "en")
+                 audio_file_path = os.path.join(app.config["AUDIO_FOLDER"], f"{lang}.mp3")
+                 audio_files[lang] = generate_audio_gtts(translation, lang_code, audio_file_path)
+
+             return render_template(
+                 "index.html", image_path=image_path, caption=caption, translations=translations, audio_files=audio_files
+             )
+     return render_template("index.html")
+
+ @app.route("/audio/<filename>")
+ def audio(filename):
+     return send_from_directory(app.config["AUDIO_FOLDER"], filename)
+
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=7860)
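For reference, a minimal sketch of exercising the "/" route above from a separate process, assuming the app is already running locally on port 7860 and that the requests package is available (it is not pinned in requirements.txt); the sample image path is a placeholder, not a file shipped with this commit.

# test_client.py -- a minimal sketch, assuming `python app.py` is already running
import requests

URL = "http://localhost:7860/"

with open("sample.jpg", "rb") as f:  # hypothetical test image
    files = {"image": ("sample.jpg", f, "image/jpeg")}
    # "languages" matches request.form.getlist("languages") in the index() route.
    data = [("languages", "hin_Deva"), ("languages", "mar_Deva")]
    response = requests.post(URL, files=files, data=data)

print(response.status_code)
print(response.text[:500])  # rendered index.html with caption and translations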
index.html ADDED
@@ -0,0 +1,180 @@
+ <!-- <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Multilingual Assistive Model</title>
+ </head>
+ <body>
+     <h1>Multilingual Assistive Model</h1>
+     <form action="/" method="POST" enctype="multipart/form-data">
+         <label for="image">Upload an Image:</label>
+         <input type="file" id="image" name="image" required>
+         <br><br>
+         <label>Select Target Languages:</label><br>
+         <input type="checkbox" name="languages" value="hin_Deva"> Hindi<br>
+         <input type="checkbox" name="languages" value="mar_Deva"> Marathi<br>
+         <input type="checkbox" name="languages" value="guj_Gujr"> Gujarati<br>
+         <input type="checkbox" name="languages" value="urd_Arab"> Urdu<br>
+         <button type="submit">Submit</button>
+     </form>
+
+     {% if caption %}
+         <h2>Uploaded Image</h2>
+         <img src="{{ image_path }}" alt="Uploaded Image" style="max-width: 300px;">
+         <h2>Caption: {{ caption }}</h2>
+         <h2>Translations:</h2>
+         <ul>
+             {% for lang, translation in translations.items() %}
+                 <li>{{ lang }}: {{ translation }}
+                     <audio controls>
+                         <source src="/audio/{{ lang }}.mp3" type="audio/mpeg">
+                     </audio>
+                 </li>
+             {% endfor %}
+         </ul>
+     {% endif %}
+ </body>
+ </html> -->
+
+
+ <!DOCTYPE html>
+ <html>
+ <head>
+     <title>Multilingual Assistive Model</title>
+     <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
+ </head>
+ <body class="bg-gray-100 p-8">
+     <div class="max-w-4xl mx-auto">
+         <h1 class="text-3xl font-bold mb-8">Multilingual Assistive Model</h1>
+
+         <div class="bg-white p-6 rounded-lg shadow-lg">
+             <form id="uploadForm" class="space-y-4">
+                 <div>
+                     <label class="block text-sm font-medium mb-2">Upload Image</label>
+                     <input type="file" name="image" accept="image/*" required
+                            class="w-full p-2 border rounded">
+                 </div>
+
+                 <div>
+                     <label class="block text-sm font-medium mb-2">Select Languages</label>
+                     <div class="space-y-2">
+                         <label class="inline-flex items-center">
+                             <input type="checkbox" name="languages[]" value="hin_Deva" checked
+                                    class="form-checkbox">
+                             <span class="ml-2">Hindi</span>
+                         </label>
+                         <br>
+                         <label class="inline-flex items-center">
+                             <input type="checkbox" name="languages[]" value="mar_Deva" checked
+                                    class="form-checkbox">
+                             <span class="ml-2">Marathi</span>
+                         </label>
+                         <br>
+                         <label class="inline-flex items-center">
+                             <input type="checkbox" name="languages[]" value="guj_Gujr"
+                                    class="form-checkbox">
+                             <span class="ml-2">Gujarati</span>
+                         </label>
+                         <br>
+                         <label class="inline-flex items-center">
+                             <input type="checkbox" name="languages[]" value="urd_Arab"
+                                    class="form-checkbox">
+                             <span class="ml-2">Urdu</span>
+                         </label>
+                     </div>
+                 </div>
+
+                 <button type="submit"
+                         class="w-full bg-blue-500 text-white py-2 px-4 rounded hover:bg-blue-600">
+                     Process Image
+                 </button>
+             </form>
+
+             <div id="results" class="mt-8 hidden">
+                 <div id="imagePreview" class="mb-4"></div>
+
+                 <div class="space-y-4">
+                     <div>
+                         <h2 class="text-xl font-semibold mb-2">Caption:</h2>
+                         <p id="caption" class="text-gray-700"></p>
+                     </div>
+
+                     <div>
+                         <h2 class="text-xl font-semibold mb-2">Translations:</h2>
+                         <div id="translations" class="space-y-2"></div>
+                     </div>
+                 </div>
+             </div>
+
+             <div id="loading" class="hidden mt-4">
+                 <p class="text-center text-gray-600">Processing... Please wait.</p>
+             </div>
+         </div>
+     </div>
+
+     <script>
+         document.getElementById('uploadForm').addEventListener('submit', async (e) => {
+             e.preventDefault();
+
+             const form = e.target;
+             const formData = new FormData(form);
+
+             // Show loading
+             document.getElementById('loading').classList.remove('hidden');
+             document.getElementById('results').classList.add('hidden');
+
+             try {
+                 const response = await fetch('/process', {
+                     method: 'POST',
+                     body: formData
+                 });
+
+                 const data = await response.json();
+
+                 // Display results
+                 document.getElementById('caption').textContent = data.caption;
+
+                 const translationsDiv = document.getElementById('translations');
+                 translationsDiv.innerHTML = '';
+
+                 for (const [lang, translation] of Object.entries(data.translations)) {
+                     const div = document.createElement('div');
+                     div.className = 'mb-4';
+                     div.innerHTML = `
+                         <h3 class="font-medium">${lang}:</h3>
+                         <p class="text-gray-700">${translation}</p>
+                         <audio controls src="/audio/${lang}" class="mt-2"></audio>
+                     `;
+                     translationsDiv.appendChild(div);
+                 }
+
+                 // Show results
+                 document.getElementById('results').classList.remove('hidden');
+             } catch (error) {
+                 console.error('Error:', error);
+                 alert('An error occurred while processing the image.');
+             } finally {
+                 document.getElementById('loading').classList.add('hidden');
+             }
+         });
+
+         // Image preview
+         document.querySelector('input[type="file"]').addEventListener('change', (e) => {
+             const file = e.target.files[0];
+             if (file) {
+                 const reader = new FileReader();
+                 reader.onload = (e) => {
+                     const img = document.createElement('img');
+                     img.src = e.target.result;
+                     img.className = 'max-w-full h-auto rounded';
+                     const previewDiv = document.getElementById('imagePreview');
+                     previewDiv.innerHTML = '';
+                     previewDiv.appendChild(img);
+                 };
+                 reader.readAsDataURL(file);
+             }
+         });
+     </script>
+ </body>
+ </html>
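Note that the script in this template posts the form to /process and expects a JSON response with "caption" and "translations" keys, while app.py in this commit only defines the "/" and "/audio/<filename>" routes. A minimal sketch of what such a JSON route might look like, reusing the helpers and config already defined in app.py; this is a hypothetical addition, not part of this commit.

# Hypothetical /process route for app.py (not in this commit); assumes the
# existing app, request, os, generate_caption, translate_caption and
# generate_audio_gtts names from app.py are in scope.
from flask import jsonify

@app.route("/process", methods=["POST"])
def process():
    image_file = request.files.get("image")
    if not image_file:
        return jsonify({"error": "no image uploaded"}), 400
    image_path = os.path.join(app.config["UPLOAD_FOLDER"], image_file.filename)
    image_file.save(image_path)

    caption = generate_caption(image_path)
    # The checkboxes in this template are named "languages[]".
    target_languages = request.form.getlist("languages[]")
    translations = translate_caption(caption, target_languages)

    lang_codes = {"hin_Deva": "hi", "guj_Gujr": "gu", "urd_Arab": "ur", "mar_Deva": "mr"}
    for lang, translation in translations.items():
        audio_path = os.path.join(app.config["AUDIO_FOLDER"], f"{lang}.mp3")
        generate_audio_gtts(translation, lang_codes.get(lang, "en"), audio_path)

    return jsonify({"caption": caption, "translations": translations})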
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ flask==2.0.1
+ Pillow==9.3.0
+ torch==2.0.0
+ transformers==4.28.0
+ gTTS==2.3.1
+ soundfile==0.12.1
+ numpy==1.24.2
+ scipy==1.10.1