Spaces:

byh711
/

FLODA-Deepfake-Detection

Sleeping

App Files Files Community

byh711 committed on Sep 12, 2024

Commit

85aeaf9

verified ·

1 Parent(s): 6929548

Update app.py

Browse files

Files changed (1) hide show

app.py +134 -127

app.py CHANGED Viewed

@@ -1,142 +1,149 @@
 import gradio as gr
-import numpy as np
-import random
-#import spaces #[uncomment to use ZeroGPU]
-from diffusers import DiffusionPipeline
 import torch
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model_repo_id = "stabilityai/sdxl-turbo" #Replace to the model you would like to use
-if torch.cuda.is_available():
-    torch_dtype = torch.float16
-else:
-    torch_dtype = torch.float32
-pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
-pipe = pipe.to(device)
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
-#@spaces.GPU #[uncomment to use ZeroGPU]
-def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator().manual_seed(seed)
-    image = pipe(
-        prompt = prompt,
-        negative_prompt = negative_prompt,
-        guidance_scale = guidance_scale,
-        num_inference_steps = num_inference_steps,
-        width = width,
-        height = height,
-        generator = generator
-    ).images[0]
-    return image, seed
-examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
-]
-css="""
-#col-container {
-    margin: 0 auto;
-    max-width: 640px;
 }
 """
-with gr.Blocks(css=css) as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""
-        # Text-to-Image Gradio Template
-        """)
         with gr.Row():
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=1,
-                placeholder="Enter your prompt",
-                container=False,
-            )
-            run_button = gr.Button("Run", scale=0)
-        result = gr.Image(label="Result", show_label=False)
-        with gr.Accordion("Advanced Settings", open=False):
-            negative_prompt = gr.Text(
-                label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
-                visible=False,
-            )
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-            with gr.Row():
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024, #Replace with defaults that work for your model
-                )
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024, #Replace with defaults that work for your model
-                )
-            with gr.Row():
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0, #Replace with defaults that work for your model
-                )
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=50,
-                    step=1,
-                    value=2, #Replace with defaults that work for your model
-                )
-        gr.Examples(
-            examples = examples,
-            inputs = [prompt]
         )
-    gr.on(
-        triggers=[run_button.click, prompt.submit],
-        fn = infer,
-        inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
-        outputs = [result, seed]
-    )
-demo.queue().launch()

 import gradio as gr
+from transformers import AutoProcessor, AutoModelForCausalLM
+from PIL import Image
 import torch
+from peft import PeftModel
+import numpy as np
+# Run on the GPU when one is available, otherwise fall back to the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# The same full-precision dtype is used regardless of the selected device.
+torch_dtype = torch.float32
+# Load the fine-tuned base model
+base_model = AutoModelForCausalLM.from_pretrained('byh711/FLODA-deepfake', trust_remote_code=True, torch_dtype=torch_dtype).to(device)
+processor = AutoProcessor.from_pretrained('byh711/FLODA-deepfake', trust_remote_code=True)
+# Load the LoRA weights
+# NOTE(review): `peft_model_path` is not assigned anywhere in the visible diff;
+# unless it is defined elsewhere, this line raises NameError at startup -- confirm.
+model = PeftModel.from_pretrained(base_model, peft_model_path)
+# Put the wrapped model in evaluation mode; the app only runs generation.
+model.eval()
+def caption_generate(task_prompt, text_input=None, image=None):
+    """Generate text for an image with the PEFT-wrapped model.
+
+    Args:
+        task_prompt: Task token sent to the model; also the key used to read
+            the parsed answer.
+        text_input: Optional extra text appended to ``task_prompt``.
+        image: Input picture as a NumPy array or a PIL image. Although the
+            parameter defaults to ``None``, an image is required because its
+            size is needed for post-processing.
+
+    Returns:
+        The parsed answer for ``task_prompt`` with its first and last
+        characters removed.
+
+    Raises:
+        ValueError: If ``image`` is ``None``.
+    """
+    # Fail early with a clear message instead of an AttributeError on
+    # ``image.width`` further down.
+    if image is None:
+        raise ValueError("caption_generate requires an image")
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    # Normalise to three channels, matching what run_example does.
+    image = image.convert("RGB")
+    prompt = task_prompt if text_input is None else task_prompt + text_input
+    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
+    generated_ids = model.generate(
+        input_ids=inputs["input_ids"],
+        pixel_values=inputs["pixel_values"],
+        max_new_tokens=1024,
+        num_beams=3
+    )
+    # Special tokens are kept in the decoded text that is handed to
+    # post_process_generation.
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+    parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
+    # Drop the first and last character of the parsed answer.
+    return parsed_answer[task_prompt][1:-1]
+def run_example(task_prompt, text_input=None, image=None):
+    """Classify an image as real or fake and return a readable verdict.
+
+    Args:
+        task_prompt: Task token sent to the model, e.g.
+            ``"<DEEPFAKE_DETECTION>"``; also the key used to read the result.
+        text_input: Optional extra text appended to ``task_prompt``.
+        image: Input picture as a NumPy array or a PIL image.
+
+    Returns:
+        ``"This is a real image."`` when the model answers "yes",
+        ``"This is a fake image."`` when it answers "no", otherwise a message
+        echoing the raw model output.
+
+    Raises:
+        gr.Error: If no image was supplied.
+    """
+    # Gradio passes None when Submit is clicked before an image is uploaded;
+    # show a readable message instead of crashing on ``image.convert``.
+    if image is None:
+        raise gr.Error("Please upload an image before submitting.")
+    prompt = task_prompt if text_input is None else task_prompt + text_input
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    # Normalise grayscale/RGBA uploads to three channels.
+    image = image.convert("RGB")
+    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
+    # Cast only floating-point tensors; integer tensors are left unchanged.
+    inputs = {k: v.to(torch_dtype) if v.is_floating_point() else v for k, v in inputs.items()}
+    # NOTE(review): generation goes through `base_model`, while
+    # caption_generate uses the PEFT-wrapped `model` -- confirm this is intended.
+    generated_ids = base_model.generate(
+        input_ids=inputs["input_ids"],
+        pixel_values=inputs["pixel_values"],
+        max_new_tokens=1024,
+        num_beams=3
+    )
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+    parsed = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
+    # Read the result under the requested task (as caption_generate does)
+    # rather than a hard-coded token.
+    result = parsed[task_prompt]
+    # Tolerate surrounding whitespace and case differences in the answer.
+    answer = str(result).strip().lower()
+    if answer == "yes":
+        return "This is a real image."
+    if answer == "no":
+        return "This is a fake image."
+    return f"Uncertain. Model output: {result}"
+# Define the Gradio interface
+# Custom stylesheet passed to gr.Blocks below: dark colours for the page body,
+# the #output element, buttons, textboxes and markdown.
+css = """
+body {
+    background-color: #1e1e2e;
+    color: #d4d4dc;
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+}
+#output {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #444;
+    background-color: #282c34;
+    color: #f1f1f1;
+    padding: 10px;
+}
+.gr-button {
+    background-color: #3a3f51;
+    border: none;
+    color: #ffffff;
+    padding: 10px 20px;
+    text-align: center;
+    font-size: 14px;
+    cursor: pointer;
+    transition: 0.3s;
+}
+.gr-button:hover {
+    background-color: #4b5263;
+}
+.gr-textbox {
+    background-color: #2e2e38;
+    border: 1px solid #555;
+    color: #ffffff;
+}
+.gr-markdown {
+    color: #d4d4dc;
 }
 """
+# JavaScript passed to gr.Blocks(js=...): if the URL's `__theme` query
+# parameter is not 'dark', it sets it to 'dark' and navigates to that URL.
+js_func = """
+function refresh() {
+    const url = new URL(window.location);
+    if (url.searchParams.get('__theme') !== 'dark') {
+        url.searchParams.set('__theme', 'dark');
+        window.location.href = url.href;
+    }
+}
+"""
+# Markdown shown at the top of the page: heading first, then a short
+# description with links to Florence-2 and the FLODA repository.
+TITLE = "# FLODA: Vision-Language Models for Deepfake Detection"
+DESCRIPTION = """
+FLODA (FLorence-2 Optimized for Deepfake Assessment) is an advanced deepfake detection model leveraging the power of [Florence-2](https://huggingface.co/microsoft/Florence-2-base-ft).
+FLODA combines image captioning with authenticity assessment in a single end-to-end architecture, demonstrating superior performance compared to existing benchmarks.
+Learn more about FLODA in the published paper [here](https://github.com/byh711/FLODA).
+"""
+# Assemble the page: title and description, then a single tab holding one row
+# with two columns (image input + Submit button, and the text output).
+with gr.Blocks(js=js_func, css=css) as demo:
+    gr.Markdown(TITLE)
+    gr.Markdown(DESCRIPTION)
+    with gr.Tab(label="FLODA: Deepfake Detection"):
         with gr.Row():
+            with gr.Column():
+                # type="numpy" hands the handler an ndarray, which
+                # run_example converts to a PIL image.
+                input_img = gr.Image(label="Input Picture", type="numpy")
+                submit_btn = gr.Button(value="Submit")
+            with gr.Column():
+                output_text = gr.Textbox(label="Output Text")
+        # The click handler always uses the <DEEPFAKE_DETECTION> task with no
+        # extra text; only the uploaded image varies.
+        submit_btn.click(
+            fn=lambda image: run_example("<DEEPFAKE_DETECTION>", text_input=None, image=image),
+            inputs=[input_img],
+            outputs=[output_text]
         )
+# Start the app with Gradio's debug option enabled.
+demo.launch(debug=True)