"use client"; import { Typography } from "@material-tailwind/react"; import AboutCard from "@/components/about-card"; import React from 'react'; // { // title: "Title", // description: "", // subTitle: "", // imageName : "paper12.png", // paper_links :"" // }, const EVENT_INFO = [ { title: "Tuning-Free Noise Rectification:for High Fidelity Image-to-Video Generation", description: "Noise Rectification is a simple but effective method for image-to-video generation in open domains, and is tuning-free and plug-and-play. Below are several comparisons between method and other methods.", subTitle: "Noise Control/Image-to-Video Generation", imageName : "paper16.mp4", paper_links :"https://arxiv.org/pdf/2403.02827.pdf" }, { title: "ConsistI2V: Enhancing Visual Consistency for Image-to-Video Generation", description: "Image-to-video (I2V) generation aims to use the \ initial frame (alongside a text prompt) to create a \ video sequence. A grand challenge in I2V generation is to maintain visual consistency throughout \ the video: existing methods often struggle to preserve the integrity of the subject, background, and \ style from the first frame, as well as ensure a fluid \ and logical progression within the video narrative ", subTitle: "Image2Video Generation/Consistency", imageName : "paper15.png", paper_links :"https://arxiv.org/pdf/2402.04324.pdf" }, { title: "GEA: Reconstructing Expressive 3D Gaussian Avatar from Monocular Video", description: "A novel method utilizing 3D Gaussians for creating expressive 3D avatars achieves state-of-the-art performance in photorealistic novel view synthesis. It features accurate pose estimation, attention-aware networks, and an iterative re-initialization strategy for high-fidelity reconstructions and fine-grained control over body and hand poses. Project-Page Project page: \ https://3d-aigc.github.io/GEA/", subTitle: "Gaussian/Nerf/3D Reconstruction", imageName : "paper14.mp4", paper_links :"https://arxiv.org/pdf/2402.16607.pdf" }, { title: "PeRFlow: Piecewise Rectified Flow as Universal Plug-and-Play Accelerator", description: "PeRFlow trains piecewise-linear rectified flow models for fast sampling. These models can be initialized from pretrained diffusion models, such as Stable Diffusion (SD). The obtained weights of PeRFlow serve as a general accelerator module which is compatible with various fine-tuned stylized SD models as well as SD-based generation/editing pipelines. Specifically, \ are computed by the PeRFlow's weights minus the pretrained SD. One can fuse the PeRFlow.\ into various SD pipelines for (conditional) image generation/editing to enable high-quality few-step inference.", subTitle: "Finetune LORAs / Diffusion Models / PeRFlow", imageName : "perflow-v1.mp4", paper_links :"https://piecewise-rectified-flow.github.io/" }, { title: "Deformable One-shot Face Stylization via DINO Semantic Guidance", description: "This paper presents a novel approach to one-shot face stylization, focusing on appearance and structure. They use a self-supervised vision transformer, DINO-ViT, and integrate spatial transformers into StyleGAN for deformation-aware stylization. Innovative constraints and style-mixing enhance deformability and efficiency, demonstrating superiority over existing methods through extensive comparisons. Code is available at https://github.com/zichongc/DoesFS. ", subTitle: "GANS/StyleGAN/Deformable Stylization", imageName : "paper13.png", paper_links :"https://arxiv.org/pdf/2403.00459.pdf" }, { title: "Pix2Gif: Motion-Guided Diffusion for GIF Generation", description: "Pix2Gif introduces a novel approach to image-to-GIF generation using text and motion prompts. Their model utilizes motion-guided warping and perceptual loss to ensure content consistency. Pretrained on curated data, it effectively translates prompts into coherent GIFs, demonstrated through extensive experiments. Page:https://hiteshk03.github.io/Pix2Gif/", subTitle: "Text2Video/Animation/Diffusion", imageName : "paper111.png", paper_links :"https://arxiv.org/pdf/2403.04634.pdf" }, { title: "PixArt-Σ: Weak-to-Strong Training of Diffusion \ Transformer for 4K Text-to-Image Generation", description: "PixArt-Σ is a cutting-edge Diffusion Transformer model that generates 4K images with superior fidelity and alignment to text prompts. It achieves this through high-quality training data and efficient token compression, resulting in smaller model size and superior image quality compared to existing models. Project-Page: https://pixart-alpha.github.io/PixArt-sigma-project/", subTitle: "Speech/Talking Face Generation", imageName : "paper12.png", paper_links :"https://arxiv.org/pdf/2403.04692.pdf" }, { title: "EmoSpeaker: One-shot Fine-grained \ Emotion-Controlled Talking Face Generation", description: "The proposal introduces EmoSpeaker, a method enhancing emotional expression in generated facial animations. It employs a visual attribute-guided audio decoupler, fine-grained emotion coefficient prediction, and intensity control to improve emotional quality and lip synchronization. Experimental results show superiority over existing methods. Project-Page: https://peterfanfan.github.io/EmoSpeaker/", subTitle: "Speech/Talking Face Generation", imageName : "paper10.png", paper_links :"https://arxiv.org/pdf/2402.01422.pdf" }, { title: "AVI-Talking: Learning Audio-Visual Instructions for \ Expressive 3D Talking Face Generation", description: "AVI-Talking, a system for creating lifelike talking faces that match speech with expressive facial movements. Using advanced language models, it generates instructions for facial details based on speech, resulting in realistic and emotionally consistent animations.", subTitle: "Speech/LLMs/Talking Head", imageName : "paper9.png", paper_links :"https://arxiv.org/pdf/2402.16124.pdf" }, { title: "REAL3D-PORTRAIT: ONE-SHOT REALISTIC 3D \ TALKING PORTRAIT SYNTHESIS", description: "Real3D-Portrait addresses limitations in one-shot 3D talking portrait generation by enhancing reconstruction accuracy, stable animation, and realism. It employs a large image-to-plane model, efficient motion adapter, and head-torso-background super-resolution model for realistic videos, alongside a generalizable audio-to-motion model for audio-driven animation.", subTitle: "Talking Head/Face Generation/Lipsync/Nerf", imageName : "paper8.png", paper_links :"https://arxiv.org/pdf/2401.08503.pdf" }, { title: "Resolution-Agnostic Neural Compression for \ High-Fidelity Portrait Video Conferencing via \ Implicit Radiance Fields", description: "A novel low bandwidth neural compression approach for high-fidelity portrait video conferencing is proposed. Dynamic neural radiance fields reconstruct talking heads with expression features, enabling ultra-low bandwidth transmission and high fidelity portrait rendering via volume rendering.", subTitle: "Talking Head/Face Generation/Lipsync/Nerf", imageName : "paper7.png", paper_links :"https://arxiv.org/pdf/2402.16599.pdf" }, { title: " Learning Dynamic Tetrahedra for High-Quality Talking Head Synthesis", description: "The paper introduces DynTet, a novel hybrid representation combining neural networks and dynamic meshes for accurate facial avatar generation. It addresses artifacts and jitters in implicit methods like NeRF, achieving fidelity, lip synchronization, and real-time performance. Code is available. https://github.com/zhangzc21/DynTet", subTitle: "Talking Head/Face Generation/Lipsync", imageName : "paper6.png", paper_links :"https://arxiv.org/pdf/2402.17364.pdf" }, { title: "EMO: Emote Portrait Alive - Generating \ Expressive Portrait Videos with Audio2Video \ Diffusion Model under Weak Conditions", description: "EMO, a pioneering framework for generating lifelike talking head videos by directly synthesizing video from audio inputs. Unlike traditional methods, EMO bypasses 3D models, ensuring seamless transitions and maintaining identity. Experimental results show superior expressiveness and realism, even in singing videos.", subTitle: "Talking Head/Face Generation/Lipsync", imageName : "paper5.png", paper_links :"https://arxiv.org/pdf/2402.17485.pdf" }, { title: "Lips Are Lying: Spotting the Temporal Inconsistency between Audio and Visual in Lip-Syncing DeepFakes", description: " DeepFake can be bifurcated into entertainment applications like face swapping and illicit uses such as lipsyncing fraud", subTitle: "Lipsync", imageName : "paper1.png", paper_links :"https://arxiv.org/pdf/2401.15668.pdf" }, { title: "FaceChain-ImagineID: Freely Crafting High-Fidelity Diverse Talking Faces from Disentangled Audio", description: "This paper proposes a method for generating diverse and synchronized talking faces from a single audio input. It tackles challenges by decoupling identity, content, and emotion from audio and maintaining diversity and consistency. The method involves Progressive Audio Disentanglement and Controllable Coherent Frame generation.", subTitle: "Lipsync", imageName : "paper2.png", paper_links :"https://arxiv.org/pdf/2403.01901.pdf" }, { title: "G4G: A Generic Framework for High Fidelity Talking Face Generation with Fine-grained Intra-modal Alignment", description: "This paper addresses the challenge of generating high-fidelity talking faces with synchronized lip movements for arbitrary audio. They propose G4G, a framework enhancing audio-image alignment using diagonal matrices and multi-scale supervision, achieving competitive results.", subTitle: "Lipsync", imageName : "paper3.png", paper_links :"https://arxiv.org/pdf/2402.18122.pdf" }, { title: "Context-aware Talking Face Video Generation", description: "This paper introduces a method for generating multi-person talking face videos considering contextual interactions. It utilizes facial landmarks to control video generation stages, achieving synchronized and coherent results surpassing baselines.", subTitle: "Talking Head/Face Generation", imageName : "paper4.png", paper_links :"https://arxiv.org/pdf/2402.18092.pdf" }, ]; export function AboutEvent() { return (
Every Day Update Gen AI Top Papers and Research Contribute to the AI community by sharing your insights and expertise
{EVENT_INFO.map((props, idx) => ( ))}
); } export default AboutEvent;