% AudCast -- arXiv preprint 2503.19824 (primary class cs.GR), 2025.
% "AudCast" is brace-protected in the title so sentence-casing styles keep its capitalisation.
@misc{guan2025audcastaudiodrivenhumanvideo,
  author        = {Guan, Jiazhi and
                   Wang, Kaisiyuan and
                   Xu, Zhiliang and
                   Yang, Quanwei and
                   Sun, Yasheng and
                   He, Shengyi and
                   Liang, Borong and
                   Cao, Yukang and
                   Li, Yingying and
                   Feng, Haocheng and
                   Ding, Errui and
                   Wang, Jingdong and
                   Zhao, Youjian and
                   Zhou, Hang and
                   Liu, Ziwei},
  title         = {{AudCast}: Audio-Driven Human Video Generation by Cascaded Diffusion Transformers},
  year          = {2025},
  eprint        = {2503.19824},
  archivePrefix = {arXiv},
  primaryClass  = {cs.GR},
  url           = {https://arxiv.org/abs/2503.19824},
}