% Vx2Text conference paper (CVPR, June 2021).
% The model name is brace-protected in the title so sentence-casing styles keep
% its capitalisation; the month uses the predefined macro so styles can
% abbreviate or localise it.
@inproceedings{vx2text_2021_CVPR,
  author    = {Lin, Xudong and Bertasius, Gedas and Wang, Jue and Chang, Shih-Fu and Parikh, Devi and Torresani, Lorenzo},
  title     = {{Vx2Text}: End-to-End Learning of Video-Based Text Generation from Multimodal Inputs},
  booktitle = {The {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2021},
}