% Lin and Bertasius, "Siamese Vision Transformers are Scalable Audio-visual Learners", ECCV 2024.
@InProceedings{avsiam_eccv24,
  author    = {Lin, Yan-Bo and Bertasius, Gedas},
  title     = {{Siamese} Vision Transformers are Scalable Audio-visual Learners},
  booktitle = {Proceedings of the European Conference on Computer Vision ({ECCV})},
  month     = oct,
  year      = {2024},
}