‘*’ denotes equal contribution
2024 Pralekha: An Indic Document Alignment Evaluation Benchmark
Sanjay Suryanarayanan, Haiyue Song, Mohammed Safi Ur Rahman Khan, Anoop Kunchukuttan, Mitesh M. Khapra, and Raj Dabre
arXiv preprint arXiv:2411.19096, 2024
@comment{arXiv preprint: the identifier is stored in eprint/archiveprefix instead of journal. "data" is a non-standard field, ignored by standard styles, that links the released dataset.}
@misc{suryanarayanan2024pralekha,
  author        = {Suryanarayanan, Sanjay and Song, Haiyue and Khan, Mohammed Safi Ur Rahman and Kunchukuttan, Anoop and Khapra, Mitesh M. and Dabre, Raj},
  title         = {{Pralekha}: An {Indic} Document Alignment Evaluation Benchmark},
  year          = {2024},
  eprint        = {2411.19096},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2411.19096},
  data          = {https://huggingface.co/datasets/ai4bharat/Pralekha},
}
BhasaAnuvaad: A Speech Translation Dataset for 14 Indian Languages
Sparsh Jain, Ashwin Sankar, Devilal Choudhary, Dhairya Suman, Nikhil Narasimhan, Mohammed Safi Ur Rahman Khan, Anoop Kunchukuttan, Mitesh M. Khapra, and Raj Dabre
arXiv preprint arXiv:2411.04699, 2024
@comment{arXiv preprint: the identifier is stored in eprint/archiveprefix instead of journal. "data" is a non-standard field, ignored by standard styles, that links the released data collection.}
@misc{jain2024bhasaanuvaad,
  author        = {Jain, Sparsh and Sankar, Ashwin and Choudhary, Devilal and Suman, Dhairya and Narasimhan, Nikhil and Khan, Mohammed Safi Ur Rahman and Kunchukuttan, Anoop and Khapra, Mitesh M. and Dabre, Raj},
  title         = {{BhasaAnuvaad}: A Speech Translation Dataset for 14 {Indian} Languages},
  year          = {2024},
  eprint        = {2411.04699},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2411.04699},
  data          = {https://huggingface.co/collections/ai4bharat/bhasaanuvaad-672b3790b6470eab68b1cb87},
}
MILU: A Multi-task Indic Language Understanding Benchmark
Sshubam Verma, Mohammed Safi Ur Rahman Khan, Vishwajeet Kumar, Rudra Murthy, and Jaydeep Sen
arXiv preprint arXiv:2411.02538, 2024
@comment{arXiv preprint: the identifier is stored in eprint/archiveprefix instead of journal. "data" is a non-standard field, ignored by standard styles, that links the released dataset.}
@misc{verma2024milu,
  author        = {Verma, Sshubam and Khan, Mohammed Safi Ur Rahman and Kumar, Vishwajeet and Murthy, Rudra and Sen, Jaydeep},
  title         = {{MILU}: A Multi-task {Indic} Language Understanding Benchmark},
  year          = {2024},
  eprint        = {2411.02538},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2411.02538},
  data          = {https://huggingface.co/datasets/ai4bharat/MILU},
}
Cross-Lingual Auto Evaluation for Assessing Multilingual LLMs
Sumanth Doddapaneni*, Mohammed Safi Ur Rahman Khan*, Dilip Venkatesh, Raj Dabre, Anoop Kunchukuttan, and Mitesh M. Khapra
arXiv preprint arXiv:2410.13394, 2024
@comment{arXiv preprint: the identifier is stored in eprint/archiveprefix instead of journal. "data" is a non-standard field, ignored by standard styles, that links the released data collection.}
@misc{doddapaneni2024crosslingual,
  author        = {Doddapaneni, Sumanth and Khan, Mohammed Safi Ur Rahman and Venkatesh, Dilip and Dabre, Raj and Kunchukuttan, Anoop and Khapra, Mitesh M.},
  title         = {Cross-Lingual Auto Evaluation for Assessing Multilingual {LLMs}},
  year          = {2024},
  eprint        = {2410.13394},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2410.13394},
  data          = {https://huggingface.co/collections/ai4bharat/cia-suite-66ea9a7e18a6c70bd8de27a1},
}
EMNLP 2024 Finding Blind Spots in Evaluator LLMs with Interpretable Checklists
Sumanth Doddapaneni*, Mohammed Safi Ur Rahman Khan*, Sshubam Verma, and Mitesh M. Khapra
Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, Nov 2024
EMNLP-2024 Outstanding Paper Award
@comment{Conference paper: typed as inproceedings with the proceedings title in booktitle rather than journal. "data" is a non-standard field, ignored by standard styles, that links the released dataset.}
@inproceedings{doddapaneni2024finding,
  author    = {Doddapaneni, Sumanth and Khan, Mohammed Safi Ur Rahman and Verma, Sshubam and Khapra, Mitesh M.},
  title     = {Finding Blind Spots in Evaluator {LLMs} with Interpretable Checklists},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  year      = {2024},
  month     = nov,
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {16279--16309},
  data      = {https://huggingface.co/datasets/ai4bharat/FBI},
}
ACL 2024 IndicLLMSuite: A Blueprint for Creating Pre-training and Fine-Tuning Datasets for Indian Languages
Mohammed Safi Ur Rahman Khan*, Priyam Mehta*, Ananth Sankar, Umashankar Kumaravelan, Sumanth Doddapaneni, Suriyaprasaad G, Varun Balan G, Sparsh Jain, Anoop Kunchukuttan, Pratyush Kumar, Raj Dabre, and Mitesh M. Khapra
Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Aug 2024
ACL-2024 Outstanding Paper Award
@comment{Conference paper: typed as inproceedings with the proceedings title in booktitle rather than journal. "data" is a non-standard field, ignored by standard styles, that links the released data collection.}
@inproceedings{khan2024indicllmsuite,
  author    = {Khan, Mohammed Safi Ur Rahman and Mehta, Priyam and Sankar, Ananth and Kumaravelan, Umashankar and Doddapaneni, Sumanth and G, Suriyaprasaad and G, Varun Balan and Jain, Sparsh and Kunchukuttan, Anoop and Kumar, Pratyush and Dabre, Raj and Khapra, Mitesh M.},
  title     = {{IndicLLMSuite}: A Blueprint for Creating Pre-training and Fine-Tuning Datasets for {Indian} Languages},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  year      = {2024},
  month     = aug,
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  pages     = {15831--15879},
  data      = {https://huggingface.co/collections/ai4bharat/indicllmsuite-65ee7d225c337fcfa0991707},
}
Airavata: Introducing Hindi Instruction-tuned LLM
Jay Gala, Thanmay Jayakumar, Jaavid Aktar Husain, Aswanth Kumar M, Mohammed Safi Ur Rahman Khan, Diptesh Kanojia, Ratish Puduppully, Mitesh M. Khapra, Raj Dabre, Rudra Murthy, and Anoop Kunchukuttan
arXiv preprint arXiv:2401.15006, Jan 2024
@comment{arXiv preprint: the identifier is stored in eprint/archiveprefix instead of journal. "data" is a non-standard field, ignored by standard styles, that links the released dataset.}
@misc{gala2024airavata,
  author        = {Gala, Jay and Jayakumar, Thanmay and Husain, Jaavid Aktar and M, Aswanth Kumar and Khan, Mohammed Safi Ur Rahman and Kanojia, Diptesh and Puduppully, Ratish and Khapra, Mitesh M. and Dabre, Raj and Murthy, Rudra and Kunchukuttan, Anoop},
  title         = {{Airavata}: Introducing {Hindi} Instruction-tuned {LLM}},
  year          = {2024},
  eprint        = {2401.15006},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2401.15006},
  data          = {https://huggingface.co/datasets/ai4bharat/indic-instruct-data-v0.1},
}