
The CRONOS project aims to develop an advanced communication platform that connects humans with diverse mobile entities. Our research integrates V2V/V2X systems, large language models, and VR simulation technology, focusing on realizing seamless, safe, human-centered interaction in next-generation mobility environments.
Development of a robust platform for heterogeneous communication messages using foundation models
Creation of an evolving communication platform that accommodates diverse needs and scenarios
Enhancement of interaction for seamless communication between humans and mobile AI
Demonstration of a communication platform for human-mobility-AI co-creative traffic systems
@inproceedings{Li2025d,
title = {Multi-PrefDrive: Optimizing Large Language Models for Autonomous Driving Through Multi-Preference Tuning},
author = {Yun Li and Ehsan Javanmardi and Simon Thompson and Kai Katsumata and Alex Orsholits and Manabu Tsukada},
url = {https://liyun0607.github.io/},
doi = {10.1109/IROS60139.2025.11247608},
year = {2025},
date = {2025-10-19},
urldate = {2025-10-19},
booktitle = {2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
address = {Hangzhou, China},
abstract = {This paper introduces Multi-PrefDrive, a framework that significantly enhances LLM-based autonomous driving through multidimensional preference tuning. Aligning LLMs with human driving preferences is crucial yet challenging, as driving scenarios involve complex decisions where multiple incorrect actions can correspond to a single correct choice. Traditional binary preference tuning fails to capture this complexity. Our approach pairs each chosen action with multiple rejected alternatives, better reflecting real-world driving decisions. By implementing the Plackett-Luce preference model, we enable nuanced ranking of actions across the spectrum of possible errors. Experiments in the CARLA simulator demonstrate that our algorithm achieves an 11.0% improvement in overall score and an 83.6% reduction in infrastructure collisions, while showing perfect compliance with traffic signals in certain environments. Comparative analysis against DPO and its variants reveals Multi-PrefDrive’s superior discrimination between chosen and rejected actions, achieving a margin value of 25, which translates directly into enhanced driving performance. We implement memory-efficient techniques including LoRA and 4-bit quantization to enable deployment on consumer-grade hardware and will open-source our training code and multi-rejected dataset to advance research in LLM-based autonomous driving systems. Project Page (https://liyun0607.github.io/)},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
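The Plackett-Luce multi-preference objective described in the Multi-PrefDrive abstract above pairs one chosen driving action with several rejected alternatives and ranks them via DPO-style implicit rewards. Below is a minimal PyTorch sketch of how such an objective can be written, assuming per-action log-probabilities are already computed; the tensor shapes, the beta value, and the mean reduction are illustrative assumptions rather than the authors' released training code.

import torch

def multi_preference_loss(policy_chosen_logp: torch.Tensor,
                          policy_rejected_logps: torch.Tensor,
                          ref_chosen_logp: torch.Tensor,
                          ref_rejected_logps: torch.Tensor,
                          beta: float = 0.1) -> torch.Tensor:
    """Plackett-Luce style preference loss with one chosen and K rejected actions.

    policy_chosen_logp:    (B,)   log-prob of the chosen action under the policy
    policy_rejected_logps: (B, K) log-probs of the K rejected actions under the policy
    ref_*:                 same shapes under the frozen reference model
    """
    # DPO-style implicit rewards: beta * log(pi_theta / pi_ref)
    chosen_reward = beta * (policy_chosen_logp - ref_chosen_logp)           # (B,)
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)  # (B, K)

    # Plackett-Luce: maximise the probability that the chosen action is ranked
    # first among {chosen} union {rejected}.
    all_rewards = torch.cat([chosen_reward.unsqueeze(1), rejected_rewards], dim=1)  # (B, K+1)
    loss = -(chosen_reward - torch.logsumexp(all_rewards, dim=1))
    return loss.mean()

With K = 1 rejected action this reduces exactly to the standard binary DPO loss, which is one way to read the comparison against DPO and its variants reported in the abstract.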
@inproceedings{Si2025,
title = {You Share Beliefs, I Adapt: Progressive Heterogeneous Collaborative Perception},
author = {Hao Si and Ehsan Javanmardi and Manabu Tsukada},
url = {https://sihaoo1.github.io/PHCP_Page/
https://arxiv.org/abs/2509.09310
https://github.com/sihaoo1/PHCP},
year = {2025},
date = {2025-10-19},
urldate = {2025-10-19},
booktitle = {International Conference on Computer Vision (ICCV2025)},
address = {Honolulu, Hawai'i},
abstract = {Collaborative perception enables vehicles to overcome individual perception limitations by sharing information, allowing them to see further and through occlusions. In real-world scenarios, models on different vehicles are often heterogeneous due to manufacturer variations. Existing methods for heterogeneous collaborative perception address this challenge by fine-tuning adapters or the entire network to bridge the domain gap. However, these methods are impractical in real-world applications, as each new collaborator must undergo joint training with the ego vehicle on a dataset before inference, or the ego vehicle stores models for all potential collaborators in advance. Therefore, we pose a new question: Can we tackle this challenge directly during inference, eliminating the need for joint training? To answer this, we introduce Progressive Heterogeneous Collaborative Perception (PHCP), a novel framework that formulates the problem as few-shot unsupervised domain adaptation. Unlike previous work, PHCP dynamically aligns features by self-training an adapter during inference, eliminating the need for labeled data and joint training. Extensive experiments on the OPV2V dataset demonstrate that PHCP achieves strong performance across diverse heterogeneous scenarios. Notably, PHCP achieves performance comparable to SOTA methods trained on the entire dataset while using only a small amount of unlabeled data.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
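PHCP, as summarized above, casts heterogeneous collaborative perception as few-shot unsupervised domain adaptation and self-trains a lightweight adapter at inference time instead of jointly training with each new collaborator. The sketch below illustrates that idea with a 1x1-convolution adapter and a simple feature-consistency objective over the jointly observed BEV region; both the adapter architecture and the loss are assumptions for illustration, not the paper's actual design.

import torch
import torch.nn as nn

class FeatureAdapter(nn.Module):
    """Lightweight adapter mapping a collaborator's shared BEV features into
    the ego model's feature space (hypothetical 1x1-convolution design)."""
    def __init__(self, in_ch: int, out_ch: int):
        super().__init__()
        self.proj = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, kernel_size=1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.proj(x)

def self_train_adapter(adapter: FeatureAdapter,
                       collab_feats: torch.Tensor,   # (N, C_in, H, W) shared by the collaborator
                       ego_feats: torch.Tensor,      # (N, C_out, H, W) from the ego encoder
                       overlap_mask: torch.Tensor,   # (N, 1, H, W) region seen by both vehicles
                       steps: int = 50, lr: float = 1e-3) -> FeatureAdapter:
    """Self-train the adapter at inference time on a few unlabeled frames.
    The feature-consistency loss below is an illustrative assumption, not
    necessarily the objective used in PHCP."""
    opt = torch.optim.Adam(adapter.parameters(), lr=lr)
    for _ in range(steps):
        aligned = adapter(collab_feats)
        # Encourage adapted collaborator features to agree with the ego
        # features wherever both vehicles observe the same BEV cells.
        loss = ((aligned - ego_feats) ** 2 * overlap_mask).mean()
        opt.zero_grad()
        loss.backward()
        opt.step()
    return adapter

Because only the adapter's few parameters are updated, and only on a handful of unlabeled frames, the ego detector and the collaborator's model stay frozen, which is what removes the need for labeled data and joint training at deployment time.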
@inproceedings{Li2025b,
title = {State-Guided Spatial Cross-Attention for Enhanced End-to-End Autonomous Driving},
author = {Dongyang Li and Ehsan Javanmardi and Manabu Tsukada},
year = {2025},
date = {2025-09-30},
urldate = {2025-09-30},
booktitle = {IEEE International Automated Vehicle Validation Conference (IAVVC 2025)},
address = {Baden-Baden, Germany},
abstract = {Handling near-accident scenarios is a significant challenge for end-to-end autonomous driving (E2E-AD), as these situations often involve sudden environmental changes, complex interactions with other road users, and high-risk decision-making under uncertainty. Unlike routine driving tasks, near-accident scenarios require rapid and precise responses based on external perception and internal vehicle dynamics. Successfully navigating such situations demands not only a comprehensive understanding of the surrounding environment but also an accurate assessment of the ego vehicle's state, including speed, acceleration, and steering angle, to ensure safe and reliable control. However, conventional E2E-AD models struggle to handle these safety-critical situations effectively. Standard approaches primarily rely on raw sensor inputs to learn driving policies, often overlooking the crucial role of vehicle state information in decision-making. Since many near-accident scenarios involve conditions where the same environmental observation could require vastly different responses depending on the ego vehicle's motion state-such as whether the vehicle is braking, accelerating, or experiencing traction loss-ignoring these internal dynamics can lead to unsafe or suboptimal actions. Furthermore, E2E-AD models typically learn a direct mapping from sensory inputs to control outputs, making it difficult to generalize to highly dynamic and unpredictable interactions, such as emergency evasive maneuvers or sudden braking events. To address these challenges, we propose a state-guided cross-attention mechanism that explicitly models the interaction between the ego vehicle's states and its perception of the environment. By incorporating vehicle state information into the decision-making process, our approach ensures that the model can dynamically adjust its attention to critical sensory inputs based on real-time driving conditions. This allows the autonomous system to make more context-aware decisions, improving its ability to respond effectively to complex and safety-critical scenarios.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
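The state-guided cross-attention described above conditions perception on the ego vehicle's dynamics (speed, acceleration, steering angle) so that the same observation can be weighted differently depending on the vehicle's motion state. A minimal sketch of one way to wire this up is shown below, with the embedded ego state as the attention query and flattened perception features as keys and values; the dimensions and layer choices are assumptions, not the paper's exact architecture.

import torch
import torch.nn as nn

class StateGuidedCrossAttention(nn.Module):
    """Cross-attention where the ego vehicle state forms the query and
    perception features form the keys/values (illustrative dimensions)."""
    def __init__(self, state_dim: int = 8, feat_dim: int = 256, num_heads: int = 8):
        super().__init__()
        self.state_embed = nn.Linear(state_dim, feat_dim)
        self.attn = nn.MultiheadAttention(feat_dim, num_heads, batch_first=True)
        self.norm = nn.LayerNorm(feat_dim)

    def forward(self, ego_state: torch.Tensor, perception_tokens: torch.Tensor) -> torch.Tensor:
        """
        ego_state:         (B, state_dim)   current speed, acceleration, steering, ...
        perception_tokens: (B, N, feat_dim) flattened camera/BEV features
        returns:           (B, feat_dim)    state-conditioned context vector
        """
        q = self.state_embed(ego_state).unsqueeze(1)            # (B, 1, feat_dim)
        ctx, _ = self.attn(q, perception_tokens, perception_tokens)
        return self.norm(ctx.squeeze(1))

The resulting context vector can then be fed to the planning or control head, so the attention over sensory inputs shifts with whether the vehicle is, for example, braking hard or accelerating.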
@inproceedings{Zhang2025,
title = {AWSIM-VR: A Tightly-Coupled Virtual Reality Extension for Human-in-the-Loop Pedestrian-Autonomous Vehicle Interaction},
author = {Shangkai Zhang and Alex Orsholits and Ehsan Javanmardi and Manabu Tsukada},
url = {https://github.com/zhangshangkai/AWSIM-VR},
doi = {10.1109/MetaCom65502.2025.00063},
year = {2025},
date = {2025-08-27},
urldate = {2025-08-27},
booktitle = {3rd Annual IEEE International Conference on Metaverse Computing, Networking, and Applications (IEEE MetaCom 2025)},
address = {Seoul, Republic of Korea},
abstract = {Effective communication between autonomous vehicles (AVs) and pedestrians is crucial for ensuring future urban traffic safety. While external Human-Machine Interfaces (eHMIs) have emerged as promising solutions, current evaluation methodologies — particularly Virtual Reality (VR)-based studies — typically rely on scripted or pre-defined autonomous vehicle behaviors, limiting realism and neglecting pedestrians' active role in interactions. To address this, we introduce AWSIM-VR, a tightly-coupled VR extension of the AWSIM autonomous driving simulator, enabling real-time, human-in-the-loop pedestrian-AV interactions by directly integrating unmodified, real autonomous driving software (Autoware) into the simulation loop. Unlike previous systems, AWSIM-VR provides authentic, bidirectional interaction: pedestrians' actions dynamically influence vehicle decision-making and eHMI responses in real-time, closely emulating real-world AV scenarios. In user studies directly comparing AWSIM-VR to existing methodologies, participants reported significantly higher perceived realism and immersion, underscoring the importance of authentic autonomous behaviors in VR-based pedestrian interaction research. By directly utilizing production-level autonomous driving stacks, AWSIM-VR represents a significant methodological advancement, enabling more realistic, effective, and safer development and evaluation of eHMIs and AV technologies.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
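The core of AWSIM-VR, per the abstract above, is the tight coupling: the pedestrian in VR perturbs the simulated world that an unmodified Autoware stack perceives, and the vehicle's decisions and eHMI state flow back to the pedestrian in the same loop. The structural sketch below conveys that per-frame loop only; every class is a placeholder stub, since the real system couples a Unity-based simulator with Autoware over ROS 2 rather than Python code like this.

from dataclasses import dataclass

@dataclass
class Pose:
    x: float
    y: float
    heading: float

class VRTracker:
    """Stand-in for the VR headset/trackers providing the pedestrian pose."""
    def read_pose(self) -> Pose:
        return Pose(0.0, 0.0, 0.0)

class AVStack:
    """Stand-in for the unmodified autonomous driving software in the loop."""
    def step(self, sensor_frame: dict):
        # Returns a control command and the intent to display on the eHMI.
        return {"throttle": 0.0, "brake": 1.0, "steer": 0.0}, "YIELDING"

class Simulator:
    """Stand-in for the simulated world holding the vehicle and pedestrian."""
    def set_pedestrian(self, pose: Pose) -> None:
        pass
    def render_sensors(self) -> dict:
        return {"lidar": [], "camera": []}
    def apply_control(self, cmd: dict) -> None:
        pass

def hitl_frame(vr: VRTracker, sim: Simulator, av: AVStack) -> None:
    # 1. The pedestrian pose comes from the VR tracker, not a script, so the
    #    human directly perturbs the world the autonomous vehicle perceives.
    sim.set_pedestrian(vr.read_pose())
    # 2. The real AV stack perceives the updated world and plans a response.
    cmd, intent = av.step(sim.render_sensors())
    # 3. The vehicle moves and the eHMI shows the AV's current intent,
    #    closing the loop back to the pedestrian within the same frame.
    sim.apply_control(cmd)
    print("eHMI:", intent)

if __name__ == "__main__":
    hitl_frame(VRTracker(), Simulator(), AVStack())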
We are part of the Department of Creative Informatics at the University of Tokyo's Graduate School of Information Science and Technology, and we focus on computer networks and cyber-physical systems.
Address
4F, I-REF building, Graduate School of Information Science and Technology, The University of Tokyo, 1-1-1, Yayoi, Bunkyo-ku, Tokyo, 113-8657 Japan
Room 91B1, Bld 2 of Engineering Department, The University of Tokyo, 7-3-1 Hongo, Bunkyo-ku, Tokyo 113-8656, Japan
Mail: