
UAVs (drones) and AMRs (ground robots) are complementary autonomy platforms. UAVs offer access to hard-to-reach areas, reduced risk to human life, and rapid deployment, but face battery, payload, weather, and regulatory limits. AMRs deliver high endurance, payload, and flexibility on the factory floor at the cost of vertical reach, terrain limits, and integration effort. Both will be first-class endpoints in 6G networks.
This research presents an end-to-end planning framework with novel reinforcement-learning (RL) algorithms, together with a CARLA/AirSim simulation pipeline and a small physical testbed. The framework targets closed-loop evaluation, where planner outputs drive the agent and successive errors compound, and is guided by three properties: generality (transfer across sites and tasks), efficiency (sample- and inference-time), and customizability (swappable perception, dynamics, and objectives).
Four algorithms share a multi-agent RL skeleton and specialize it along different axes. UA-MARL (Uncertainty-Aware Multi-Agent RL) aims to increase sample efficiency. ITDQN (Imitation-based Triple Deep Q-Learning) is designed for balancing exploration and exploitation. FM-EAC (Feature Model-based Enhanced Actor-Critic) targets improving training efficiency and generalizability. Finally, EIA-SEC (Elite Imitation Actor-Shared Ensemble Critic) has the goal of improving training efficiency and customizability.
Two simulators support algorithm development: CARLA, an open-source autonomous-driving simulator with a modern rendering pipeline, pre-made urban maps, and simulated camera/LiDAR sensors controlled remotely over TCP — the natural target for AMR-side experiments; and AirSim, an Unreal-Engine-based simulator with platform-independent APIs widely used for UAV deep-learning and RL research. The physical testbed comprises four DJI Tello UAVs, four Raspberry-Pi controllers, and four ground AMRs, with additional cameras, IMUs, and LiDAR planned. The setup is designed to support human-in-the-loop experimentation in which operator interventions feed back into policy updates.
@inproceedings{Zhou2026b,
  title     = {Trajectory Planning for {UAV}-Based Smart Farming Using Imitation-Based Triple Deep {Q}-Learning},
  author    = {Zhou, Quanxi and Mao, Wencan and Couso Coddou, Tomás and Tsukada, Manabu and Liu, Yunling and Ji, Yusheng},
  year      = {2026},
  date      = {2026-06-01},
  urldate   = {2026-06-01},
  booktitle = {IEEE International Conference on Robotics \& Automation (ICRA 2026)},
  address   = {Vienna, Austria},
  abstract  = {Unmanned aerial vehicles (UAVs) have emerged as a promising auxiliary platform for smart agriculture, capable of simultaneously performing weed detection, recognition, and data collection from wireless sensors. However, trajectory planning for UAV-based smart agriculture is challenging due to the high uncertainty of the environment, partial observations, and limited battery capacity of UAVs. To address these issues, we formulate the trajectory planning problem as a Markov decision process (MDP) and leverage multi-agent reinforcement learning (MARL) to solve it. Furthermore, we propose a novel imitation-based triple deep Q-network (ITDQN) algorithm, which employs an elite imitation mechanism to reduce exploration costs and utilizes a mediator Q-network over a double deep Q-network (DDQN) to accelerate and stabilize training and improve performance. Experimental results in both simulated and real-world environments demonstrate the effectiveness of our solution. Moreover, our proposed ITDQN outperforms DDQN by 4.43% in weed recognition rate and 6.94% in data collection rate.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
@workshop{Zhou2026,
  title        = {Deep Reinforcement Learning for Automated Guided Vehicle Trajectory Planning in {Industry 4.0}},
  author       = {Zhou, Quanxi and Mao, Wencan and Xiao, Yu and Tsukada, Manabu and Ji, Yusheng},
  year         = {2026},
  date         = {2026-05-18},
  booktitle    = {INFOCOM 2026 International Workshop on Fusion of Data, Operation, Information, and Communication Technology for Industry 4.0 and Society 5.0 (DOICT-IndSoc)},
  abstract     = {Automated Guided Vehicles (AGVs) play a vital role in the Fourth Industrial Revolution (Industry 4.0), improving safety, time efficiency, and cost-effectiveness. While existing works focused on centralized or independent AGV control, we propose a distributed strategy for the large-scale, dynamic, and multi-functional environments of Industry 4.0. The proposed strategy enables AGVs to autonomously generate their material delivery trajectories while sharing information to support collaborative searching. Moreover, to enhance effectiveness and efficiency, we propose a Sub-task Agent Triple Deep Q-Network (SA-TDQN) algorithm, which decouples the actors for each sub-task mode, while incorporating a mediator Q-network between the online and target Q-networks. Experiments demonstrate that the proposed strategy is both feasible and effective. Furthermore, SA-TDQN consistently outperforms Deep Q-Network (DQN), Double DQN, and Triple DQN in terms of reward, training efficiency, and convergence stability, with comparable time complexity.},
  howpublished = {INFOCOM 2026 International Workshop on Fusion of Data, Operation, Information, and Communication Technology for Industry 4.0 and Society 5.0 (DOICT-IndSoc)},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {workshop}
}
@article{Zhou2025b,
  title     = {A Feature-Aware Elite-Imitation {MARL} for Multi-{UAV} Trajectory Optimization in Mountain Terrain Detection},
  author    = {Zhou, Quanxi and Tao, Ye and Su, Qianxiao and Tsukada, Manabu},
  url       = {https://www.mdpi.com/2504-446X/9/9/645/pdf},
  doi       = {10.3390/drones9090645},
  year      = {2025},
  date      = {2025-09-13},
  urldate   = {2025-09-13},
  journal   = {Drones},
  volume    = {9},
  number    = {9},
  pages     = {645},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
@article{Zhou2025,
  title     = {Uncertainty-Aware Multi-Agent Reinforcement Learning for Anti-Interference Trajectory Planning of Cellular-Connected {UAVs}},
  author    = {Zhou, Quanxi and Mao, Wencan and Nakazato, Jin and Ji, Yusheng and Tsukada, Manabu},
  doi       = {10.1109/TVT.2025.3606201},
  issn      = {0018-9545},
  year      = {2025},
  date      = {2025-09-04},
  urldate   = {2025-09-09},
  journal   = {IEEE Transactions on Vehicular Technology},
  pages     = {1--17},
  abstract  = {Cellular-connected unmanned aerial vehicles (C-UAVs) will be an integral component of future wireless networks. Thanks to the mobility and maneuverability of UAVs, we can transform the interference management and route scheduling problems of C-UAVs into an anti-interference trajectory planning problem, aiming to jointly minimize the UAV mission time and transmission outage time. However, none of the existing methods have taken both the spatio-temporal uncertainty of interference sources and multi-UAV trajectory planning into consideration. To address this issue, we propose a novel method, referred to as uncertainty-aware multi-agent reinforcement learning (UA-MARL), for anti-interference trajectory planning of C-UAVs. In UA-MARL, a transmission outage probability (TOP) has been introduced to improve the robustness of the model. A transmission outage probability experience memory (TOPEM) has been designed to increase sample efficiency and reduce inference time. MARL algorithms integrated with an adaptive post-decision state (PDS) have been introduced to accelerate the convergence and stabilize the training. Experimental results show that UA-MARL outperforms baselines in average reward, convergence efficiency, and convergence stability. Furthermore, we find that higher residential density and wider considered area will lead to a decrease in training efficiency and stability.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
@article{Liu2025,
  title     = {Multi-Modal Trajectory Planning for Emergency-Oriented Air-Ground Collaborative Sensing and Communication},
  author    = {Liu, Yaxi and Zhou, Quanxi and Mao, Wencan and Li, Xulong and Huangfu, Wei and Tsukada, Manabu and Ji, Yusheng and Long, Keping},
  doi       = {10.1109/TCCN.2025.3585254},
  issn      = {2332-7731},
  year      = {2025},
  date      = {2025-07-04},
  urldate   = {2025-07-04},
  journal   = {IEEE Transactions on Cognitive Communications and Networking},
  volume    = {11},
  number    = {5},
  pages     = {3094--3111},
  abstract  = {To obtain real-time situational awareness of the world, air-ground collaborative sensing and communication provide a promising solution to form a pervasive cognitive communications and networking system. However, existing schemes struggle to cope with emergencies where ground base stations and Internet of Things devices are temporarily out-of-service. Motivated by this, we envision a novel emergency-oriented air-ground collaborative sensing and communication network where multi-modal cognitive entities (i.e., static/dynamic ground/aerial nodes) cooperatively collect data from IoT devices and simultaneously perform sensing functionality. In such a novel network, an optimization for joint trajectory planning and resource allocation is established to minimize both data transmission task delay and sensing task delay under the constraints of boundary, moving distance, accessible region, and energy consumption for network nodes. To tackle the problem, we propose a transfer learning-based deep reinforcement learning (DRL) framework where three advanced DRL algorithms are included. Such a framework can rapidly adapt to potentially updated environments by facilitating knowledge transfer across tasks for emergency rescue activities. The proposed framework outperforms three state-of-the-art baselines. Moreover, the newly introduced auxiliary cognitive entities facilitate the improvement of sensing and communication functionalities, and the proposed transfer learning-based scheme boosts convergence in fast-changing environments.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
@article{Zhou2024,
  title     = {Cellular Connected {UAV} Anti-Interference Path Planning Based on {PDS-DDPG} and {TOPEM}},
  author    = {Zhou, Quanxi and Wang, Yongjing and Shen, Ruiyu and Nakazato, Jin and Tsukada, Manabu and Guan, Zhenyu},
  doi       = {10.1109/JMASS.2024.3490762},
  issn      = {2576-3164},
  year      = {2024},
  date      = {2024-11-04},
  urldate   = {2024-11-04},
  journal   = {IEEE Journal on Miniaturization for Air and Space Systems},
  abstract  = {Due to the randomness of channel fading, communication devices, and malicious interference sources, unmanned aerial vehicles (UAVs) face a complex and ever-changing task scenario, which poses significant communication security challenges, such as transmission outages. Fortunately, these communication security challenges can be transformed into path planning problems that minimize the weighted sum of UAV mission time and transmission outage time. In order to design the complex communication environment faced by UAVs in actual scenarios, we propose a system model, including building distribution, communication channel, and antenna design in this paper. Besides, we introduce other UAVs with fixed flight paths and ground interference resources with random locations to ensure mission UAVs have better anti-interference ability. However, it is challenging for classical search algorithms and heuristic algorithms to cope with the complex path problems mentioned above. In this paper, we propose an improved deep deterministic policy gradient (DDPG) algorithm with better performance compared with basic DDPG and DDQN algorithms. Specifically, a post-decision state (PDS) mechanism has been introduced to accelerate the convergence rate and enhance the stability of the training process. In addition, a transmission outage probability experience memory (TOPEM) has been designed to quickly generate wireless communication quality maps and provide temporary experience for the post-decision process, resulting in better training results. Simulation experiments have proven that, compared to basic DDPG, the improved algorithm increases training speed by at least 50%, significantly improves convergence rate, and reduces the episode required for convergence to 20%. It can also help UAVs choose better paths than basic DDPG and DDQN algorithms.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
uav
autonomous driving v2x
v2x
digital twins extended reality
digital twins
autonomous driving machine learning
machine learning v2x
autonomous driving v2x
We are part of the University of Tokyo’s Graduate School of Information Science and Technology, Department of Creative Informatics, and focus on computer networks and cyber-physical systems.
Address
4F, I-REF building, Graduate School of Information Science and Technology, The University of Tokyo, 1-1-1, Yayoi, Bunkyo-ku, Tokyo, 113-8657 Japan
Room 91B1, Bld 2 of Engineering Department, The University of Tokyo, 7-3-1 Hongo, Bunkyo-ku, Tokyo 113-8656, Japan
Mail: