
UAV(ドローン)とAMR(地上ロボット)は、互いに補完的な自律プラットフォームである。UAVは到達困難な領域へのアクセス、人命リスクの低減、迅速な展開といった利点を備える一方、バッテリー、ペイロード、気象、規制といった制約に直面する。AMRは工場内において高い稼働持続性、ペイロード、柔軟性を提供するが、垂直方向への到達範囲、地形上の制約、システム統合の手間といった代償を伴う。両者はいずれも6Gネットワークにおける第一級のエンドポイントとなる。
本研究では、新規の強化学習(RL)アルゴリズムを核とするエンドツーエンドの計画フレームワークを提案するとともに、CARLA/AirSimによるシミュレーションパイプラインおよび小規模な物理テストベッドを構築する。本フレームワークは閉ループ評価を対象とし、計画器の出力がエージェントを駆動して逐次的な誤差が累積する状況を扱う。設計は次の三つの性質に基づいて導かれる。すなわち、サイトやタスクを跨いだ転移を可能とする汎化性、学習時および推論時の計算効率、ならびに認識・ダイナミクス・目的関数を差し替え可能とするカスタマイズ性である。
四つのアルゴリズムはマルチエージェントRLという共通の骨格を持ちつつ、それぞれ異なる軸において特化されている。UA-MARL(Uncertainty-Aware Multi-Agent RL)はサンプル効率の向上を目的とする。ITDQN(Imitation-based Triple Deep Q-Learning)は探索と活用のバランスを取ることを意図して設計されている。FM-EAC(Feature Model-based Enhanced Actor-Critic)は学習効率と汎化性の改善を目指す。そして、EIA-SEC(Elite Imitation Actor-Shared Ensemble Critic)は学習効率とカスタマイズ性の向上を目標とする。
アルゴリズム開発は二つのシミュレータによって支えられる。一つはCARLAであり、近代的なレンダリングパイプライン、事前構築された都市マップ、TCP経由で遠隔制御可能なカメラ/LiDARなどのシミュレートセンサを備えたオープンソースの自動運転シミュレータで、AMR側の実験対象として自然な選択肢となる。もう一つはAirSimであり、Unreal Engineを基盤とし、プラットフォーム非依存のAPIを提供することからUAV向けの深層学習およびRL研究で広く用いられている。物理テストベッドは、DJI Tello UAV 4機、Raspberry Piコントローラ 4台、地上AMR 4台で構成され、今後さらにカメラ、IMU、LiDARの追加が計画されている。このテストベッドは、オペレータの介入が方策更新へとフィードバックされるヒューマン・イン・ザ・ループ実験を支援することを念頭に設計されている。
@inproceedings{Zhou2026b,
  title     = {Trajectory Planning for {UAV}-Based Smart Farming Using Imitation-Based Triple Deep {Q}-Learning},
  author    = {Zhou, Quanxi and Mao, Wencan and Couso Coddou, Tomás and Tsukada, Manabu and Liu, Yunling and Ji, Yusheng},
  year      = {2026},
  date      = {2026-06-01},
  urldate   = {2026-06-01},
  booktitle = {IEEE International Conference on Robotics \& Automation ({ICRA} 2026)},
  address   = {Vienna, Austria},
  abstract  = {Unmanned aerial vehicles (UAVs) have emerged as a promising auxiliary platform for smart agriculture, capable of simultaneously performing weed detection, recognition, and data collection from wireless sensors. However, trajectory planning for UAV-based smart agriculture is challenging due to the high uncertainty of the environment, partial observations, and limited battery capacity of UAVs. To address these issues, we formulate the trajectory planning problem as a Markov decision process (MDP) and leverage multi-agent reinforcement learning (MARL) to solve it. Furthermore, we propose a novel imitation-based triple deep Q-network (ITDQN) algorithm, which employs an elite imitation mechanism to reduce exploration costs and utilizes a mediator Q-network over a double deep Q-network (DDQN) to accelerate and stabilize training and improve performance. Experimental results in both simulated and real-world environments demonstrate the effectiveness of our solution. Moreover, our proposed ITDQN outperforms DDQN by 4.43\% in weed recognition rate and 6.94\% in data collection rate.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
@inproceedings{Zhou2026,
  title     = {Deep Reinforcement Learning for Automated Guided Vehicle Trajectory Planning in {Industry 4.0}},
  author    = {Zhou, Quanxi and Mao, Wencan and Xiao, Yu and Tsukada, Manabu and Ji, Yusheng},
  year      = {2026},
  date      = {2026-05-18},
  booktitle = {{INFOCOM} 2026 International Workshop on Fusion of Data, Operation, Information, and Communication Technology for Industry 4.0 and Society 5.0 ({DOICT-IndSoc})},
  abstract  = {Automated Guided Vehicles (AGVs) play a vital role in the Fourth Industrial Revolution (Industry 4.0), improving safety, time efficiency, and cost-effectiveness. While existing works focused on centralized or independent AGV control, we propose a distributed strategy for the large-scale, dynamic, and multi-functional environments of Industry 4.0. The proposed strategy enables AGVs to autonomously generate their material delivery trajectories while sharing information to support collaborative searching. Moreover, to enhance effectiveness and efficiency, we propose a Sub-task Agent Triple Deep Q-Network (SA-TDQN) algorithm, which decouples the actors for each sub-task mode, while incorporating a mediator Q-network between the online and target Q-networks. Experiments demonstrate that the proposed strategy is both feasible and effective. Furthermore, SA-TDQN consistently outperforms Deep Q-Network (DQN), Double DQN, and Triple DQN in terms of reward, training efficiency, and convergence stability, with comparable time complexity.},
  pubstate  = {published},
  tppubtype = {workshop}
}
@article{Zhou2025b,
  title         = {A Feature-Aware Elite-Imitation {MARL} for Multi-{UAV} Trajectory Optimization in Mountain Terrain Detection},
  author        = {Zhou, Quanxi and Tao, Ye and Su, Qianxiao and Tsukada, Manabu},
  url           = {https://www.mdpi.com/2504-446X/9/9/645/pdf},
  doi           = {10.3390/drones9090645},
  year          = {2025},
  date          = {2025-09-13},
  urldate       = {2025-09-13},
  journal       = {Drones},
  volume        = {9},
  number        = {9},
  pages         = {645},
  internal-note = {volume/number/article-number inferred from the MDPI URL path (2504-446X/9/9/645) — verify against the published record},
  pubstate      = {published},
  tppubtype     = {article}
}
@article{Zhou2025,
  title     = {Uncertainty-Aware Multi-Agent Reinforcement Learning for Anti-Interference Trajectory Planning of Cellular-Connected {UAVs}},
  author    = {Zhou, Quanxi and Mao, Wencan and Nakazato, Jin and Ji, Yusheng and Tsukada, Manabu},
  doi       = {10.1109/TVT.2025.3606201},
  issn      = {0018-9545},
  year      = {2025},
  date      = {2025-09-04},
  urldate   = {2025-09-09},
  journal   = {IEEE Transactions on Vehicular Technology},
  pages     = {1--17},
  abstract  = {Cellular-connected unmanned aerial vehicles (C-UAVs) will be an integral component of future wireless networks. Thanks to the mobility and maneuverability of UAVs, we can transform the interference management and route scheduling problems of C-UAVs into an anti-interference trajectory planning problem, aiming to jointly minimize the UAV mission time and transmission outage time. However, none of the existing methods have taken both the spatio-temporal uncertainty of interference sources and multi-UAV trajectory planning into consideration. To address this issue, we propose a novel method, referred to as uncertainty-aware multi-agent reinforcement learning (UA-MARL), for anti-interference trajectory planning of C-UAVs. In UA-MARL, a transmission outage probability (TOP) has been introduced to improve the robustness of the model. A transmission outage probability experience memory (TOPEM) has been designed to increase sample efficiency and reduce inference time. MARL algorithms integrated with an adaptive post-decision state (PDS) have been introduced to accelerate the convergence and stabilize the training. Experimental results show that UA-MARL outperforms baselines in average reward, convergence efficiency, and convergence stability. Furthermore, we find that higher residential density and wider considered area will lead to a decrease in training efficiency and stability.},
  pubstate  = {published},
  tppubtype = {article}
}
@article{Liu2025,
  title     = {Multi-Modal Trajectory Planning for Emergency-Oriented Air-Ground Collaborative Sensing and Communication},
  author    = {Liu, Yaxi and Zhou, Quanxi and Mao, Wencan and Li, Xulong and Huangfu, Wei and Tsukada, Manabu and Ji, Yusheng and Long, Keping},
  doi       = {10.1109/TCCN.2025.3585254},
  issn      = {2332-7731},
  year      = {2025},
  date      = {2025-07-04},
  urldate   = {2025-07-04},
  journal   = {IEEE Transactions on Cognitive Communications and Networking},
  volume    = {11},
  number    = {5},
  pages     = {3094--3111},
  abstract  = {To obtain real-time situational awareness of the world, air-ground collaborative sensing and communication provide a promising solution to form a pervasive cognitive communications and networking system. However, existing schemes struggle to cope with emergencies where ground base stations and Internet of Things devices are temporarily out-of-service. Motivated by this, we envision a novel emergency-oriented air-ground collaborative sensing and communication network where multi-modal cognitive entities (i.e., static/dynamic ground/aerial nodes) cooperatively collect data from IoT devices and simultaneously perform sensing functionality. In such a novel network, an optimization for joint trajectory planning and resource allocation is established to minimize both data transmission task delay and sensing task delay under the constraints of boundary, moving distance, accessible region, and energy consumption for network nodes. To tackle the problem, we propose a transfer learning-based deep reinforcement learning (DRL) framework where three advanced DRL algorithms are included. Such a framework can rapidly adapt to potentially updated environments by facilitating knowledge transfer across tasks for emergency rescue activities. The proposed framework outperforms three state-of-the-art baselines. Moreover, the newly introduced auxiliary cognitive entities facilitate the improvement of sensing and communication functionalities, and the proposed transfer learning-based scheme boosts convergence in fast-changing environments.},
  pubstate  = {published},
  tppubtype = {article}
}
@article{Zhou2024,
  title     = {Cellular Connected {UAV} Anti-Interference Path Planning Based on {PDS-DDPG} and {TOPEM}},
  author    = {Zhou, Quanxi and Wang, Yongjing and Shen, Ruiyu and Nakazato, Jin and Tsukada, Manabu and Guan, Zhenyu},
  doi       = {10.1109/JMASS.2024.3490762},
  issn      = {2576-3164},
  year      = {2024},
  date      = {2024-11-04},
  urldate   = {2024-11-04},
  journal   = {IEEE Journal on Miniaturization for Air and Space Systems},
  abstract  = {Due to the randomness of channel fading, communication devices, and malicious interference sources, unmanned aerial vehicles (UAVs) face a complex and ever-changing task scenario, which poses significant communication security challenges, such as transmission outages. Fortunately, these communication security challenges can be transformed into path planning problems that minimize the weighted sum of UAV mission time and transmission outage time. In order to design the complex communication environment faced by UAVs in actual scenarios, we propose a system model, including building distribution, communication channel, and antenna design in this paper. Besides, we introduce other UAVs with fixed flight paths and ground interference resources with random locations to ensure mission UAVs have better anti-interference ability. However, it is challenging for classical search algorithms and heuristic algorithms to cope with the complex path problems mentioned above. In this paper, we propose an improved deep deterministic policy gradient (DDPG) algorithm with better performance compared with basic DDPG and DDQN algorithms. Specifically, a post-decision state (PDS) mechanism has been introduced to accelerate the convergence rate and enhance the stability of the training process. In addition, a transmission outage probability experience memory (TOPEM) has been designed to quickly generate wireless communication quality maps and provide temporary experience for the post-decision process, resulting in better training results. Simulation experiments have proven that, compared to basic DDPG, the improved algorithm increases training speed by at least 50\%, significantly improves convergence rate, and reduces the episode required for convergence to 20\%. It can also help UAVs choose better paths than basic DDPG and DDQN algorithms.},
  pubstate  = {published},
  tppubtype = {article}
}
@comment{Keyword tags exported alongside the entries above (not BibTeX entries; preserved verbatim):
autonomous driving v2x
digital twins extended reality
digital twins
autonomous driving machine learning
machine learning v2x
autonomous driving v2x
}