
This research investigates a co-processing framework for real-time visual inference in mixed reality (MR), targeting the computational limitations of current head-mounted displays. While modern MR devices support spatial tracking, they lack the capacity to perform high-resolution semantic scene analysis onboard. To address this, we propose an architecture in which semantic segmentation, object detection, and classification are offloaded to external edge AI accelerators.
The system integrates commercial MR headsets (e.g. Magic Leap 2, Meta Quest) with compact processing nodes equipped with devices such as the Hailo-8. Visual input—either full-resolution frames or compressed intermediate features—is transmitted over low-latency wireless links. Inference is performed externally, and structured outputs (e.g. object masks, bounding boxes, scene labels) are returned to the headset and rendered in-engine via Unity or Unreal.
This enables applications to interact meaningfully with their physical surroundings. Virtual content can be dynamically anchored to recognized objects, navigation systems can adapt to context-dependent spatial cues, and application logic can respond to real-time environmental classification. Unlike traditional onboard pipelines, this approach decouples perception from headset constraints, allowing for broader model complexity and higher update rates.
Initial deployments include mobile outdoor scenarios with sub-100 ms round-trip latency, as well as indoor annotation tasks requiring dense semantic feedback. Multi-headset use cases have also been validated through shared inference nodes.
By enabling structured, real-time perception at the scene level, this research contributes to the advancement of Spatial AI: the integration of geometric mapping, semantic interpretation, and real-world interactivity within mixed reality systems.
@inproceedings{Orsholits2025,
  title     = {Context-Rich Interactions in Mixed Reality through Edge {AI} Co-Processing},
  author    = {Orsholits, Alex and Tsukada, Manabu},
  url       = {https://link.springer.com/chapter/10.1007/978-3-031-87772-8_3},
  doi       = {10.1007/978-3-031-87772-8_3},
  isbn      = {978-3-031-87771-1},
  year      = {2025},
  date      = {2025-04-09},
  urldate   = {2025-04-09},
  booktitle = {The 39th International Conference on Advanced Information Networking and Applications ({AINA} 2025)},
  address   = {Barcelona, Spain},
  abstract  = {Spatial computing is evolving towards leveraging data streaming for computationally demanding applications, facilitating a shift to lightweight, untethered, and standalone devices. These devices are therefore ideal candidates for co-processing, where real-time context understanding and low-latency data streaming are fundamental for seamless, general-purpose Mixed Reality (MR) experiences. This paper demonstrates and evaluates a scalable approach to augmented contextual understanding in MR by implementing multi-modal edge AI co-processing through a Hailo-8 AI accelerator, a low-power ARM-based single board computer (SBC), and the Magic Leap 2 AR headset. The proposed system utilises the native WebRTC streaming capabilities of the Magic Leap 2 to continuously stream camera data to the edge co-processor, where a collection of vision AI models---object detection, pose estimation, face recognition, and depth estimation---are executed. The resulting inferences are then streamed back to the headset for spatial re-projection and transmitted to cloud-based systems for further integration with large-scale AI models, such as LLMs and VLMs. This seamless integration enhances real-time contextual understanding in MR while facilitating advanced multi-modal, multi-device collaboration, supporting richer, scalable spatial cognition across distributed systems.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
@misc{Orsholits2025b,
  title        = {Edge Vision {AI} Co-Processing for Dynamic Context Awareness in Mixed Reality},
  author       = {Orsholits, Alex and Tsukada, Manabu},
  url          = {https://www.youtube.com/watch?v=xxahKZl4K9w},
  doi          = {10.1109/VRW66409.2025.00293},
  year         = {2025},
  date         = {2025-03-08},
  urldate      = {2025-03-08},
  booktitle    = {2025 IEEE Conference on Virtual Reality and 3D User Interfaces Abstracts and Workshops (VRW)},
  address      = {Saint-Malo, France},
  abstract     = {Spatial computing is evolving towards leveraging data streaming for computationally demanding applications, facilitating a shift to lightweight, untethered, and standalone devices. These devices are ideal candidates for co-processing, where real-time scene context understanding and low-latency data streaming are fundamental for general-purpose Mixed Reality (MR) experiences. This poster demonstrates and evaluates a scalable approach to augmented contextual understanding in MR by implementing edge AI co-processing through a Hailo-8 AI accelerator, a low-power ARM-based single board computer (SBC), and the Magic Leap 2 AR headset. The resulting inferences are streamed back to the headset for spatial reprojection into the user’s vision.},
  howpublished = {IEEE VR 2025, Poster},
  note         = {Honorable mention; award listing: https://ieeevr.org/2025/awards/conference-awards/\#poster-honorable},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {misc}
}
@inproceedings{Zhu2025,
  title     = {A Distributed Content Subscription Mechanism with Revision Discovery to Decouple Content Sharing Platform and Creator {ID}},
  author    = {Zhu, Zhihai and Tao, Ye and Tsukada, Manabu and Esaki, Hiroshi},
  year      = {2025},
  date      = {2025-02-18},
  urldate   = {2025-02-18},
  booktitle = {International Conference on Artificial Intelligence in Information and Communication ({ICAIIC} 2025)},
  address   = {Fukuoka, Japan},
  abstract  = {This paper proposes a distributed content subscription mechanism that enables content creators to share updates with their audience while maintaining platform independence and anonymity. The mechanism extends the Kademlia distributed hash table (DHT) protocol by incorporating revision numbers and republication timestamps into the DHT key computation, allowing subscribers to discover content updates through heuristic revision queries. It leverages public key cryptography for creator identification and content authenticity, while integrating with established peer-to-peer protocols like BitTorrent for efficient content distribution. Preliminary testing with 200 simulated nodes demonstrates the mechanism's ability to maintain content availability and update discovery even when content creators are offline. This approach particularly benefits creators operating under strict content controls or surveillance, offering them greater creative freedom and distribution autonomy compared to existing centralized and decentralized solutions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
autonomous driving v2x
v2x
digital twins extended reality
digital twins
autonomous driving machine learning
machine learning v2x
autonomous driving v2x
extended reality
We are part of the University of Tokyo’s Graduate School of Information Science and Technology, Department of Creative Informatics, and we focus on computer networks and cyber-physical systems.
Address
4F, I-REF building, Graduate School of Information Science and Technology, The University of Tokyo, 1-1-1, Yayoi, Bunkyo-ku, Tokyo, 113-8657 Japan
Room 91B1, Bld 2 of Engineering Department, The University of Tokyo, 7-3-1 Hongo, Bunkyo-ku, Tokyo 113-8656, Japan
Mail: