Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .ci/scripts/setup-webgpu-linux-deps.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -ex

# SwiftShader: software Vulkan adapter for GPU-less CI (LunarG SDK not needed).
install_swiftshader() {
_https_amazon_aws=https://ossci-android.s3.amazonaws.com
_swiftshader_archive=swiftshader-abe07b943-prebuilt.tar.gz
_swiftshader_dir=/tmp/swiftshader
mkdir -p $_swiftshader_dir

_tmp_archive="/tmp/${_swiftshader_archive}"

curl --silent --show-error --location --fail --retry 3 --retry-all-errors \
--output "${_tmp_archive}" "$_https_amazon_aws/${_swiftshader_archive}"

tar -C "${_swiftshader_dir}" -xzf "${_tmp_archive}"

export VK_ICD_FILENAMES="${_swiftshader_dir}/swiftshader/build/Linux/vk_swiftshader_icd.json"
export LD_LIBRARY_PATH="${_swiftshader_dir}/swiftshader/build/Linux/"
export ETVK_USING_SWIFTSHADER=1
}

install_swiftshader
bash backends/webgpu/scripts/setup-wgpu-native.sh
8 changes: 8 additions & 0 deletions .ci/scripts/test_backend.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ if [[ "$FLOW" == *vulkan* ]]; then
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
fi

if [[ "$FLOW" == *webgpu* ]]; then
# Setup swiftshader (software Vulkan adapter for GPU-less runners) and wgpu-native,
# which are required to build and run the WebGPU delegate.
source .ci/scripts/setup-webgpu-linux-deps.sh

EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_WEBGPU=ON"
fi

if [[ "$FLOW" == *arm* ]]; then
if [[ "$SUITE" == "operators" ]]; then
PYTEST_RETRY_ARGS=(--reruns 2 --reruns-delay 1)
Expand Down
27 changes: 27 additions & 0 deletions .github/workflows/test-backend-webgpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
name: Test WebGPU Backend

on:
schedule:
- cron: 0 2 * * *
push:
branches:
- main
- release/*
tags:
- ciflow/nightly/*
pull_request:
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true

jobs:
test-webgpu:
uses: ./.github/workflows/_test_backend.yml
with:
backend: webgpu
flows: '["webgpu"]'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 120
run-linux: true
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1056,6 +1056,10 @@ if(EXECUTORCH_BUILD_PYBIND)
list(APPEND _dep_libs vulkan_backend)
endif()

if(EXECUTORCH_BUILD_WEBGPU)
list(APPEND _dep_libs webgpu_backend)
endif()

# compile options for pybind
set(_pybind_compile_options
$<$<CXX_COMPILER_ID:MSVC>:/EHsc
Expand Down
7 changes: 7 additions & 0 deletions backends/test/suite/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ def _load_vulkan() -> list[TestFlow]:
return [VULKAN_TEST_FLOW, VULKAN_STATIC_INT8_PER_CHANNEL_TEST_FLOW]


def _load_webgpu() -> list[TestFlow]:
from executorch.backends.test.suite.flows.webgpu import WEBGPU_TEST_FLOW

return [WEBGPU_TEST_FLOW]


def _load_openvino() -> list[TestFlow]:
from executorch.backends.test.suite.flows.openvino import (
OPENVINO_INT8_TEST_FLOW,
Expand Down Expand Up @@ -178,6 +184,7 @@ def all_flows() -> dict[str, TestFlow]:
+ _register_flow(_load_xnnpack, "XNNPACK")
+ _register_flow(_load_coreml, "Core ML")
+ _register_flow(_load_vulkan, "Vulkan")
+ _register_flow(_load_webgpu, "WebGPU")
+ _register_flow(_load_openvino, "OpenVINO")
+ _register_flow(_load_qnn, "QNN")
+ _register_flow(_load_arm, "ARM")
Expand Down
20 changes: 20 additions & 0 deletions backends/test/suite/flows/webgpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from executorch.backends.test.suite.flow import TestFlow
from executorch.backends.webgpu.test.tester import WebGPUTester


def _create_webgpu_flow() -> TestFlow:
return TestFlow(
"webgpu",
backend="webgpu",
tester_factory=WebGPUTester,
skip_patterns=["float16", "float64"], # Not supported in swiftshader
)


WEBGPU_TEST_FLOW = _create_webgpu_flow()
12 changes: 8 additions & 4 deletions backends/webgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,18 @@ if(NOT TARGET vulkan_schema)
# target), but vulkan_schema is unconditionally defined.
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/../vulkan
${CMAKE_CURRENT_BINARY_DIR}/_vulkan_schema
${CMAKE_CURRENT_BINARY_DIR}/_vulkan_schema EXCLUDE_FROM_ALL
)
endif()

set(WEBGPU_SRCS
runtime/WebGPUBackend.cpp runtime/WebGPUGraph.cpp
runtime/WebGPUDelegateHeader.cpp runtime/WebGPUDevice.cpp
runtime/ops/OperatorRegistry.cpp runtime/ops/add/BinaryOp.cpp
runtime/WebGPUBackend.cpp
runtime/WebGPUGraph.cpp
runtime/WebGPUDelegateHeader.cpp
runtime/WebGPUDevice.cpp
runtime/ops/OperatorRegistry.cpp
runtime/ops/add/BinaryOp.cpp
runtime/ops/rms_norm/RmsNorm.cpp
)

add_library(webgpu_backend ${WEBGPU_SRCS})
Expand Down
5 changes: 5 additions & 0 deletions backends/webgpu/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
2 changes: 1 addition & 1 deletion backends/webgpu/runtime/WebGPUBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ Result<DelegateHandle*> WebGPUBackend::init(
}

try {
graph->build(flatbuffer_data, constant_data);
graph->build(flatbuffer_data, constant_data, context.get_named_data_map());
} catch (const std::exception& e) {
ET_LOG(Error, "WebGPU graph build failed: %s", e.what());
graph->~WebGPUGraph();
Expand Down
6 changes: 6 additions & 0 deletions backends/webgpu/runtime/WebGPUDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,13 @@ WebGPUContext create_webgpu_context() {
device_cb.callback = on_device_request;
device_cb.userdata1 = &device_result;

// Request the adapter's full limits; software adapters default many to 0.
WGPULimits supported_limits = {};
WGPUDeviceDescriptor device_desc = {};
if (wgpuAdapterGetLimits(ctx.adapter, &supported_limits) ==
WGPUStatus_Success) {
device_desc.requiredLimits = &supported_limits;
}
device_desc.uncapturedErrorCallbackInfo.callback = on_device_error;

wgpuAdapterRequestDevice(ctx.adapter, &device_desc, device_cb);
Expand Down
23 changes: 22 additions & 1 deletion backends/webgpu/runtime/WebGPUGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <executorch/backends/webgpu/runtime/ops/OperatorRegistry.h>

#include <executorch/backends/vulkan/serialization/schema_generated.h>
#include <executorch/runtime/core/named_data_map.h>

#include <executorch/backends/webgpu/runtime/WebGPUDevice.h>
#include <webgpu/wgpu.h>
Expand Down Expand Up @@ -93,7 +94,8 @@ WebGPUGraph::~WebGPUGraph() {

void WebGPUGraph::build(
const void* flatbuffer_data,
const uint8_t* constant_data) {
const uint8_t* constant_data,
const executorch::runtime::NamedDataMap* named_data_map) {
if (!device_) {
auto* ctx = get_default_webgpu_context();
if (ctx) {
Expand Down Expand Up @@ -165,6 +167,25 @@ void WebGPUGraph::build(
const uint8_t* src = constant_data + vk_bytes->offset();
wgpuQueueWriteBuffer(
queue_, tensor.buffer, 0, src, tensor.nbytes);
} else if (
vk_bytes->named_key() != nullptr &&
named_data_map != nullptr) {
// Constant stored in the PTE named-data map.
auto buf =
named_data_map->get_data(vk_bytes->named_key()->c_str());
if (buf.ok() && buf->size() >= tensor.nbytes) {
wgpuQueueWriteBuffer(
queue_, tensor.buffer, 0, buf->data(), tensor.nbytes);
buf->Free();
} else {
throw std::runtime_error(
std::string("WebGPU: named constant '") +
vk_bytes->named_key()->c_str() +
"' missing or undersized in NamedDataMap");
}
} else {
throw std::runtime_error(
"WebGPU: constant has no inline offset and no named-data key");
}
}
}
Expand Down
9 changes: 8 additions & 1 deletion backends/webgpu/runtime/WebGPUGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,14 @@
#include <unordered_map>
#include <vector>

#include <executorch/runtime/core/named_data_map.h>

namespace executorch {
namespace backends {
namespace webgpu {

using executorch::runtime::NamedDataMap;

struct WebGPUTensor {
WGPUBuffer buffer = nullptr;
std::vector<int64_t> dims;
Expand Down Expand Up @@ -66,7 +70,10 @@ class WebGPUGraph {

// Build the graph from a deserialized VkGraph flatbuffer and constant data.
// The flatbuffer_data pointer must remain valid during build().
void build(const void* flatbuffer_data, const uint8_t* constant_data);
void build(
const void* flatbuffer_data,
const uint8_t* constant_data,
const NamedDataMap* named_data_map = nullptr);

// Copy input tensor data from host pointers into GPU buffers.
void copy_inputs(const std::vector<std::pair<const void*, size_t>>& inputs);
Expand Down
2 changes: 1 addition & 1 deletion backends/webgpu/runtime/ops/add/binary_add.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ struct Params {
}
@group(0) @binding(3) var<uniform> params: Params;

@compute @workgroup_size(256)
@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
let idx = gid.x;
if (idx >= params.num_elements) {
Expand Down
4 changes: 2 additions & 2 deletions backends/webgpu/runtime/ops/add/binary_add_wgsl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ struct Params {
}
@group(0) @binding(3) var<uniform> params: Params;

@compute @workgroup_size(256)
@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
let idx = gid.x;
if (idx >= params.num_elements) {
Expand All @@ -34,7 +34,7 @@ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
}
)";

inline constexpr uint32_t kBinaryAddWorkgroupSize = 256;
inline constexpr uint32_t kBinaryAddWorkgroupSize = 64;

} // namespace webgpu
} // namespace backends
Expand Down
Loading
Loading