matmul_v2
Deadline
136 days 12 hours remaining (2025-12-30 00:00 UTC)
Language
Python
GPU Types
A100, B200, H100, L4
Description
Implement a custom matmul function that matches the reference implementation. The function should handle a tuple of input tensors and apply matmul. The shapes of all outer and inner dimensions of the tensors are multiples of 16.
Reference Implementation
import torch
from task import input_t, output_t
from utils import make_match_reference, DeterministicContext
def generate_input(m: int, n: int, k: int, seed: int) -> input_t:
    """Build the seeded tensors for one matmul test case.

    Args:
        m: Row count of ``a`` and of ``c``.
        n: Column count of ``b`` and of ``c``.
        k: Shared inner dimension (columns of ``a``, rows of ``b``).
        seed: Seed for the CUDA random generator that fills ``a`` and ``b``.

    Returns:
        A tuple ``(a, b, c)`` of float16 CUDA tensors: ``a`` is ``(m, k)``
        and ``b`` is ``(k, n)``, both filled via ``uniform_(0, 1)``; ``c``
        is ``(m, n)`` and is left uninitialized (``torch.empty``).
        NOTE(review): ``c`` is presumably an output buffer for submissions;
        nothing in this file reads it -- confirm against ``task``.
    """
    device = 'cuda'
    half = torch.float16

    # A single generator feeds both operands, so `a` must be filled before
    # `b` for the values to be reproducible for a given seed.
    rng = torch.Generator(device=device).manual_seed(seed)

    a = torch.empty(m, k, device=device, dtype=half).uniform_(0, 1, generator=rng)
    b = torch.empty(k, n, device=device, dtype=half).uniform_(0, 1, generator=rng)
    c = torch.empty(m, n, device=device, dtype=half)
    return a, b, c
def ref_kernel(data: input_t) -> output_t:
    """Compute the reference matrix product for one test case.

    Args:
        data: A tuple whose first two items are the operands ``a`` and
            ``b``. Any further items (``generate_input`` also returns a
            third tensor ``c``) are ignored here.

    Returns:
        The result of ``a @ b``, evaluated inside ``DeterministicContext``.
    """
    with DeterministicContext():
        # generate_input yields (a, b, c); a strict two-name unpack would
        # raise ValueError on that 3-tuple, so tolerate trailing items
        # while still accepting a plain (a, b) pair.
        a, b, *_ = data
        return a @ b
# Module-level checker built from the reference kernel.
# NOTE(review): make_match_reference comes from utils and is not visible
# here; presumably it returns a callable that compares a submission's output
# against ref_kernel -- confirm against utils.
check_implementation = make_match_reference(ref_kernel)
No submissions yet
Be the first to submit a solution for this challenge!