vectoradd_v2

Deadline

136 days 12 hours remaining (2025-12-30 00:00 UTC)

Language

Python

GPU Types

A100, B200, H100, L4

Description

Implement a float16 vector addition kernel. Input: tuple(torch.Tensor, torch.Tensor) with tensors of shape (N, N) and type torch.float16. These tensors are from a normal distribution with mean 0 and variance 1. Output: torch.Tensor of shape (N, N) and type torch.float16

Reference Implementation

from utils import make_match_reference, DeterministicContext
import torch
from task import input_t, output_t


def ref_kernel(data: input_t) -> output_t:
    """
    Reference implementation of vector addition using PyTorch.
    Args:
        data: Tuple of tensors [A, B] to be added.
    Returns:
        Tensor containing element-wise sums.
    """
    with DeterministicContext():
        A, B, output = data
        output[...] = A + B
        return output


def generate_input(size: int, seed: int) -> input_t:
    """
    Generates random input tensors of specified shapes.
    Returns:
        Tuple of tensors [A, B] to be added.
    """
    gen = torch.Generator(device="cuda")
    gen.manual_seed(seed)
    A = torch.randn(
        size, size, device="cuda", dtype=torch.float16, generator=gen
    ).contiguous()
    B = torch.randn(
        size, size, device="cuda", dtype=torch.float16, generator=gen
    ).contiguous()
    C = torch.empty(size, size, device="cuda", dtype=torch.float16).contiguous()
    return A, B, C


check_implementation = make_match_reference(ref_kernel)

No submissions yet

Be the first to submit a solution for this challenge!