Implement a float16 vector addition kernel.
Input: tuple(torch.Tensor, torch.Tensor) with tensors of shape (N, N) and type torch.float16. These tensors are from
a normal distribution with mean 0 and variance 1.
Output: torch.Tensor of shape (N, N) and type torch.float16
Reference Implementation
from utils import make_match_reference
import torch
from task import input_t, output_t
def ref_kernel(data: input_t) -> output_t:
"""
Reference implementation of vector addition using PyTorch.
Args:
data: Tuple of tensors [A, B] to be added.
Returns:
Tensor containing element-wise sums.
"""
A, B = data
return A + B
def generate_input(size: int, seed: int) -> input_t:
"""
Generates random input tensors of specified shapes.
Returns:
Tuple of tensors [A, B] to be added.
"""
gen = torch.Generator(device='cuda')
gen.manual_seed(seed)
A = torch.randn(size, size, device='cuda', dtype=torch.float16, generator=gen).contiguous()
B = torch.randn(size, size, device='cuda', dtype=torch.float16, generator=gen).contiguous()
return (A, B)
check_implementation = make_match_reference(ref_kernel)