vectorsum_v2
Deadline
136 days 12 hours remaining (2025-12-30 00:00 UTC)
Language
Python
GPU Types
A100, B200, H100, L4
Description
Implement a vector sum reduction kernel. This kernel computes the sum of all elements in the input tensor. Input: A tensor of shape `(N,)` with values from a normal distribution with mean 0 and variance 1. Output: A scalar value equal to the sum of all elements in the input tensor.
Reference Implementation
from utils import make_match_reference, DeterministicContext
import torch
from task import input_t, output_t
def ref_kernel(data: input_t) -> output_t:
"""
Reference implementation of vector sum reduction using PyTorch.
Args:
data: Input tensor to be reduced
Returns:
Tensor containing the sum of all elements
"""
with DeterministicContext():
data, output = data
# Let's be on the safe side here, and do the reduction in 64 bit
output = data.to(torch.float64).sum().to(torch.float32)
return output
def generate_input(size: int, seed: int) -> input_t:
"""
Generates random input tensor of specified shape with random offset and scale.
The data is first generated as standard normal, then scaled and offset
to prevent trivial solutions.
Returns:
Tensor to be reduced
"""
gen = torch.Generator(device="cuda")
gen.manual_seed(seed)
# Generate base random data
data = torch.randn(
size, device="cuda", dtype=torch.float32, generator=gen
).contiguous()
# Generate random offset and scale (using different seeds to avoid correlation)
offset_gen = torch.Generator(device="cuda")
offset_gen.manual_seed(seed + 1)
scale_gen = torch.Generator(device="cuda")
scale_gen.manual_seed(seed + 2)
# Generate random offset between -100 and 100
offset = (torch.rand(1, device="cuda", generator=offset_gen) * 200 - 100).item()
# Generate random scale between 0.1 and 10
scale = (torch.rand(1, device="cuda", generator=scale_gen) * 9.9 + 0.1).item()
# Apply scale and offset
input_tensor = (data * scale + offset).contiguous()
output_tensor = torch.empty(1, device="cuda", dtype=torch.float32)
return input_tensor, output_tensor
check_implementation = make_match_reference(ref_kernel)
No submissions yet
Be the first to submit a solution for this challenge!