grayscale
Deadline
41 days 19 hours (2025-06-30 00:00 UTC)
GPU Types
A100, H100, L4, T4
Description
Implement an RGB to grayscale conversion kernel that matches the reference implementation.
The kernel should convert square RGB images (H = W, with an even side length) to grayscale using the standard coefficients:
Y = 0.2989 R + 0.5870 G + 0.1140 B
Input: RGB tensor of shape (H, W, 3) with values in [0, 1]
Output: Grayscale tensor of shape (H, W) with values in [0, 1]
Show
Copy
Reference Implementation
from utils import make_match_reference
import torch
from task import input_t, output_t
def ref_kernel(data: input_t) -> output_t:
    """
    Reference implementation of RGB to grayscale conversion using PyTorch.

    Every pixel is reduced to a single luminance value with the standard
    coefficients: Y = 0.2989 R + 0.5870 G + 0.1140 B. The weighting is a
    broadcasted multiply followed by a sum over the last (channel) axis.

    Args:
        data: RGB tensor of shape (H, W, 3) with values in [0, 1].

    Returns:
        Grayscale tensor of shape (H, W) with values in [0, 1], using the
        same dtype and device as ``data``.
    """
    # Standard RGB to Grayscale coefficients, built with the input's device
    # and dtype so the multiply below needs no transfer or cast.
    weights = torch.tensor([0.2989, 0.5870, 0.1140],
                           device=data.device,
                           dtype=data.dtype)
    # (H, W, 3) * (3,) broadcasts per channel; summing dim=-1 drops the
    # channel axis and leaves (H, W).
    return torch.sum(data * weights, dim=-1)
def generate_input(size: int, seed: int, *, device: str = 'cuda') -> input_t:
    """
    Generates a reproducible random square RGB image tensor.

    Args:
        size: Side length of the image; the result has ``size`` rows and
            ``size`` columns.
        seed: Seed for the dedicated random generator, so the same
            ``(size, seed, device)`` always yields the same tensor.
        device: Device on which both the generator and the tensor are
            created. Defaults to ``'cuda'``, matching the original behavior.
            Random streams differ between devices, so the same seed gives
            different values on CPU and on CUDA.

    Returns:
        Contiguous float32 tensor of shape (size, size, 3) with values
        drawn uniformly from [0, 1).
    """
    # A private generator keeps the global RNG state untouched and makes the
    # output depend only on the arguments.
    gen = torch.Generator(device=device)
    gen.manual_seed(seed)
    return torch.rand(size, size, 3,
                      device=device,
                      dtype=torch.float32,
                      generator=gen).contiguous()
# Correctness checker built by passing ref_kernel to make_match_reference
# with relative and absolute tolerances of 1e-4.
# NOTE(review): presumably this compares a submission's output against
# ref_kernel within those tolerances — confirm against utils.make_match_reference.
check_implementation = make_match_reference(ref_kernel, rtol=1e-4, atol=1e-4)
Rankings
L4
Show All (22)
nikhilap 🥇
9109.776μs
grayscale.py
cudawarped 🥈
16225.609μs
+7115.833μs
submission_cuda_inline3.py
Shinsato Masumi 🥉
16465.310μs
+239.701μs
submission3.py
geohot
16908.299μs
+442.990μs
tinygrad.py
Karang
16921.183μs
+12.884μs
grayscale_v1_cuda.py
pongtsu
16955.365μs
+34.182μs
grayscale_fused.py
truk@PLT
17039.918μs
+84.554μs
submission.py
Snektron
17067.446μs
+27.528μs
aaa.py
mobicham
17073.246μs
+5.800μs
grayscale_cuda_v3.py
NJR
17120.590μs
+47.344μs
drjit_v1.py
blueblue
17136.907μs
+16.317μs
sub4.py
ajhinh
17180.198μs
+43.291μs
l4.py
Quantizr
17180.371μs
+0.173μs
grayscale_L4.py
FourCore
17369.045μs
+188.674μs
grayscale.py
salykova
17395.948μs
+26.903μs
v2.py
gau.nernst
17496.732μs
+100.784μs
submission_float12.py
Chadlet
17624.908μs
+128.176μs
AIDE_out_grayscale.py
charles_irl
17647.015μs
+22.107μs
triton.py
Karan Jakhar
17869.179μs
+222.164μs
triton_sub.py
Trax
18322.243μs
+453.065μs
custom_kernel.py
rb
19241.309μs
+919.066μs
submission.py
siro
43783.646μs
+24542.337μs
submission.py
T4
Show All (22)
nikhilap 🥇
8748.413μs
grayscale.py
cudawarped 🥈
16143.984μs
+7395.572μs
submission_cuda_inline_base.py
Shinsato Masumi 🥉
16146.785μs
+2.801μs
submission3.py
pongtsu
16253.513μs
+106.728μs
grayscale_inline.py
truk@PLT
16297.267μs
+43.753μs
submission.py
Joshua Swartz
16468.595μs
+171.328μs
submission.py
ajhinh
16582.775μs
+114.180μs
t4.py
blueblue
16692.272μs
+109.496μs
submission.py
Karang
16807.300μs
+115.029μs
grayscale_v1_cuda.py
NJR
17147.234μs
+339.934μs
drjit_v2.py
charles_irl
17232.580μs
+85.346μs
triton.py
FourCore
17245.283μs
+12.703μs
grayscale.py
Karan Jakhar
17258.813μs
+13.529μs
triton_sub.py
Chadlet
17270.829μs
+12.016μs
AIDE_out_grayscale.py
Trax
17316.619μs
+45.790μs
custom_kernel.py
tomaszki
17338.018μs
+21.399μs
grayscale.py
Anthony
17366.782μs
+28.764μs
submission.py
gau.nernst
17926.891μs
+560.109μs
submission.py
Vlad
19992.128μs
+2065.237μs
g_autotune_triton.py
siro
48176.454μs
+28184.326μs
submission.py
Sharon
48336.262μs
+159.808μs
from_task_import_input_t_output_t.py
Blue×Kill
76614.551μs
+28278.289μs
grayscale_lb5.py
A100
Show All (50)
nikhilap 🥇
1399.930μs
grayscale.py
Shinsato Masumi 🥈
2428.769μs
+1028.840μs
submission.py
salykova 🥉
2437.997μs
+9.227μs
inline_cuda_ptx.py
cudawarped
2441.012μs
+3.016μs
submission_cuda_inline1.py
tomaszki
2464.851μs
+23.839μs
grayscale.py
FourCore
2530.905μs
+66.054μs
grayscale.py
truk@PLT
2536.422μs
+5.518μs
submission.py
dejavucoder
2541.482μs
+5.060μs
fastest.py
Snektron
2555.310μs
+13.828μs
aaa.py
Chadlet
2582.038μs
+26.728μs
AIDE_out_grayscale.py
ajhinh
2606.225μs
+24.186μs
a100.py
NJR
2670.595μs
+64.371μs
drjit_v2.py
pongtsu
2697.218μs
+26.623μs
grayscale_fused.py
mobicham
3068.234μs
+371.015μs
grayscale_v3.7_a100_.py
Leiko
3082.762μs
+14.528μs
lined.py
Quantizr
3087.569μs
+4.807μs
grayscale_a100.py
geohot
3089.071μs
+1.502μs
tinygrad.py
Karang
3094.217μs
+5.147μs
grayscale_v1_cuda.py
f14
3096.536μs
+2.319μs
submission.py
blueblue
3146.586μs
+50.050μs
submission.py
dumball
3155.315μs
+8.729μs
kernel.py
Anthony
3160.102μs
+4.787μs
submission.py
gau.nernst
3163.628μs
+3.526μs
submission.py
jack
3177.263μs
+13.635μs
submission.py
Karan Jakhar
3184.078μs
+6.815μs
triton_sub.py
Joshua Swartz
3185.983μs
+1.905μs
tr2.py
Trax
3209.451μs
+23.468μs
custom_kernel.py
charles_irl
3271.022μs
+61.571μs
triton.py
Anne Ouyang
3289.363μs
+18.341μs
submission.py
artem
3338.851μs
+49.487μs
test.py
Tuna Tuncer
6325.797μs
+2986.947μs
submission.py
osborn0016
9210.789μs
+2884.992μs
submission.py
parrotsky
9262.040μs
+51.251μs
submission.py
siclait
9267.880μs
+5.840μs
submission.py
Art Moskvin
9280.346μs
+12.466μs
submission.py
david_li_55686
9285.460μs
+5.114μs
submission.py
mooglevich
9290.226μs
+4.766μs
submission.py
gauravgokhale
9293.438μs
+3.212μs
submission.py
cloudysky123_18954
9300.627μs
+7.190μs
submission.py
youyc22_78608
9311.323μs
+10.695μs
submission.py
_kernelfolw_
9321.665μs
+10.342μs
submission.py
egghao
9379.731μs
+58.066μs
submission.py
legendary_fawn_56575
9381.815μs
+2.084μs
submission.py
sridharnandigam
9412.135μs
+30.320μs
submission.py
sahanp
9416.182μs
+4.047μs
submission.py
Seraphim
10131.550μs
+715.368μs
submission.py
siro
10159.300μs
+27.751μs
submission.py
roby1805
11392.746μs
+1233.445μs
submission.py
Smexy
14157.763μs
+2765.018μs
submission.py
shikhar
18465.732μs
+4307.969μs
lossfunk.py
H100
Show All (35)
nikhilap 🥇
797.931μs
grayscale.py
charles_irl 🥈
1042.208μs
+244.276μs
triton.py
jack 🥉
1046.473μs
+4.266μs
submission.py
Karang
1392.425μs
+345.952μs
grayscale_v1_cuda.py
Snektron
1393.435μs
+1.010μs
aaa.py
cudawarped
1394.261μs
+0.825μs
submission_cuda_inline6a.py
Shinsato Masumi
1395.372μs
+1.112μs
submission.py
mobicham
1399.076μs
+3.704μs
grayscale_v3.5_h100_.py
tomaszki
1402.417μs
+3.340μs
grayscale.py
salykova
1407.314μs
+4.897μs
inline_cuda_ptx.py
truk@PLT
1407.989μs
+0.675μs
submission.py
geohot
1408.142μs
+0.153μs
tinygrad.py
Quantizr
1410.377μs
+2.235μs
grayscale_h100.py
Nathan Wang
1419.196μs
+8.819μs
grayscale.py
blueblue
1420.670μs
+1.474μs
submission.py
az
1431.599μs
+10.929μs
submission.py
dejavucoder
1432.831μs
+1.232μs
fastest_h100.py
FourCore
1444.351μs
+11.520μs
grayscale.py
Chadlet
1447.537μs
+3.186μs
AIDE_out_grayscale.py
Joshua Swartz
1453.805μs
+6.268μs
tr0.py
gau.nernst
1457.972μs
+4.167μs
submission.py
Karan Jakhar
1459.655μs
+1.683μs
triton_sub.py
pongtsu
1466.360μs
+6.705μs
grayscale_triton.py
ajhinh
1469.029μs
+2.669μs
h100.py
Trax
1473.298μs
+4.269μs
custom_kernel.py
Anne Ouyang
1487.140μs
+13.842μs
submission.py
Anthony
1491.799μs
+4.659μs
submission.py
mancala
1542.927μs
+51.129μs
submission.py
NJR
1590.000μs
+47.073μs
drjit_v2.py
Leiko
1766.470μs
+176.470μs
autotune_abuse.py
Vlad
1931.559μs
+165.089μs
g_autotune_triton.py
Darshan
3454.750μs
+1523.191μs
grayscale_triton.py
Seraphim
6084.559μs
+2629.809μs
submission.py
siro
6118.796μs
+34.237μs
submission.py
rcmalli
6283.132μs
+164.336μs
submission.py