// Tuning values read by the trail diffusion/decay pass.
// Eight f32 fields (32 bytes); the three padding fields are never read here.
struct Settings {
    inverseDiffusionRateTrails: f32,
    decayRateTrails: f32,
    diffusionNeighborScale: f32,
    brushDecayAlphaMultiplier: f32,
    brushDecayAlphaSubtract: f32,
    padding0: f32,
    padding1: f32,
    padding2: f32,
};

const WORKGROUP_SIZE_X = 16u;
const WORKGROUP_SIZE_Y = 16u;

// One-pixel halo on each side so the 3x3 neighbourhood read in the main pass
// can be served from workgroup memory without bounds checks for interior tiles.
const TILE_SIZE_X = WORKGROUP_SIZE_X + 2u;
const TILE_SIZE_Y = WORKGROUP_SIZE_Y + 2u;
const TILE_TEXEL_COUNT = TILE_SIZE_X * TILE_SIZE_Y;

// 1.0 / 2^32, used to map a 32-bit hash to the unit interval. Nominally
// [0, 1); the very largest hashes may round up to 2^32 when converted to f32,
// in which case the result is exactly 1.0.
const HASH_TO_UNIT_FLOAT: f32 = 2.3283064365386963e-10;

// NOTE(review): address space assumed to be `uniform` — confirm against the
// host-side bind group layout.
@group(0) @binding(0) var<uniform> settings: Settings;
@group(0) @binding(1) var trailMap: texture_2d<f32>;
// NOTE(review): texel format assumed (rgba16float) — it must match the format
// declared for this binding in the pipeline's bind group layout.
@group(0) @binding(2) var trailMapOut: texture_storage_2d<rgba16float, write>;

// Workgroup-local copy of the tile (including halo) and the per-texel RGB
// magnitude, both sized from the tile constants so they cannot drift apart.
var<workgroup> tile: array<vec4<f32>, TILE_TEXEL_COUNT>;
var<workgroup> tileTrailStrength: array<f32, TILE_TEXEL_COUNT>;

// Diffuses and decays the trail map: each invocation handles one pixel.
// The workgroup first cooperatively loads its 18x18 tile (16x16 plus halo)
// into workgroup memory, then every in-bounds invocation blends in its eight
// neighbours, applies decay, and writes the result to `trailMapOut`.
@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
    @builtin(workgroup_id) workgroup_id: vec3<u32>
) {
    let textureSize = vec2<i32>(textureDimensions(trailMap, 0));
    let textureSizeU32 = vec2<u32>(textureSize);
    let localLinearIndex = local_id.y * WORKGROUP_SIZE_X + local_id.x;
    let workgroupOrigin = workgroup_id.xy * vec2<u32>(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y);

    // A tile is "interior" when its whole halo lies inside the texture, so
    // the halo loads below need no clamping.
    let isInteriorTile =
        workgroupOrigin.x > 0u &&
        workgroupOrigin.y > 0u &&
        workgroupOrigin.x + WORKGROUP_SIZE_X < textureSizeU32.x &&
        workgroupOrigin.y + WORKGROUP_SIZE_Y < textureSizeU32.y;

    // The tile has more texels than the workgroup has invocations, so each
    // invocation strides through the tile by the workgroup's invocation count.
    for (
        var tileIndex = localLinearIndex;
        tileIndex < TILE_TEXEL_COUNT;
        tileIndex += WORKGROUP_SIZE_X * WORKGROUP_SIZE_Y
    ) {
        let tilePosition = vec2<u32>(tileIndex % TILE_SIZE_X, tileIndex / TILE_SIZE_X);
        // Shift by (-1, -1) so tile texel (1, 1) maps to the workgroup origin.
        let unclampedSourcePixel = vec2<i32>(workgroupOrigin + tilePosition) - vec2<i32>(1, 1);
        var sourcePixel = unclampedSourcePixel;
        if !isInteriorTile {
            // Edge tiles repeat the border texel for out-of-range halo reads.
            sourcePixel = clamp(
                unclampedSourcePixel,
                vec2<i32>(0, 0),
                textureSize - vec2<i32>(1, 1)
            );
        }
        let texel = textureLoad(trailMap, sourcePixel, 0);
        tile[tileIndex] = texel;
        tileTrailStrength[tileIndex] = length(texel.rgb);
    }

    // Every invocation reaches the barrier before any of them may return.
    workgroupBarrier();

    let pixel = vec2<i32>(global_id.xy);
    let inBounds = pixel.x < textureSize.x && pixel.y < textureSize.y;
    if !inBounds {
        return;
    }

    let centerTilePosition = local_id.xy + vec2<u32>(1u, 1u);
    let centerTileIndex = centerTilePosition.y * TILE_SIZE_X + centerTilePosition.x;
    var current = tile[centerTileIndex];

    let random = random_from_pixel(pixel);
    let trailWeight = diffusion_weight(
        random,
        settings.inverseDiffusionRateTrails
    );

    // Sum the contribution of all eight neighbours (diagonals first, then the
    // four axis-aligned ones), each evaluated against the pre-update colour.
    current += (
        propagate(centerTileIndex, -1, -1, current, trailWeight) +
        propagate(centerTileIndex, -1, 1, current, trailWeight) +
        propagate(centerTileIndex, 1, -1, current, trailWeight) +
        propagate(centerTileIndex, 1, 1, current, trailWeight) +
        propagate(centerTileIndex, -1, 0, current, trailWeight) +
        propagate(centerTileIndex, 0, -1, current, trailWeight) +
        propagate(centerTileIndex, 1, 0, current, trailWeight) +
        propagate(centerTileIndex, 0, 1, current, trailWeight)
    ) * settings.diffusionNeighborScale;

    // RGB decays multiplicatively; alpha decays by multiply-then-subtract.
    let decayed = clamp(
        vec4<f32>(
            current.rgb * settings.decayRateTrails,
            max(0.0, current.a * settings.brushDecayAlphaMultiplier - settings.brushDecayAlphaSubtract)
        ),
        vec4<f32>(0.0),
        vec4<f32>(1.0)
    );

    textureStore(trailMapOut, pixel, decayed);
}

// Returns how much the neighbour at (offsetX, offsetY) from the centre texel
// adds to `currentColor`. Only the positive part of (neighbour - current),
// clamped to [0, 1], is propagated; RGB is scaled by the neighbour's cached
// RGB magnitude and alpha by the neighbour's alpha, both times `trailWeight`.
// Offsets are expected to stay within the one-texel halo around the centre.
fn propagate(
    centerTileIndex: u32,
    offsetX: i32,
    offsetY: i32,
    currentColor: vec4<f32>,
    trailWeight: f32
) -> vec4<f32> {
    let neighbourIndex = i32(centerTileIndex) + offsetY * i32(TILE_SIZE_X) + offsetX;
    let neighbourTileIndex = u32(neighbourIndex);
    let neighbour = tile[neighbourTileIndex];
    let difference = clamp(neighbour - currentColor, vec4<f32>(0.0), vec4<f32>(1.0));

    return vec4<f32>(
        vec3<f32>(tileTrailStrength[neighbourTileIndex] * trailWeight),
        neighbour.a * trailWeight
    ) * difference;
}

// Hashes a pixel coordinate into a pseudo-random f32 in the unit interval.
// The coordinate is combined with fixed odd multipliers and then scrambled
// with xor-shift / multiply rounds; u32 arithmetic wraps on overflow.
fn random_from_pixel(pixel: vec2<i32>) -> f32 {
    let p = vec2<u32>(pixel);
    var hash = p.x * 1664525u + p.y * 1013904223u + 374761393u;
    hash = (hash ^ (hash >> 16u)) * 2246822519u;
    hash = (hash ^ (hash >> 13u)) * 3266489917u;
    hash = hash ^ (hash >> 16u);
    return f32(hash) * HASH_TO_UNIT_FLOAT;
}
// Cheap piecewise-linear stand-in for pow(r, inverseRate). The exponent is
// bracketed between neighbouring powers (r, r^2, r^4, r^8, r^16) and the two
// bracketing values are blended, so diffusion sharpness can be varied without
// paying for a real pow() per pixel.
fn diffusion_weight(
    r: f32,
    inverseRate: f32
) -> f32 {
    var weight: f32;

    if inverseRate < 1.0 {
        // Sub-linear range: r / (0.5 + 0.5 * r) stands in for a root of r and
        // is blended towards plain r as inverseRate goes from 0.5 to 1.0.
        let rootLike = r / max(0.5 + r * 0.5, 0.0001);
        let blend = clamp((inverseRate - 0.5) * 2.0, 0.0, 1.0);
        weight = mix(rootLike, r, blend);
    } else if inverseRate < 2.0 {
        // Exponent in [1, 2): blend r -> r^2.
        weight = mix(r, r * r, inverseRate - 1.0);
    } else if inverseRate < 4.0 {
        // Exponent in [2, 4): blend r^2 -> r^4; the span is 2, hence * 0.5.
        let squared = r * r;
        weight = mix(squared, squared * squared, (inverseRate - 2.0) * 0.5);
    } else if inverseRate < 8.0 {
        // Exponent in [4, 8): blend r^4 -> r^8; the span is 4, hence * 0.25.
        let squared = r * r;
        let fourth = squared * squared;
        weight = mix(fourth, fourth * fourth, (inverseRate - 4.0) * 0.25);
    } else {
        // Exponent 8 and up: blend r^8 -> r^16 (span 8, hence * 0.125), with
        // the blend pinned at r^16 from 16 onwards. Past 16 the result is
        // additionally scaled down by 16 / inverseRate.
        let squared = r * r;
        let fourth = squared * squared;
        let eighth = fourth * fourth;
        let blend = clamp((inverseRate - 8.0) * 0.125, 0.0, 1.0);
        let falloff = min(1.0, 16.0 / inverseRate);
        weight = mix(eighth, eighth * eighth, blend) * falloff;
    }

    return weight;
}