From f6105c66b33ad1da99703da8e0c1bf847a3cd4a6 Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Tue, 21 Oct 2025 10:20:53 -0500 Subject: [PATCH] shadertoy_circle.fs.asm : use presubtract to reduce instruction count --- drm/shadertoy_circle.fs.asm | 34 ++++++++++++++-------------------- drm/shadertoy_circle.fs.inc | 27 +++------------------------ regs/us_disassemble2.py | 10 ++++++++-- 3 files changed, 25 insertions(+), 46 deletions(-) diff --git a/drm/shadertoy_circle.fs.asm b/drm/shadertoy_circle.fs.asm index 7fb3dcb..ebedc47 100644 --- a/drm/shadertoy_circle.fs.asm +++ b/drm/shadertoy_circle.fs.asm @@ -1,29 +1,23 @@ -# CONSTS[0] = (-0.1, 0, 0, 0) +# CONST[0] = (-0.1, 0, 0, 0) # d = length(uv) -src0.rgb = temp[0] : +src0.rgb = temp[0] : temp[0].r = DP3 src0.rg0 src0.rg0 ; -src0.rgb = temp[0] : +src0.rgb = temp[0] : temp[0].a = RSQ |src0.r| ; -src0.a = temp[0] : +src0.a = temp[0] : temp[0].a = RCP src0.a ; -# d -= 0.5 -src0.a = temp[0] : - temp[0].r = MAD src0.a00 src0.100 -src0.h00 ; +# d = abs(d - 0.5) * 1 + -0.1 +src0.rgb = float(48), # 0.5 +src1.rgb = temp[0], # d +src2.rgb = const[0], # -0.1 +srcp.rgb = sub : # (src1.rgb - src0.rgb) + temp[0].r = MAD |srcp.r00| src0.100 src2.r00 ; -# d = abs(d) * 1 + -0.1 -src0.rgb = temp[0] , src1.rgb = const[0] : - temp[0].r = MAD |src0.r00| src0.100 src1.r00 ; - -# out.r = (d >= 0.0) ? 1.0 : 0.0 -OUT -src0.rgb = temp[0] : - out[0].r = CMP src0.100 src0.000 src0.r00 ; - -# out.a = 1 -# out.gb = vec2(0, 0) +# d = (d >= 0.0) ? 1.0 : 0.0 +# out.rgba = vec4(d, 0, 0, 1) OUT TEX_SEM_WAIT - : +src0.rgb = temp[0] : out[0].a = MAX src0.1 src0.1 , - out[0].gb = MAX src0.000 src0.000 ; + out[0].rgb = CMP src0.100 src0.000 src0.r00 ; diff --git a/drm/shadertoy_circle.fs.inc b/drm/shadertoy_circle.fs.inc index 9e28b79..05c8e71 100644 --- a/drm/shadertoy_circle.fs.inc +++ b/drm/shadertoy_circle.fs.inc @@ -20,30 +20,9 @@ 0x00000000, 0x00000800, +0x500000b0, 0x08020080, -0x08020000, -0x0093048c, +0x00931483, 0x00000000, -0x00c94000, - -0x00000800, -0x08040000, -0x08020080, -0x00931480, -0x00000000, -0x00481000, - -0x00008001, -0x08020000, -0x08020080, -0x00920498, -0x00000000, -0x00480008, - -0x00070005, -0x08020080, -0x08020080, -0x00920490, -0x00c18003, -0x00000005, +0x00482000, diff --git a/regs/us_disassemble2.py b/regs/us_disassemble2.py index 61c54ac..60b11fe 100644 --- a/regs/us_disassemble2.py +++ b/regs/us_disassemble2.py @@ -1,11 +1,11 @@ import sys -from os import path +from os import path, environ import parse_bits from collections import OrderedDict from functools import partial from pprint import pprint -VERBOSE = False +VERBOSE = environ.get("VERBOSE", "false").lower() == "true" class BaseRegister: def get(self, code, *, code_ix, descriptor): @@ -324,7 +324,13 @@ def disassemble_alu(code, is_output): rgb_swizzle_sel = rgb_swizzle_sel[:rgb_op_operands] a_sources = set(a_sels) + if 3 in a_sources: + a_sources.add(0) + a_sources.add(1) rgb_sources = set(rgb_sels) + if 3 in rgb_sources: + rgb_sources.add(0) + rgb_sources.add(1) a_addr_strs = [s for i, s in enumerate(a_addr_strs) if i in a_sources] rgb_addr_strs = [s for i, s in enumerate(rgb_addr_strs) if i in rgb_sources]