[AMDGPU] Add support for `v_sin_bf16` on gfx1250 #149241

shiltian · 2025-07-17T03:49:08Z

Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>

shiltian · 2025-07-17T03:49:28Z

* [AMDGPU] Add support for `v_sin_bf16` on gfx1250 #149241 👈 (View in Graphite)
* [AMDGPU] Add support for `v_exp_bf16` on gfx1250 #149229
* `main`

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2025-07-17T03:49:36Z

@llvm/pr-subscribers-clang-codegen
@llvm/pr-subscribers-clang
@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-mc

Author: Shilei Tian (shiltian)

Changes

Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>

Patch is 66.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149241.diff

20 Files Affected:

(modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+2)
(added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.bf16.ll (+33)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s (+45)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s (+48)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s (+56)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s (+60)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s (+12)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s (+16)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s (+45)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s (+48)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s (+56)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s (+60)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s (+16)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s (+20)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt (+63)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt (+59)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt (+15)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt (+100)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt (+60)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt (+20)

diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index d93f5e5b81454..c91319eae7218 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -534,6 +534,7 @@ defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>;
 defm V_RSQ_BF16  : VOP1Inst_t16 <"v_rsq_bf16",  VOP_BF16_BF16, AMDGPUrsq>;
 defm V_LOG_BF16  : VOP1Inst_t16 <"v_log_bf16",  VOP_BF16_BF16, AMDGPUlogf16>;
 defm V_EXP_BF16  : VOP1Inst_t16 <"v_exp_bf16",  VOP_BF16_BF16, AMDGPUexpf16>;
+defm V_SIN_BF16  : VOP1Inst_t16 <"v_sin_bf16",  VOP_BF16_BF16, AMDGPUsin>;
 }
 } // End TRANS = 1, SchedRW = [WriteTrans32]
 defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
@@ -1147,6 +1148,7 @@ defm V_SQRT_BF16             : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07a>;
 defm V_RSQ_BF16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07b>;
 defm V_LOG_BF16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07c>;
 defm V_EXP_BF16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07d>;
+defm V_SIN_BF16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07e>;
 
 //===----------------------------------------------------------------------===//
 // GFX10.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.bf16.ll
new file mode 100644
index 0000000000000..9c35a7eae0b8e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.bf16.ll
@@ -0,0 +1,33 @@
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN %s
+; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
+
+; FIXME: GlobalISel does not work with bf16
+
+declare bfloat @llvm.amdgcn.sin.bf16(bfloat) #0
+
+; GCN-LABEL: {{^}}sin_bf16:
+; GCN: v_sin_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}}
+define amdgpu_kernel void @sin_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
+  %sin = call bfloat @llvm.amdgcn.sin.bf16(bfloat %src) #0
+  store bfloat %sin, ptr addrspace(1) %out, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}sin_bf16_constant_4
+; GCN: v_sin_bf16_e32 v0, 4.0
+define amdgpu_kernel void @sin_bf16_constant_4(ptr addrspace(1) %out) #1 {
+  %sin = call bfloat @llvm.amdgcn.sin.bf16(bfloat 4.0) #0
+  store bfloat %sin, ptr addrspace(1) %out, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}sin_bf16_constant_100
+; GCN: v_sin_bf16_e32 {{v[0-9]+}}, 0x42c8
+define amdgpu_kernel void @sin_bf16_constant_100(ptr addrspace(1) %out) #1 {
+  %sin = call bfloat @llvm.amdgcn.sin.bf16(bfloat 100.0) #0
+  store bfloat %sin, ptr addrspace(1) %out, align 2
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 426f480200e4b..f51d709a594a0 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -298,6 +298,51 @@ v_exp_bf16 v5, src_scc
 v_exp_bf16 v127, 0x8000
 // GFX1250: v_exp_bf16_e32 v127, 0x8000             ; encoding: [0xff,0xfa,0xfe,0x7e,0x00,0x80,0x00,0x00]
 
+v_sin_bf16 v5, v1
+// GFX1250: v_sin_bf16_e32 v5, v1                   ; encoding: [0x01,0xfd,0x0a,0x7e]
+
+v_sin_bf16 v5, v127
+// GFX1250: v_sin_bf16_e32 v5, v127                 ; encoding: [0x7f,0xfd,0x0a,0x7e]
+
+v_sin_bf16 v5, s1
+// GFX1250: v_sin_bf16_e32 v5, s1                   ; encoding: [0x01,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, s105
+// GFX1250: v_sin_bf16_e32 v5, s105                 ; encoding: [0x69,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, vcc_lo
+// GFX1250: v_sin_bf16_e32 v5, vcc_lo               ; encoding: [0x6a,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, vcc_hi
+// GFX1250: v_sin_bf16_e32 v5, vcc_hi               ; encoding: [0x6b,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, ttmp15
+// GFX1250: v_sin_bf16_e32 v5, ttmp15               ; encoding: [0x7b,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, m0
+// GFX1250: v_sin_bf16_e32 v5, m0                   ; encoding: [0x7d,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, exec_lo
+// GFX1250: v_sin_bf16_e32 v5, exec_lo              ; encoding: [0x7e,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, exec_hi
+// GFX1250: v_sin_bf16_e32 v5, exec_hi              ; encoding: [0x7f,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, null
+// GFX1250: v_sin_bf16_e32 v5, null                 ; encoding: [0x7c,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, -1
+// GFX1250: v_sin_bf16_e32 v5, -1                   ; encoding: [0xc1,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, 0.5
+// GFX1250: v_sin_bf16_e32 v5, 0.5                  ; encoding: [0xf0,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, src_scc
+// GFX1250: v_sin_bf16_e32 v5, src_scc              ; encoding: [0xfd,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v127, 0x8000
+// GFX1250: v_sin_bf16_e32 v127, 0x8000             ; encoding: [0xff,0xfc,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
 v_cvt_f32_bf16 v5, v1
 // GFX1250: v_cvt_f32_bf16_e32 v5, v1               ; encoding: [0x01,0xe5,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 93999043d0fb8..39fc73d70cab2 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -316,6 +316,54 @@ v_exp_bf16 v127, 0x8000
 v_exp_bf16 v5.h, v1.h
 // GFX1250: v_exp_bf16_e32 v5.h, v1.h               ; encoding: [0x81,0xfb,0x0a,0x7f]
 
+v_sin_bf16 v5, v1
+// GFX1250: v_sin_bf16_e32 v5, v1                   ; encoding: [0x01,0xfd,0x0a,0x7e]
+
+v_sin_bf16 v5, v127
+// GFX1250: v_sin_bf16_e32 v5, v127                 ; encoding: [0x7f,0xfd,0x0a,0x7e]
+
+v_sin_bf16 v5, s1
+// GFX1250: v_sin_bf16_e32 v5, s1                   ; encoding: [0x01,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, s105
+// GFX1250: v_sin_bf16_e32 v5, s105                 ; encoding: [0x69,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, vcc_lo
+// GFX1250: v_sin_bf16_e32 v5, vcc_lo               ; encoding: [0x6a,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, vcc_hi
+// GFX1250: v_sin_bf16_e32 v5, vcc_hi               ; encoding: [0x6b,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, ttmp15
+// GFX1250: v_sin_bf16_e32 v5, ttmp15               ; encoding: [0x7b,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, m0
+// GFX1250: v_sin_bf16_e32 v5, m0                   ; encoding: [0x7d,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, exec_lo
+// GFX1250: v_sin_bf16_e32 v5, exec_lo              ; encoding: [0x7e,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, exec_hi
+// GFX1250: v_sin_bf16_e32 v5, exec_hi              ; encoding: [0x7f,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, null
+// GFX1250: v_sin_bf16_e32 v5, null                 ; encoding: [0x7c,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, -1
+// GFX1250: v_sin_bf16_e32 v5, -1                   ; encoding: [0xc1,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, 0.5
+// GFX1250: v_sin_bf16_e32 v5, 0.5                  ; encoding: [0xf0,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, src_scc
+// GFX1250: v_sin_bf16_e32 v5, src_scc              ; encoding: [0xfd,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v127, 0x8000
+// GFX1250: v_sin_bf16_e32 v127, 0x8000             ; encoding: [0xff,0xfc,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
+v_sin_bf16 v5.h, v1.h
+// GFX1250: v_sin_bf16_e32 v5.h, v1.h               ; encoding: [0x81,0xfd,0x0a,0x7f]
+
 v_cvt_f32_bf16 v5, v1
 // GFX1250: v_cvt_f32_bf16_e32 v5, v1               ; encoding: [0x01,0xe5,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
index 459c2d3e7b751..97058eb2e7c9f 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -338,6 +338,62 @@ v_exp_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi
 // GFX1250: v_exp_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfa,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_sin_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_sin_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_sin_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_mirror
+// GFX1250: v_sin_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_half_mirror
+// GFX1250: v_sin_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shl:1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shl:15
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shr:1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shr:15
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_ror:1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_ror:15
+// GFX1250: v_sin_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_sin_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_sin_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_sin_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfc,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
index 30355596be48b..6a293c19a79a4 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -362,6 +362,66 @@ v_exp_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
 // GFX1250: v_exp_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7f,0x81,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_sin_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_sin_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_sin_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_mirror
+// GFX1250: v_sin_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_half_mirror
+// GFX1250: v_sin_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shl:1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shl:15
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shr:1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shr:15
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_ror:1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_ror:15
+// GFX1250: v_sin_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_sin_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_sin_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_sin_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfc,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
+// GFX1250: v_sin_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
index 50e3e0acae4d2..d1f53c7b2065c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -74,6 +74,18 @@ v_exp_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX1250: v_exp_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfa,0xfe,0x7e,0x7f,0x00,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_sin_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sin_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfc,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_sin_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfc,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_sin_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
index 34a15116ebed4..dbee9f39df5f5 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
@@ -98,6 +98,22 @@ v_exp_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_exp_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfa,0x0a,0x7f,0x81,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_sin_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sin_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfc,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_sin_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfc,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_sin_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sin_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
index 1d5df8d131228..4257334444244 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
@@ -352,6 +352,51 @@ v_exp_bf16_e64 v5, src_scc mul:4
 v_exp_bf16_e64 v255, -|0x8000| clamp div:2
 // GFX1250: v_exp_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfd,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
 
+v_sin_bf16_e64 v5, v1
+// GFX1250: v_sin_bf16_e64 v5, v1                   ; encoding: [0x05,0x00,0xfe,0xd5,0x01,0x01,0x00,0x00]
+
+v_sin_bf16_e64 v5, v255
+// GFX1250: v_sin_bf16_e64 v5, v255                 ; encoding: [0x05,0x00,0xfe,0xd5,0xff,0x01,0x00,0x00]
+
+v_sin_bf16_e64 v5, s1
+// GFX1250: v_sin_bf16_e64 v5, s1                   ; encoding: [0x05,0x00,0xfe,0xd5,0x01,0x00,0x00,0x00]
+
+v_sin_bf16_e64 v5, s105
+// GFX1250: v_sin_bf16_e64 v5, s105                 ; encoding: [0x05,0x00,0xfe,0xd5,0x69,0x00,0x00,0x00]
+
+v_sin_b...
[truncated]

shiltian · 2025-07-17T12:41:39Z

Merge activity

Jul 17, 12:41 PM UTC: A user started a stack merge that includes this pull request via Graphite.
Jul 17, 12:46 PM UTC: Graphite rebased this pull request as part of a merge.
Jul 17, 12:49 PM UTC: @shiltian merged this pull request with Graphite.

Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>

llvm-ci · 2025-07-18T13:52:17Z

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-expensive-checks-debian running on gribozavr4 while building clang,llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/22815

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: MC/Mips/reloc-directive.s' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips-unknown-linux < /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s -show-encoding -target-abi=o32      | /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=ASM /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s # RUN: at line 1
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips-unknown-linux -show-encoding -target-abi=o32
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=ASM /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s
/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips64-unknown-linux < /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s -show-encoding -target-abi=n32      | /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=ASM /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s # RUN: at line 3
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips64-unknown-linux -show-encoding -target-abi=n32
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=ASM /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s
/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips64-unknown-linux < /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s -show-encoding -target-abi=n64      | /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=ASM /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s # RUN: at line 5
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips64-unknown-linux -show-encoding -target-abi=n64
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=ASM /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s
/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips-unknown-linux < /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s -show-encoding -target-abi=o32      -filetype=obj | /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-readobj --sections --section-data -r - |      /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=OBJ-O32 /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s # RUN: at line 7
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips-unknown-linux -show-encoding -target-abi=o32 -filetype=obj
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-readobj --sections --section-data -r -
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=OBJ-O32 /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s
/b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s:137:17: error: OBJ-O32-NEXT: is not on the line after the previous match
# OBJ-O32-NEXT: 0x88 R_MIPS_NONE -
                ^
<stdin>:236:2: note: 'next' match was here
 0x88 R_MIPS_NONE -
 ^
<stdin>:233:25: note: previous match ended here
 0x84 R_MICROMIPS_JALR -
                        ^
<stdin>:234:1: note: non-matching line after previous match is here
 0x88 R_MIPS_32 -
^

Input file: <stdin>
Check file: /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s

-dump-input=help explains the following input dump.

Input was:
<<<<<<
          .
          .
          .
        231:  0x7C R_MICROMIPS_TLS_TPREL_LO16 - 
        232:  0x80 R_MIPS_JALR - 
        233:  0x84 R_MICROMIPS_JALR - 
        234:  0x88 R_MIPS_32 - 
        235:  0x88 R_MIPS_64 - 
        236:  0x88 R_MIPS_NONE - 
next:137      !~~~~~~~~~~~~~~~~~  error: match on wrong line
        237:  0x88 R_MIPS_16 - 
        238:  0x1C R_MIPS_GOT16 - 
...

shiltian requested review from changpeng and rampitec July 17, 2025 03:49

llvmbot added the `backend:AMDGPU` and `mc` (Machine (object) code) labels Jul 17, 2025

shiltian mentioned this pull request Jul 17, 2025

[AMDGPU] Add support for v_exp_bf16 on gfx1250 #149229

Merged

shiltian force-pushed the users/shiltian/v_sin_bf16 branch from 44ec01f to 74b87c9 Compare July 17, 2025 03:50

llvmbot added the `clang` (Clang issues not falling into any other category), `clang:frontend` (Language frontend issues, e.g. anything involving "Sema"), and `clang:codegen` (IR generation bugs: mangling, exceptions, etc.) labels Jul 17, 2025

shiltian force-pushed the users/shiltian/v_sin_bf16 branch from 74b87c9 to 49f17c4 Compare July 17, 2025 03:54

rampitec approved these changes Jul 17, 2025

View reviewed changes

shiltian force-pushed the users/shiltian/v_exp_bf16 branch from 4efddc2 to 62986e0 Compare July 17, 2025 12:43

Base automatically changed from users/shiltian/v_exp_bf16 to main July 17, 2025 12:46

[AMDGPU] Add support for v_sin_bf16 on gfx1250

8012a2d

Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>

shiltian force-pushed the users/shiltian/v_sin_bf16 branch from 49f17c4 to 8012a2d Compare July 17, 2025 12:46

shiltian merged commit a102342 into main Jul 17, 2025
7 of 9 checks passed

shiltian deleted the users/shiltian/v_sin_bf16 branch July 17, 2025 12:49

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Add support for `v_sin_bf16` on gfx1250 #149241

[AMDGPU] Add support for `v_sin_bf16` on gfx1250 #149241

Uh oh!

shiltian commented Jul 17, 2025

Uh oh!

shiltian commented Jul 17, 2025 •

edited

Loading

Uh oh!

llvmbot commented Jul 17, 2025 •

edited

Loading

Uh oh!

shiltian commented Jul 17, 2025 •

edited

Loading

Uh oh!

Uh oh!

llvm-ci commented Jul 18, 2025

Uh oh!

Uh oh!

[AMDGPU] Add support for v_sin_bf16 on gfx1250 #149241

[AMDGPU] Add support for v_sin_bf16 on gfx1250 #149241

Uh oh!

Conversation

shiltian commented Jul 17, 2025

Uh oh!

shiltian commented Jul 17, 2025 • edited

Uh oh!

Uh oh!

llvmbot commented Jul 17, 2025 • edited

Uh oh!

Uh oh!

shiltian commented Jul 17, 2025 • edited

Uh oh!

Merge activity

Uh oh!

Uh oh!

llvm-ci commented Jul 18, 2025

Uh oh!

Uh oh!

[AMDGPU] Add support for `v_sin_bf16` on gfx1250 #149241

[AMDGPU] Add support for `v_sin_bf16` on gfx1250 #149241

shiltian commented Jul 17, 2025 •

edited

Loading

llvmbot commented Jul 17, 2025 •

edited

Loading

shiltian commented Jul 17, 2025 •

edited

Loading