Skip to content

[AMDGPU] Add support for v_sin_bf16 on gfx1250 #149241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
shiltian merged 1 commit into main from users/shiltian/v_sin_bf16 on Jul 17, 2025
Merged

Conversation

shiltian
Copy link
Contributor

Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>

@shiltian shiltian requested review from changpeng and rampitec July 17, 2025 03:49
@llvmbot llvmbot added backend:AMDGPU mc Machine (object) code labels Jul 17, 2025
Copy link
Contributor Author

shiltian commented Jul 17, 2025

@llvmbot
Copy link
Member

llvmbot commented Jul 17, 2025

@llvm/pr-subscribers-clang-codegen
@llvm/pr-subscribers-clang
@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-mc

Author: Shilei Tian (shiltian)

Changes

Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>


Patch is 66.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149241.diff

20 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+2)
  • (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.bf16.ll (+33)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s (+45)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s (+48)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s (+56)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s (+60)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s (+12)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s (+16)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s (+45)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s (+48)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s (+56)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s (+60)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s (+16)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s (+20)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt (+63)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt (+59)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt (+15)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt (+100)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt (+60)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt (+20)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index d93f5e5b81454..c91319eae7218 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -534,6 +534,7 @@ defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>;
 defm V_RSQ_BF16  : VOP1Inst_t16 <"v_rsq_bf16",  VOP_BF16_BF16, AMDGPUrsq>;
 defm V_LOG_BF16  : VOP1Inst_t16 <"v_log_bf16",  VOP_BF16_BF16, AMDGPUlogf16>;
 defm V_EXP_BF16  : VOP1Inst_t16 <"v_exp_bf16",  VOP_BF16_BF16, AMDGPUexpf16>;
+defm V_SIN_BF16  : VOP1Inst_t16 <"v_sin_bf16",  VOP_BF16_BF16, AMDGPUsin>;
 }
 } // End TRANS = 1, SchedRW = [WriteTrans32]
 defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
@@ -1147,6 +1148,7 @@ defm V_SQRT_BF16             : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07a>;
 defm V_RSQ_BF16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07b>;
 defm V_LOG_BF16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07c>;
 defm V_EXP_BF16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07d>;
+defm V_SIN_BF16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07e>;
 
 //===----------------------------------------------------------------------===//
 // GFX10.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.bf16.ll
new file mode 100644
index 0000000000000..9c35a7eae0b8e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.bf16.ll
@@ -0,0 +1,33 @@
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN %s
+; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
+
+; FIXME: GlobalISel does not work with bf16
+
+declare bfloat @llvm.amdgcn.sin.bf16(bfloat) #0
+
+; GCN-LABEL: {{^}}sin_bf16:
+; GCN: v_sin_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}}
+define amdgpu_kernel void @sin_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
+  %sin = call bfloat @llvm.amdgcn.sin.bf16(bfloat %src) #0
+  store bfloat %sin, ptr addrspace(1) %out, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}sin_bf16_constant_4
+; GCN: v_sin_bf16_e32 v0, 4.0
+define amdgpu_kernel void @sin_bf16_constant_4(ptr addrspace(1) %out) #1 {
+  %sin = call bfloat @llvm.amdgcn.sin.bf16(bfloat 4.0) #0
+  store bfloat %sin, ptr addrspace(1) %out, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}sin_bf16_constant_100
+; GCN: v_sin_bf16_e32 {{v[0-9]+}}, 0x42c8
+define amdgpu_kernel void @sin_bf16_constant_100(ptr addrspace(1) %out) #1 {
+  %sin = call bfloat @llvm.amdgcn.sin.bf16(bfloat 100.0) #0
+  store bfloat %sin, ptr addrspace(1) %out, align 2
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 426f480200e4b..f51d709a594a0 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -298,6 +298,51 @@ v_exp_bf16 v5, src_scc
 v_exp_bf16 v127, 0x8000
 // GFX1250: v_exp_bf16_e32 v127, 0x8000             ; encoding: [0xff,0xfa,0xfe,0x7e,0x00,0x80,0x00,0x00]
 
+v_sin_bf16 v5, v1
+// GFX1250: v_sin_bf16_e32 v5, v1                   ; encoding: [0x01,0xfd,0x0a,0x7e]
+
+v_sin_bf16 v5, v127
+// GFX1250: v_sin_bf16_e32 v5, v127                 ; encoding: [0x7f,0xfd,0x0a,0x7e]
+
+v_sin_bf16 v5, s1
+// GFX1250: v_sin_bf16_e32 v5, s1                   ; encoding: [0x01,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, s105
+// GFX1250: v_sin_bf16_e32 v5, s105                 ; encoding: [0x69,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, vcc_lo
+// GFX1250: v_sin_bf16_e32 v5, vcc_lo               ; encoding: [0x6a,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, vcc_hi
+// GFX1250: v_sin_bf16_e32 v5, vcc_hi               ; encoding: [0x6b,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, ttmp15
+// GFX1250: v_sin_bf16_e32 v5, ttmp15               ; encoding: [0x7b,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, m0
+// GFX1250: v_sin_bf16_e32 v5, m0                   ; encoding: [0x7d,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, exec_lo
+// GFX1250: v_sin_bf16_e32 v5, exec_lo              ; encoding: [0x7e,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, exec_hi
+// GFX1250: v_sin_bf16_e32 v5, exec_hi              ; encoding: [0x7f,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, null
+// GFX1250: v_sin_bf16_e32 v5, null                 ; encoding: [0x7c,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, -1
+// GFX1250: v_sin_bf16_e32 v5, -1                   ; encoding: [0xc1,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, 0.5
+// GFX1250: v_sin_bf16_e32 v5, 0.5                  ; encoding: [0xf0,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, src_scc
+// GFX1250: v_sin_bf16_e32 v5, src_scc              ; encoding: [0xfd,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v127, 0x8000
+// GFX1250: v_sin_bf16_e32 v127, 0x8000             ; encoding: [0xff,0xfc,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
 v_cvt_f32_bf16 v5, v1
 // GFX1250: v_cvt_f32_bf16_e32 v5, v1               ; encoding: [0x01,0xe5,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 93999043d0fb8..39fc73d70cab2 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -316,6 +316,54 @@ v_exp_bf16 v127, 0x8000
 v_exp_bf16 v5.h, v1.h
 // GFX1250: v_exp_bf16_e32 v5.h, v1.h               ; encoding: [0x81,0xfb,0x0a,0x7f]
 
+v_sin_bf16 v5, v1
+// GFX1250: v_sin_bf16_e32 v5, v1                   ; encoding: [0x01,0xfd,0x0a,0x7e]
+
+v_sin_bf16 v5, v127
+// GFX1250: v_sin_bf16_e32 v5, v127                 ; encoding: [0x7f,0xfd,0x0a,0x7e]
+
+v_sin_bf16 v5, s1
+// GFX1250: v_sin_bf16_e32 v5, s1                   ; encoding: [0x01,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, s105
+// GFX1250: v_sin_bf16_e32 v5, s105                 ; encoding: [0x69,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, vcc_lo
+// GFX1250: v_sin_bf16_e32 v5, vcc_lo               ; encoding: [0x6a,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, vcc_hi
+// GFX1250: v_sin_bf16_e32 v5, vcc_hi               ; encoding: [0x6b,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, ttmp15
+// GFX1250: v_sin_bf16_e32 v5, ttmp15               ; encoding: [0x7b,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, m0
+// GFX1250: v_sin_bf16_e32 v5, m0                   ; encoding: [0x7d,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, exec_lo
+// GFX1250: v_sin_bf16_e32 v5, exec_lo              ; encoding: [0x7e,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, exec_hi
+// GFX1250: v_sin_bf16_e32 v5, exec_hi              ; encoding: [0x7f,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, null
+// GFX1250: v_sin_bf16_e32 v5, null                 ; encoding: [0x7c,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, -1
+// GFX1250: v_sin_bf16_e32 v5, -1                   ; encoding: [0xc1,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, 0.5
+// GFX1250: v_sin_bf16_e32 v5, 0.5                  ; encoding: [0xf0,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v5, src_scc
+// GFX1250: v_sin_bf16_e32 v5, src_scc              ; encoding: [0xfd,0xfc,0x0a,0x7e]
+
+v_sin_bf16 v127, 0x8000
+// GFX1250: v_sin_bf16_e32 v127, 0x8000             ; encoding: [0xff,0xfc,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
+v_sin_bf16 v5.h, v1.h
+// GFX1250: v_sin_bf16_e32 v5.h, v1.h               ; encoding: [0x81,0xfd,0x0a,0x7f]
+
 v_cvt_f32_bf16 v5, v1
 // GFX1250: v_cvt_f32_bf16_e32 v5, v1               ; encoding: [0x01,0xe5,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
index 459c2d3e7b751..97058eb2e7c9f 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -338,6 +338,62 @@ v_exp_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi
 // GFX1250: v_exp_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfa,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_sin_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_sin_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_sin_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_mirror
+// GFX1250: v_sin_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_half_mirror
+// GFX1250: v_sin_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shl:1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shl:15
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shr:1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shr:15
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_ror:1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_ror:15
+// GFX1250: v_sin_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_sin_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_sin_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_sin_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfc,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
index 30355596be48b..6a293c19a79a4 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -362,6 +362,66 @@ v_exp_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
 // GFX1250: v_exp_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7f,0x81,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_sin_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_sin_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_sin_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_mirror
+// GFX1250: v_sin_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_half_mirror
+// GFX1250: v_sin_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shl:1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shl:15
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shr:1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_shr:15
+// GFX1250: v_sin_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_ror:1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_ror:15
+// GFX1250: v_sin_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_sin_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_sin_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_sin_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfc,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_sin_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfc,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
+// GFX1250: v_sin_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
index 50e3e0acae4d2..d1f53c7b2065c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -74,6 +74,18 @@ v_exp_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX1250: v_exp_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfa,0xfe,0x7e,0x7f,0x00,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_sin_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sin_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfc,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_sin_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfc,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_sin_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
index 34a15116ebed4..dbee9f39df5f5 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
@@ -98,6 +98,22 @@ v_exp_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_exp_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfa,0x0a,0x7f,0x81,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_sin_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sin_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfc,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_sin_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfc,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_sin_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sin_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sin_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
index 1d5df8d131228..4257334444244 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
@@ -352,6 +352,51 @@ v_exp_bf16_e64 v5, src_scc mul:4
 v_exp_bf16_e64 v255, -|0x8000| clamp div:2
 // GFX1250: v_exp_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfd,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
 
+v_sin_bf16_e64 v5, v1
+// GFX1250: v_sin_bf16_e64 v5, v1                   ; encoding: [0x05,0x00,0xfe,0xd5,0x01,0x01,0x00,0x00]
+
+v_sin_bf16_e64 v5, v255
+// GFX1250: v_sin_bf16_e64 v5, v255                 ; encoding: [0x05,0x00,0xfe,0xd5,0xff,0x01,0x00,0x00]
+
+v_sin_bf16_e64 v5, s1
+// GFX1250: v_sin_bf16_e64 v5, s1                   ; encoding: [0x05,0x00,0xfe,0xd5,0x01,0x00,0x00,0x00]
+
+v_sin_bf16_e64 v5, s105
+// GFX1250: v_sin_bf16_e64 v5, s105                 ; encoding: [0x05,0x00,0xfe,0xd5,0x69,0x00,0x00,0x00]
+
+v_sin_b...
[truncated]

@shiltian shiltian force-pushed the users/shiltian/v_sin_bf16 branch from 44ec01f to 74b87c9 Compare July 17, 2025 03:50
@llvmbot llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:codegen IR generation bugs: mangling, exceptions, etc. labels Jul 17, 2025
@shiltian shiltian force-pushed the users/shiltian/v_sin_bf16 branch from 74b87c9 to 49f17c4 Compare July 17, 2025 03:54
Copy link
Contributor Author

shiltian commented Jul 17, 2025

Merge activity

  • Jul 17, 12:41 PM UTC: A user started a stack merge that includes this pull request via Graphite.
  • Jul 17, 12:46 PM UTC: Graphite rebased this pull request as part of a merge.
  • Jul 17, 12:49 PM UTC: @shiltian merged this pull request with Graphite.

@shiltian shiltian force-pushed the users/shiltian/v_exp_bf16 branch from 4efddc2 to 62986e0 Compare July 17, 2025 12:43
Base automatically changed from users/shiltian/v_exp_bf16 to main July 17, 2025 12:46
Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>
@shiltian shiltian force-pushed the users/shiltian/v_sin_bf16 branch from 49f17c4 to 8012a2d Compare July 17, 2025 12:46
@shiltian shiltian merged commit a102342 into main Jul 17, 2025
7 of 9 checks passed
@shiltian shiltian deleted the users/shiltian/v_sin_bf16 branch July 17, 2025 12:49
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jul 18, 2025

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-expensive-checks-debian running on gribozavr4 while building clang,llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/22815

Here is the relevant piece of the build log for reference:
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: MC/Mips/reloc-directive.s' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips-unknown-linux < /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s -show-encoding -target-abi=o32      | /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=ASM /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s # RUN: at line 1
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips-unknown-linux -show-encoding -target-abi=o32
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=ASM /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s
/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips64-unknown-linux < /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s -show-encoding -target-abi=n32      | /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=ASM /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s # RUN: at line 3
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips64-unknown-linux -show-encoding -target-abi=n32
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=ASM /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s
/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips64-unknown-linux < /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s -show-encoding -target-abi=n64      | /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=ASM /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s # RUN: at line 5
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips64-unknown-linux -show-encoding -target-abi=n64
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=ASM /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s
/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips-unknown-linux < /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s -show-encoding -target-abi=o32      -filetype=obj | /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-readobj --sections --section-data -r - |      /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=OBJ-O32 /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s # RUN: at line 7
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-mc -triple mips-unknown-linux -show-encoding -target-abi=o32 -filetype=obj
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/llvm-readobj --sections --section-data -r -
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck -check-prefix=OBJ-O32 /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s
/b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s:137:17: error: OBJ-O32-NEXT: is not on the line after the previous match
# OBJ-O32-NEXT: 0x88 R_MIPS_NONE -
                ^
<stdin>:236:2: note: 'next' match was here
 0x88 R_MIPS_NONE -
 ^
<stdin>:233:25: note: previous match ended here
 0x84 R_MICROMIPS_JALR -
                        ^
<stdin>:234:1: note: non-matching line after previous match is here
 0x88 R_MIPS_32 -
^

Input file: <stdin>
Check file: /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/MC/Mips/reloc-directive.s

-dump-input=help explains the following input dump.

Input was:
<<<<<<
          .
          .
          .
        231:  0x7C R_MICROMIPS_TLS_TPREL_LO16 - 
        232:  0x80 R_MIPS_JALR - 
        233:  0x84 R_MICROMIPS_JALR - 
        234:  0x88 R_MIPS_32 - 
        235:  0x88 R_MIPS_64 - 
        236:  0x88 R_MIPS_NONE - 
next:137      !~~~~~~~~~~~~~~~~~  error: match on wrong line
        237:  0x88 R_MIPS_16 - 
        238:  0x1C R_MIPS_GOT16 - 
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU clang:codegen IR generation bugs: mangling, exceptions, etc. clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants