Skip to content

Commit 31e9445

Browse files
committed
[WebAssembly] Prototype extending multiplication SIMD instructions
As proposed in WebAssembly/simd#376. This commit implements new builtin functions and intrinsics for these instructions, but does not yet add them to wasm_simd128.h because they have not yet been merged to the proposal. These are the first instructions with opcodes greater than 0xff, so this commit updates the MC layer and disassembler to handle that correctly. Differential Revision: https://reviews.llvm.org/D90253
1 parent 9d72065 commit 31e9445

File tree

9 files changed

+410
-7
lines changed

9 files changed

+410
-7
lines changed

clang/include/clang/Basic/BuiltinsWebAssembly.def

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,21 @@ TARGET_BUILTIN(__builtin_wasm_popcnt_i8x16, "V16ScV16Sc", "nc", "simd128")
118118

119119
TARGET_BUILTIN(__builtin_wasm_q15mulr_saturate_s_i8x16, "V8sV8sV8s", "nc", "simd128")
120120

121+
TARGET_BUILTIN(__builtin_wasm_extmul_low_i8x16_s_i16x8, "V8sV16ScV16Sc", "nc", "simd128")
122+
TARGET_BUILTIN(__builtin_wasm_extmul_high_i8x16_s_i16x8, "V8sV16ScV16Sc", "nc", "simd128")
123+
TARGET_BUILTIN(__builtin_wasm_extmul_low_i8x16_u_i16x8, "V8UsV16UcV16Uc", "nc", "simd128")
124+
TARGET_BUILTIN(__builtin_wasm_extmul_high_i8x16_u_i16x8, "V8UsV16UcV16Uc", "nc", "simd128")
125+
126+
TARGET_BUILTIN(__builtin_wasm_extmul_low_i16x8_s_i32x4, "V4iV8sV8s", "nc", "simd128")
127+
TARGET_BUILTIN(__builtin_wasm_extmul_high_i16x8_s_i32x4, "V4iV8sV8s", "nc", "simd128")
128+
TARGET_BUILTIN(__builtin_wasm_extmul_low_i16x8_u_i32x4, "V4UiV8UsV8Us", "nc", "simd128")
129+
TARGET_BUILTIN(__builtin_wasm_extmul_high_i16x8_u_i32x4, "V4UiV8UsV8Us", "nc", "simd128")
130+
131+
TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_s_i64x2, "V2LLiV4iV4i", "nc", "simd128")
132+
TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_s_i64x2, "V2LLiV4iV4i", "nc", "simd128")
133+
TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128")
134+
TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128")
135+
121136
TARGET_BUILTIN(__builtin_wasm_bitselect, "V4iV4iV4iV4i", "nc", "simd128")
122137
TARGET_BUILTIN(__builtin_wasm_shuffle_v8x16, "V16ScV16ScV16ScIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIi", "nc", "simd128")
123138

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16600,6 +16600,49 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
1660016600
CGM.getIntrinsic(Intrinsic::wasm_q15mulr_saturate_signed);
1660116601
return Builder.CreateCall(Callee, {LHS, RHS});
1660216602
}
16603+
case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8:
16604+
case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8:
16605+
case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8:
16606+
case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8:
16607+
case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4:
16608+
case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4:
16609+
case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4:
16610+
case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4:
16611+
case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2:
16612+
case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2:
16613+
case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2:
16614+
case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2: {
16615+
Value *LHS = EmitScalarExpr(E->getArg(0));
16616+
Value *RHS = EmitScalarExpr(E->getArg(1));
16617+
unsigned IntNo;
16618+
switch (BuiltinID) {
16619+
case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8:
16620+
case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4:
16621+
case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2:
16622+
IntNo = Intrinsic::wasm_extmul_low_signed;
16623+
break;
16624+
case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8:
16625+
case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4:
16626+
case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2:
16627+
IntNo = Intrinsic::wasm_extmul_low_unsigned;
16628+
break;
16629+
case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8:
16630+
case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4:
16631+
case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2:
16632+
IntNo = Intrinsic::wasm_extmul_high_signed;
16633+
break;
16634+
case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8:
16635+
case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4:
16636+
case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2:
16637+
IntNo = Intrinsic::wasm_extmul_high_unsigned;
16638+
break;
16639+
default:
16640+
// The enclosing case labels admit only the twelve extmul builtins, all handled above.
llvm_unreachable("unexpected builtin ID");
16641+
}
16642+
16643+
Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
16644+
return Builder.CreateCall(Callee, {LHS, RHS});
16645+
}
1660316646
case WebAssembly::BI__builtin_wasm_bitselect: {
1660416647
Value *V1 = EmitScalarExpr(E->getArg(0));
1660516648
Value *V2 = EmitScalarExpr(E->getArg(1));

clang/test/CodeGen/builtins-wasm.c

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,90 @@ i16x8 q15mulr_saturate_s_i16x8(i16x8 x, i16x8 y) {
525525
// WEBASSEMBLY-NEXT: ret
526526
}
527527

528+
i16x8 extmul_low_i8x16_s_i16x8(i8x16 x, i8x16 y) {
529+
return __builtin_wasm_extmul_low_i8x16_s_i16x8(x, y);
530+
// WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(
531+
// WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
532+
// WEBASSEMBLY-NEXT: ret
533+
}
534+
535+
i16x8 extmul_high_i8x16_s_i16x8(i8x16 x, i8x16 y) {
536+
return __builtin_wasm_extmul_high_i8x16_s_i16x8(x, y);
537+
// WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(
538+
// WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
539+
// WEBASSEMBLY-NEXT: ret
540+
}
541+
542+
u16x8 extmul_low_i8x16_u_i16x8(u8x16 x, u8x16 y) {
543+
return __builtin_wasm_extmul_low_i8x16_u_i16x8(x, y);
544+
// WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(
545+
// WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
546+
// WEBASSEMBLY-NEXT: ret
547+
}
548+
549+
u16x8 extmul_high_i8x16_u_i16x8(u8x16 x, u8x16 y) {
550+
return __builtin_wasm_extmul_high_i8x16_u_i16x8(x, y);
551+
// WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(
552+
// WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
553+
// WEBASSEMBLY-NEXT: ret
554+
}
555+
556+
i32x4 extmul_low_i16x8_s_i32x4(i16x8 x, i16x8 y) {
557+
return __builtin_wasm_extmul_low_i16x8_s_i32x4(x, y);
558+
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(
559+
// WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
560+
// WEBASSEMBLY-NEXT: ret
561+
}
562+
563+
i32x4 extmul_high_i16x8_s_i32x4(i16x8 x, i16x8 y) {
564+
return __builtin_wasm_extmul_high_i16x8_s_i32x4(x, y);
565+
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(
566+
// WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
567+
// WEBASSEMBLY-NEXT: ret
568+
}
569+
570+
u32x4 extmul_low_i16x8_u_i32x4(u16x8 x, u16x8 y) {
571+
return __builtin_wasm_extmul_low_i16x8_u_i32x4(x, y);
572+
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(
573+
// WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
574+
// WEBASSEMBLY-NEXT: ret
575+
}
576+
577+
u32x4 extmul_high_i16x8_u_i32x4(u16x8 x, u16x8 y) {
578+
return __builtin_wasm_extmul_high_i16x8_u_i32x4(x, y);
579+
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(
580+
// WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
581+
// WEBASSEMBLY-NEXT: ret
582+
}
583+
584+
i64x2 extmul_low_i32x4_s_i64x2(i32x4 x, i32x4 y) {
585+
return __builtin_wasm_extmul_low_i32x4_s_i64x2(x, y);
586+
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(
587+
// WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
588+
// WEBASSEMBLY-NEXT: ret
589+
}
590+
591+
i64x2 extmul_high_i32x4_s_i64x2(i32x4 x, i32x4 y) {
592+
return __builtin_wasm_extmul_high_i32x4_s_i64x2(x, y);
593+
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(
594+
// WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
595+
// WEBASSEMBLY-NEXT: ret
596+
}
597+
598+
u64x2 extmul_low_i32x4_u_i64x2(u32x4 x, u32x4 y) {
599+
return __builtin_wasm_extmul_low_i32x4_u_i64x2(x, y);
600+
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(
601+
// WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
602+
// WEBASSEMBLY-NEXT: ret
603+
}
604+
605+
u64x2 extmul_high_i32x4_u_i64x2(u32x4 x, u32x4 y) {
606+
return __builtin_wasm_extmul_high_i32x4_u_i64x2(x, y);
607+
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(
608+
// WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
609+
// WEBASSEMBLY-NEXT: ret
610+
}
611+
528612
i32x4 dot_i16x8_s(i16x8 x, i16x8 y) {
529613
return __builtin_wasm_dot_s_i32x4_i16x8(x, y);
530614
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.dot(<8 x i16> %x, <8 x i16> %y)

llvm/include/llvm/IR/IntrinsicsWebAssembly.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,23 @@ def int_wasm_store64_lane :
259259
def int_wasm_popcnt :
260260
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem, IntrSpeculatable]>;
261261

262+
def int_wasm_extmul_low_signed :
263+
Intrinsic<[llvm_anyvector_ty],
264+
[LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
265+
[IntrNoMem, IntrSpeculatable]>;
266+
def int_wasm_extmul_high_signed :
267+
Intrinsic<[llvm_anyvector_ty],
268+
[LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
269+
[IntrNoMem, IntrSpeculatable]>;
270+
def int_wasm_extmul_low_unsigned :
271+
Intrinsic<[llvm_anyvector_ty],
272+
[LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
273+
[IntrNoMem, IntrSpeculatable]>;
274+
def int_wasm_extmul_high_unsigned :
275+
Intrinsic<[llvm_anyvector_ty],
276+
[LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
277+
[IntrNoMem, IntrSpeculatable]>;
278+
262279
//===----------------------------------------------------------------------===//
263280
// Thread-local storage intrinsics
264281
//===----------------------------------------------------------------------===//

llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,16 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
6262
uint64_t Start = OS.tell();
6363

6464
uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
65-
if (Binary <= UINT8_MAX) {
65+
if (Binary < (1 << 8)) {
6666
OS << uint8_t(Binary);
67-
} else {
68-
assert(Binary <= UINT16_MAX && "Several-byte opcodes not supported yet");
67+
} else if (Binary < (1 << 16)) {
6968
OS << uint8_t(Binary >> 8);
7069
encodeULEB128(uint8_t(Binary), OS);
70+
} else if (Binary < (1 << 24)) {
71+
OS << uint8_t(Binary >> 16);
72+
encodeULEB128(uint16_t(Binary), OS);
73+
} else {
74+
llvm_unreachable("Very large (prefix + 3 byte) opcodes not supported");
7175
}
7276

7377
// For br_table instructions, encode the size of the table. In the MCInst,

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
1616
list<dag> pattern_r, string asmstr_r = "",
1717
string asmstr_s = "", bits<32> simdop = -1> {
1818
defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
19-
!or(0xfd00, !and(0xff, simdop))>,
19+
!if(!ge(simdop, 0x100),
20+
!or(0xfd0000, !and(0xffff, simdop)),
21+
!or(0xfd00, !and(0xff, simdop)))>,
2022
Requires<[HasSIMD128]>;
2123
}
2224

@@ -935,6 +937,57 @@ defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
935937
"i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
936938
186>;
937939

940+
// Extending multiplication: extmul_{low,high}_P_{s,u} (P = input lane shape)
941+
multiclass SIMDExtBinary<ValueType vec_t, ValueType arg_t, string vec,
942+
SDNode node, string name, bits<32> simdop> {
943+
defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
944+
(outs), (ins),
945+
[(set (vec_t V128:$dst),
946+
(node (arg_t V128:$lhs), (arg_t V128:$rhs))
947+
)],
948+
vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name,
949+
simdop>;
950+
}
951+
952+
defm EXTMUL_LOW_S :
953+
SIMDExtBinary<v8i16, v16i8, "i16x8", int_wasm_extmul_low_signed,
954+
"extmul_low_i8x16_s", 154>;
955+
defm EXTMUL_HIGH_S :
956+
SIMDExtBinary<v8i16, v16i8, "i16x8", int_wasm_extmul_high_signed,
957+
"extmul_high_i8x16_s", 157>;
958+
defm EXTMUL_LOW_U :
959+
SIMDExtBinary<v8i16, v16i8, "i16x8", int_wasm_extmul_low_unsigned,
960+
"extmul_low_i8x16_u", 158>;
961+
defm EXTMUL_HIGH_U :
962+
SIMDExtBinary<v8i16, v16i8, "i16x8", int_wasm_extmul_high_unsigned,
963+
"extmul_high_i8x16_u", 159>;
964+
965+
defm EXTMUL_LOW_S :
966+
SIMDExtBinary<v4i32, v8i16, "i32x4", int_wasm_extmul_low_signed,
967+
"extmul_low_i16x8_s", 187>;
968+
defm EXTMUL_HIGH_S :
969+
SIMDExtBinary<v4i32, v8i16, "i32x4", int_wasm_extmul_high_signed,
970+
"extmul_high_i16x8_s", 189>;
971+
defm EXTMUL_LOW_U :
972+
SIMDExtBinary<v4i32, v8i16, "i32x4", int_wasm_extmul_low_unsigned,
973+
"extmul_low_i16x8_u", 190>;
974+
defm EXTMUL_HIGH_U :
975+
SIMDExtBinary<v4i32, v8i16, "i32x4", int_wasm_extmul_high_unsigned,
976+
"extmul_high_i16x8_u", 191>;
977+
978+
defm EXTMUL_LOW_S :
979+
SIMDExtBinary<v2i64, v4i32, "i64x2", int_wasm_extmul_low_signed,
980+
"extmul_low_i32x4_s", 210>;
981+
defm EXTMUL_HIGH_S :
982+
SIMDExtBinary<v2i64, v4i32, "i64x2", int_wasm_extmul_high_signed,
983+
"extmul_high_i32x4_s", 211>;
984+
defm EXTMUL_LOW_U :
985+
SIMDExtBinary<v2i64, v4i32, "i64x2", int_wasm_extmul_low_unsigned,
986+
"extmul_low_i32x4_u", 214>;
987+
defm EXTMUL_HIGH_U :
988+
SIMDExtBinary<v2i64, v4i32, "i64x2", int_wasm_extmul_high_unsigned,
989+
"extmul_high_i32x4_u", 215>;
990+
938991
//===----------------------------------------------------------------------===//
939992
// Floating-point unary arithmetic
940993
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)