@@ -9,15 +9,10 @@ define <vscale x 2 x i8> @umulo_nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %
9
9
; CHECK-NEXT: ptrue p0.d
10
10
; CHECK-NEXT: and z1.d, z1.d, #0xff
11
11
; CHECK-NEXT: and z0.d, z0.d, #0xff
12
- ; CHECK-NEXT: movprfx z2, z0
13
- ; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
14
- ; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
15
- ; CHECK-NEXT: lsr z1.d, z2.d, #8
16
- ; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
12
+ ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
13
+ ; CHECK-NEXT: lsr z1.d, z0.d, #8
17
14
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
18
- ; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
19
- ; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
20
- ; CHECK-NEXT: mov z0.d, z2.d
15
+ ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
21
16
; CHECK-NEXT: ret
22
17
%a = call { <vscale x 2 x i8 >, <vscale x 2 x i1 > } @llvm.umul.with.overflow.nxv2i8 (<vscale x 2 x i8 > %x , <vscale x 2 x i8 > %y )
23
18
%b = extractvalue { <vscale x 2 x i8 >, <vscale x 2 x i1 > } %a , 0
@@ -34,15 +29,10 @@ define <vscale x 4 x i8> @umulo_nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %
34
29
; CHECK-NEXT: ptrue p0.s
35
30
; CHECK-NEXT: and z1.s, z1.s, #0xff
36
31
; CHECK-NEXT: and z0.s, z0.s, #0xff
37
- ; CHECK-NEXT: movprfx z2, z0
38
- ; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s
39
- ; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
40
- ; CHECK-NEXT: lsr z1.s, z2.s, #8
41
- ; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
32
+ ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
33
+ ; CHECK-NEXT: lsr z1.s, z0.s, #8
42
34
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
43
- ; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
44
- ; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0
45
- ; CHECK-NEXT: mov z0.d, z2.d
35
+ ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
46
36
; CHECK-NEXT: ret
47
37
%a = call { <vscale x 4 x i8 >, <vscale x 4 x i1 > } @llvm.umul.with.overflow.nxv4i8 (<vscale x 4 x i8 > %x , <vscale x 4 x i8 > %y )
48
38
%b = extractvalue { <vscale x 4 x i8 >, <vscale x 4 x i1 > } %a , 0
@@ -59,15 +49,10 @@ define <vscale x 8 x i8> @umulo_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %
59
49
; CHECK-NEXT: ptrue p0.h
60
50
; CHECK-NEXT: and z1.h, z1.h, #0xff
61
51
; CHECK-NEXT: and z0.h, z0.h, #0xff
62
- ; CHECK-NEXT: movprfx z2, z0
63
- ; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h
64
- ; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
65
- ; CHECK-NEXT: lsr z1.h, z2.h, #8
66
- ; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
52
+ ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
53
+ ; CHECK-NEXT: lsr z1.h, z0.h, #8
67
54
; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
68
- ; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
69
- ; CHECK-NEXT: mov z2.h, p0/m, #0 // =0x0
70
- ; CHECK-NEXT: mov z0.d, z2.d
55
+ ; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
71
56
; CHECK-NEXT: ret
72
57
%a = call { <vscale x 8 x i8 >, <vscale x 8 x i1 > } @llvm.umul.with.overflow.nxv8i8 (<vscale x 8 x i8 > %x , <vscale x 8 x i8 > %y )
73
58
%b = extractvalue { <vscale x 8 x i8 >, <vscale x 8 x i1 > } %a , 0
@@ -164,15 +149,10 @@ define <vscale x 2 x i16> @umulo_nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i1
164
149
; CHECK-NEXT: ptrue p0.d
165
150
; CHECK-NEXT: and z1.d, z1.d, #0xffff
166
151
; CHECK-NEXT: and z0.d, z0.d, #0xffff
167
- ; CHECK-NEXT: movprfx z2, z0
168
- ; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
169
- ; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
170
- ; CHECK-NEXT: lsr z1.d, z2.d, #16
171
- ; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
152
+ ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
153
+ ; CHECK-NEXT: lsr z1.d, z0.d, #16
172
154
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
173
- ; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
174
- ; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
175
- ; CHECK-NEXT: mov z0.d, z2.d
155
+ ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
176
156
; CHECK-NEXT: ret
177
157
%a = call { <vscale x 2 x i16 >, <vscale x 2 x i1 > } @llvm.umul.with.overflow.nxv2i16 (<vscale x 2 x i16 > %x , <vscale x 2 x i16 > %y )
178
158
%b = extractvalue { <vscale x 2 x i16 >, <vscale x 2 x i1 > } %a , 0
@@ -189,15 +169,10 @@ define <vscale x 4 x i16> @umulo_nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i1
189
169
; CHECK-NEXT: ptrue p0.s
190
170
; CHECK-NEXT: and z1.s, z1.s, #0xffff
191
171
; CHECK-NEXT: and z0.s, z0.s, #0xffff
192
- ; CHECK-NEXT: movprfx z2, z0
193
- ; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s
194
- ; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
195
- ; CHECK-NEXT: lsr z1.s, z2.s, #16
196
- ; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
172
+ ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
173
+ ; CHECK-NEXT: lsr z1.s, z0.s, #16
197
174
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
198
- ; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
199
- ; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0
200
- ; CHECK-NEXT: mov z0.d, z2.d
175
+ ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
201
176
; CHECK-NEXT: ret
202
177
%a = call { <vscale x 4 x i16 >, <vscale x 4 x i1 > } @llvm.umul.with.overflow.nxv4i16 (<vscale x 4 x i16 > %x , <vscale x 4 x i16 > %y )
203
178
%b = extractvalue { <vscale x 4 x i16 >, <vscale x 4 x i1 > } %a , 0
@@ -294,15 +269,10 @@ define <vscale x 2 x i32> @umulo_nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i3
294
269
; CHECK-NEXT: ptrue p0.d
295
270
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
296
271
; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
297
- ; CHECK-NEXT: movprfx z2, z0
298
- ; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
299
- ; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
300
- ; CHECK-NEXT: lsr z1.d, z2.d, #32
301
- ; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
272
+ ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
273
+ ; CHECK-NEXT: lsr z1.d, z0.d, #32
302
274
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
303
- ; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
304
- ; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
305
- ; CHECK-NEXT: mov z0.d, z2.d
275
+ ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
306
276
; CHECK-NEXT: ret
307
277
%a = call { <vscale x 2 x i32 >, <vscale x 2 x i1 > } @llvm.umul.with.overflow.nxv2i32 (<vscale x 2 x i32 > %x , <vscale x 2 x i32 > %y )
308
278
%b = extractvalue { <vscale x 2 x i32 >, <vscale x 2 x i1 > } %a , 0
0 commit comments