@@ -99,12 +99,36 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
99
99
; ---------------------------------------------------------------------------- ;
100
100
101
101
; bzhi32_c0: masks %val with a right-shifted all-ones value:
;   %numhighbits = 32 - %numlowbits
;   %mask        = lshr -1, %numhighbits
;   %masked      = %mask & %val
; In this hunk the single GCN-LABEL check line is removed and separate SI and
; VI check blocks are added. Both expect a subtract from 32, a right shift of
; -1 by that result, a v_and_b32 into v0, and then s_setpc_b64.
define i32 @bzhi32_c0 (i32 %val , i32 %numlowbits ) nounwind {
102
- ; GCN-LABEL: bzhi32_c0:
102
+ ; SI-LABEL: bzhi32_c0:
103
+ ; SI: ; %bb.0:
104
+ ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105
+ ; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
106
+ ; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
107
+ ; SI-NEXT: v_and_b32_e32 v0, v1, v0
108
+ ; SI-NEXT: s_setpc_b64 s[30:31]
109
+ ;
110
+ ; VI-LABEL: bzhi32_c0:
111
+ ; VI: ; %bb.0:
112
+ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113
+ ; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
114
+ ; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1
115
+ ; VI-NEXT: v_and_b32_e32 v0, v1, v0
116
+ ; VI-NEXT: s_setpc_b64 s[30:31]
117
+ %numhighbits = sub i32 32 , %numlowbits
118
+ %mask = lshr i32 -1 , %numhighbits
119
+ %masked = and i32 %mask , %val
120
+ ret i32 %masked
121
+ }
122
+
123
+ define i32 @bzhi32_c0_clamp (i32 %val , i32 %numlowbits ) nounwind {
124
+ ; GCN-LABEL: bzhi32_c0_clamp:
103
125
; GCN: ; %bb.0:
104
126
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127
+ ; GCN-NEXT: v_and_b32_e32 v1, 31, v1
105
128
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
106
129
; GCN-NEXT: s_setpc_b64 s[30:31]
107
- %numhighbits = sub i32 32 , %numlowbits
130
+ %low5bits = and i32 %numlowbits , 31
131
+ %numhighbits = sub i32 32 , %low5bits
108
132
%mask = lshr i32 -1 , %numhighbits
109
133
%masked = and i32 %mask , %val
110
134
ret i32 %masked
@@ -134,11 +158,21 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
134
158
}
135
159
136
160
define i32 @bzhi32_c4_commutative (i32 %val , i32 %numlowbits ) nounwind {
137
- ; GCN-LABEL: bzhi32_c4_commutative:
138
- ; GCN: ; %bb.0:
139
- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140
- ; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
141
- ; GCN-NEXT: s_setpc_b64 s[30:31]
161
+ ; SI-LABEL: bzhi32_c4_commutative:
162
+ ; SI: ; %bb.0:
163
+ ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164
+ ; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
165
+ ; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
166
+ ; SI-NEXT: v_and_b32_e32 v0, v0, v1
167
+ ; SI-NEXT: s_setpc_b64 s[30:31]
168
+ ;
169
+ ; VI-LABEL: bzhi32_c4_commutative:
170
+ ; VI: ; %bb.0:
171
+ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172
+ ; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
173
+ ; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1
174
+ ; VI-NEXT: v_and_b32_e32 v0, v0, v1
175
+ ; VI-NEXT: s_setpc_b64 s[30:31]
142
176
%numhighbits = sub i32 32 , %numlowbits
143
177
%mask = lshr i32 -1 , %numhighbits
144
178
%masked = and i32 %val , %mask ; swapped order
0 commit comments