; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s

declare <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
declare <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
declare <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)

declare <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)

define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 -128))
  %3 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 127))
  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 127))
  %3 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 -128))
  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i16_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    li a0, -127
; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 128
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 -127))
  %3 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 128))
  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
  %3 = trunc <vscale x 4 x i16> %2 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    li a0, 127
; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 127))
  %3 = trunc <vscale x 4 x i16> %2 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 0))
  %3 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 255))
  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
  %3 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 0))
  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}


define void @trunc_sat_i16i32_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    lui a0, 1048568
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 -32767))
  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 32768))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 -32768))
  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 32767))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 32767))
  %3 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 -32768))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 32767))
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 1))
  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 65535))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    li a0, 50
; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
  %3 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 50))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 8
  ret void
}


define void @trunc_sat_i32i64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addiw a0, a0, 1
; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    slli a0, a0, 31
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnsrl.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 64
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 -2147483647))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 2147483648))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_i32i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclip.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 64
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 -2147483648))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 2147483647))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_i32i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclip.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 64
  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 2147483647))
  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 -2147483648))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %4, ptr %y, align 32
  ret void
}


define void @trunc_sat_u32u64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnsrl.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 64
  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 2147483647))
  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %3, ptr %y, align 32
  ret void
}

define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 64
  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %3, ptr %y, align 32
  ret void
}


define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 0))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 4294967295))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 0))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 -128))
  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 127))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 127))
  %3 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 -128))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 255))
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 0))
  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 255))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 255))
  %3 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 0))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclip.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 -128))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 127))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclip.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 127))
  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 -128))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 255))
  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 0))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 255))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 255))
  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 0))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclip.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v12, 0
; CHECK-NEXT:    vs1r.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 -32768))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 32767))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclip.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v12, 0
; CHECK-NEXT:    vs1r.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 32767))
  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 -32768))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v12, 0
; CHECK-NEXT:    vs1r.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 65535))
  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v12, 0
; CHECK-NEXT:    vs1r.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 1))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 65535))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    li a0, 50
; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v12, 0
; CHECK-NEXT:    vs1r.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 65535))
  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 50))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 8
  ret void
}