; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s

declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)

declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)

define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>)
  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %1, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
  %3 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %2, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i16_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a0, -127
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 128
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> <i16 -127, i16 -127, i16 -127, i16 -127>)
  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 128, i16 128, i16 128, i16 128>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %3 = trunc <4 x i16> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a0, 127
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
  %3 = trunc <4 x i16> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> zeroinitializer)
  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %3 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %2, <4 x i16> zeroinitializer)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}


define void @trunc_sat_i16i32_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 1048568
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -32767, i32 -32767, i32 -32767, i32 -32767>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 32768, i32 32768, i32 32768, i32 32768>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
  %3 = trunc <4 x i32> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
  %3 = trunc <4 x i32> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a0, 50
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 50, i32 50, i32 50, i32 50>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}


define void @trunc_sat_i32i64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addiw a0, a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    slli a0, a0, 31
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -2147483647, i64 -2147483647, i64 -2147483647, i64 -2147483647>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 2147483648, i64 2147483648, i64 2147483648, i64 2147483648>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_i32i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_i32i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}


define void @trunc_sat_u32u64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %3 = trunc <4 x i64> %2 to <4 x i32>
  store <4 x i32> %3, ptr %y, align 32
  ret void
}

define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
  %3 = trunc <4 x i64> %2 to <4 x i32>
  store <4 x i32> %3, ptr %y, align 32
  ret void
}


define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> zeroinitializer)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> zeroinitializer)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
  %3 = trunc <4 x i32> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> zeroinitializer)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 127, i64 127, i64 127, i64 127>)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 127, i64 127, i64 127, i64 127>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
  %3 = trunc <4 x i64> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> zeroinitializer)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> zeroinitializer)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
  %3 = trunc <4 x i64> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
  %3 = trunc <4 x i64> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 1, i64 1, i64 1, i64 1>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a0, 50
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 50, i64 50, i64 50, i64 50>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}