; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; Codegen test for `trunc` on scalable integer vectors with SVE enabled.
;
; For every function in this first group the truncation is expected to be a
; nop: the element count is unchanged and only the element type narrows, so
; the expected output is a bare `ret`.

; nxv8i16 -> nxv8i8
define <vscale x 8 x i8> @trunc_i16toi8(<vscale x 8 x i16> %wide) {
; CHECK-LABEL: trunc_i16toi8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ret
entry:
  %narrow = trunc <vscale x 8 x i16> %wide to <vscale x 8 x i8>
  ret <vscale x 8 x i8> %narrow
}

; nxv4i32 -> nxv4i8
define <vscale x 4 x i8> @trunc_i32toi8(<vscale x 4 x i32> %wide) {
; CHECK-LABEL: trunc_i32toi8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ret
entry:
  %narrow = trunc <vscale x 4 x i32> %wide to <vscale x 4 x i8>
  ret <vscale x 4 x i8> %narrow
}

; nxv2i64 -> nxv2i8
define <vscale x 2 x i8> @trunc_i64toi8(<vscale x 2 x i64> %wide) {
; CHECK-LABEL: trunc_i64toi8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ret
entry:
  %narrow = trunc <vscale x 2 x i64> %wide to <vscale x 2 x i8>
  ret <vscale x 2 x i8> %narrow
}

; nxv4i32 -> nxv4i16
define <vscale x 4 x i16> @trunc_i32toi16(<vscale x 4 x i32> %wide) {
; CHECK-LABEL: trunc_i32toi16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ret
entry:
  %narrow = trunc <vscale x 4 x i32> %wide to <vscale x 4 x i16>
  ret <vscale x 4 x i16> %narrow
}

; nxv2i64 -> nxv2i16
define <vscale x 2 x i16> @trunc_i64toi16(<vscale x 2 x i64> %wide) {
; CHECK-LABEL: trunc_i64toi16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ret
entry:
  %narrow = trunc <vscale x 2 x i64> %wide to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %narrow
}

; nxv2i64 -> nxv2i32
define <vscale x 2 x i32> @trunc_i64toi32(<vscale x 2 x i64> %wide) {
; CHECK-LABEL: trunc_i64toi32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ret
entry:
  %narrow = trunc <vscale x 2 x i64> %wide to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %narrow
}

; Truncating to i1 requires converting the operation into a compare: the
; expected sequence masks each element with #0x1 and then compares it against
; zero (cmpne) under an all-true predicate to produce the predicate result.

; nxv2i64 -> nxv2i1
define <vscale x 2 x i1> @trunc_i64toi1(<vscale x 2 x i64> %wide) {
; CHECK-LABEL: trunc_i64toi1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and z0.d, z0.d, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    ret
entry:
  %mask = trunc <vscale x 2 x i64> %wide to <vscale x 2 x i1>
  ret <vscale x 2 x i1> %mask
}

; nxv4i64 -> nxv4i1: the input occupies z0-z1; each part is compared on its
; own and the two predicates are merged with uzp1.
define <vscale x 4 x i1> @trunc_i64toi1_split(<vscale x 4 x i64> %wide) {
; CHECK-LABEL: trunc_i64toi1_split:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    and z0.d, z0.d, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    cmpne p1.d, p0/z, z1.d, #0
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    uzp1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
entry:
  %mask = trunc <vscale x 4 x i64> %wide to <vscale x 4 x i1>
  ret <vscale x 4 x i1> %mask
}

; nxv8i64 -> nxv8i1: the input occupies z0-z3; four compares are merged by
; two levels of uzp1 (.s, then .h).
define <vscale x 8 x i1> @trunc_i64toi1_split2(<vscale x 8 x i64> %wide) {
; CHECK-LABEL: trunc_i64toi1_split2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and z3.d, z3.d, #0x1
; CHECK-NEXT:    and z2.d, z2.d, #0x1
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    and z0.d, z0.d, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    cmpne p1.d, p0/z, z3.d, #0
; CHECK-NEXT:    cmpne p2.d, p0/z, z2.d, #0
; CHECK-NEXT:    cmpne p3.d, p0/z, z1.d, #0
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    uzp1 p1.s, p2.s, p1.s
; CHECK-NEXT:    uzp1 p0.s, p0.s, p3.s
; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
entry:
  %mask = trunc <vscale x 8 x i64> %wide to <vscale x 8 x i1>
  ret <vscale x 8 x i1> %mask
}

; nxv16i64 -> nxv16i1: the input occupies z0-z7; eight compares are merged by
; three levels of uzp1 (.s, .h, .b). The expected output also saves p4-p7 to
; the stack on entry and reloads them before returning.
define <vscale x 16 x i1> @trunc_i64toi1_split3(<vscale x 16 x i64> %wide) {
; CHECK-LABEL: trunc_i64toi1_split3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p7, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    and z7.d, z7.d, #0x1
; CHECK-NEXT:    and z6.d, z6.d, #0x1
; CHECK-NEXT:    and z5.d, z5.d, #0x1
; CHECK-NEXT:    and z4.d, z4.d, #0x1
; CHECK-NEXT:    and z3.d, z3.d, #0x1
; CHECK-NEXT:    and z2.d, z2.d, #0x1
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    and z0.d, z0.d, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    cmpne p1.d, p0/z, z7.d, #0
; CHECK-NEXT:    cmpne p2.d, p0/z, z6.d, #0
; CHECK-NEXT:    cmpne p3.d, p0/z, z5.d, #0
; CHECK-NEXT:    cmpne p4.d, p0/z, z4.d, #0
; CHECK-NEXT:    cmpne p5.d, p0/z, z3.d, #0
; CHECK-NEXT:    cmpne p6.d, p0/z, z2.d, #0
; CHECK-NEXT:    cmpne p7.d, p0/z, z1.d, #0
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    uzp1 p1.s, p2.s, p1.s
; CHECK-NEXT:    uzp1 p2.s, p4.s, p3.s
; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    uzp1 p3.s, p6.s, p5.s
; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    uzp1 p0.s, p0.s, p7.s
; CHECK-NEXT:    ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    uzp1 p1.h, p2.h, p1.h
; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    uzp1 p0.h, p0.h, p3.h
; CHECK-NEXT:    uzp1 p0.b, p0.b, p1.b
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %mask = trunc <vscale x 16 x i64> %wide to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %mask
}


; nxv4i32 -> nxv4i1
define <vscale x 4 x i1> @trunc_i32toi1(<vscale x 4 x i32> %wide) {
; CHECK-LABEL: trunc_i32toi1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and z0.s, z0.s, #0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    ret
entry:
  %mask = trunc <vscale x 4 x i32> %wide to <vscale x 4 x i1>
  ret <vscale x 4 x i1> %mask
}

; nxv8i16 -> nxv8i1
define <vscale x 8 x i1> @trunc_i16toi1(<vscale x 8 x i16> %wide) {
; CHECK-LABEL: trunc_i16toi1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    ret
entry:
  %mask = trunc <vscale x 8 x i16> %wide to <vscale x 8 x i1>
  ret <vscale x 8 x i1> %mask
}

; nxv16i8 -> nxv16i1
define <vscale x 16 x i1> @trunc_i8toi1(<vscale x 16 x i8> %wide) {
; CHECK-LABEL: trunc_i8toi1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and z0.b, z0.b, #0x1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ret
entry:
  %mask = trunc <vscale x 16 x i8> %wide to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %mask
}

; nxv1i32 -> nxv1i1: same mask-and-compare sequence at .s granularity,
; followed by two punpklo steps on the resulting predicate. This function
; intentionally has no named entry block, which is why the expected
; "%bb.0" line carries no block-name suffix.
define <vscale x 1 x i1> @trunc_nxv1i32_to_nxv1i1(<vscale x 1 x i32> %wide) {
; CHECK-LABEL: trunc_nxv1i32_to_nxv1i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.s, z0.s, #0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %mask = trunc <vscale x 1 x i32> %wide to <vscale x 1 x i1>
  ret <vscale x 1 x i1> %mask
}

; nxv4i64 -> nxv4i16 feeding a store: the two input registers are expected to
; be combined with a single uzp1 and written out with st1h on .s elements.
define void @trunc_promoteIntRes(<vscale x 4 x i64> %wide, ptr %dst) {
; CHECK-LABEL: trunc_promoteIntRes:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
entry:
  %narrow = trunc <vscale x 4 x i64> %wide to <vscale x 4 x i16>
  store <vscale x 4 x i16> %narrow, ptr %dst, align 2
  ret void
}