; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve --verify-machineinstrs %s -o - | FileCheck %s

; Codegen tests for MVE tail-predicate creation (vctp).
;
; The first group calls the llvm.arm.mve.vctp{8,16,32,64} intrinsics directly
; and uses the resulting predicate in a select against zero. The second group
; builds the predicate from an icmp between a constant lane-index vector and a
; splat of %n, and expects the compare to be selected as a single vctp.

; Direct intrinsic, 16 x i8 lanes: select(vctp8(%arg), load %in, 0) -> %out.
define void @vctp8(i32 %arg, ptr %in, ptr %out) {
; CHECK-LABEL: vctp8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r2]
; CHECK-NEXT:    bx lr
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %arg)
  %ld = load <16 x i8>, ptr %in
  %res = select <16 x i1> %pred, <16 x i8> %ld, <16 x i8> zeroinitializer
  store <16 x i8> %res, ptr %out
  ret void
}

; Direct intrinsic, 8 x i16 lanes: select(vctp16(%arg), load %in, 0) -> %out.
define void @vctp16(i32 %arg, ptr %in, ptr %out) {
; CHECK-LABEL: vctp16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r2]
; CHECK-NEXT:    bx lr
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %arg)
  %ld = load <8 x i16>, ptr %in
  %res = select <8 x i1> %pred, <8 x i16> %ld, <8 x i16> zeroinitializer
  store <8 x i16> %res, ptr %out
  ret void
}

; Direct intrinsic, 4 x i32 lanes: select(vctp32(%arg), load %in, 0) -> %out.
define void @vctp32(i32 %arg, ptr %in, ptr %out) {
; CHECK-LABEL: vctp32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r2]
; CHECK-NEXT:    bx lr
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %arg)
  %ld = load <4 x i32>, ptr %in
  %res = select <4 x i1> %pred, <4 x i32> %ld, <4 x i32> zeroinitializer
  store <4 x i32> %res, ptr %out
  ret void
}

; Direct intrinsic, 2 x i64 lanes: select(vctp64(%arg), load %in, 0) -> %out.
define void @vctp64(i32 %arg, ptr %in, ptr %out) {
; CHECK-LABEL: vctp64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vctp.64 r0
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r2]
; CHECK-NEXT:    bx lr
  %pred = call <2 x i1> @llvm.arm.mve.vctp64(i32 %arg)
  %ld = load <2 x i64>, ptr %in
  %res = select <2 x i1> %pred, <2 x i64> %ld, <2 x i64> zeroinitializer
  store <2 x i64> %res, ptr %out
  ret void
}

; <0,1,2,3> ult splat(%n): lane i is selected from %a when i < %n.
define arm_aapcs_vfpcc <4 x i32> @vcmp_ult_v4i32(i32 %n, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vcmp_ult_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <4 x i32> undef, i32 %n, i32 0
  %ns = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %ns
  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

; splat(%n) uge <0,1,2,3>: operands swapped relative to the ult test above.
; NOTE(review): as written, lane i is true when %n >= i (i.e. i <= %n), while
; the ult form is true when i < %n. The two differ when %n == i (for example
; lane 0 with %n == 0), yet the expected output below is the same vctp.32 as
; for ult. Confirm whether the constant should be <1, 2, 3, 4> (or the
; predicate ugt) and regenerate the assertions with llc.
define arm_aapcs_vfpcc <4 x i32> @vcmp_uge_v4i32(i32 %n, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vcmp_uge_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <4 x i32> undef, i32 %n, i32 0
  %ns = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c = icmp uge <4 x i32> %ns, <i32 0, i32 1, i32 2, i32 3>
  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

; Same as vcmp_ult_v4i32, but the last two lanes of the index vector are undef;
; the same vctp.32 sequence is expected.
define arm_aapcs_vfpcc <4 x i32> @vcmp_ult_v4i32_undef(i32 %n, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vcmp_ult_v4i32_undef:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <4 x i32> undef, i32 %n, i32 0
  %ns = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c = icmp ult <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>, %ns
  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}


; <0..7> ult splat(%n) on i16 lanes; %n is zero-extended (uxth) before vctp.16.
define arm_aapcs_vfpcc <8 x i16> @vcmp_ult_v8i16(i16 %n, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: vcmp_ult_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <8 x i16> undef, i16 %n, i32 0
  %ns = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %c = icmp ult <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %ns
  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

; splat(%n) uge <0..7> on i16 lanes.
; NOTE(review): uge against <0..7> is true for lane i when i <= %n, which is
; not the same predicate as the ult test (i < %n) although the expected output
; is identical. Confirm the intended constant/predicate and regenerate.
define arm_aapcs_vfpcc <8 x i16> @vcmp_uge_v8i16(i16 %n, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: vcmp_uge_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <8 x i16> undef, i16 %n, i32 0
  %ns = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %c = icmp uge <8 x i16> %ns, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}


; <0..15> ult splat(%n) on i8 lanes; %n is zero-extended (uxtb) before vctp.8.
define arm_aapcs_vfpcc <16 x i8> @vcmp_ult_v16i8(i8 %n, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: vcmp_ult_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <16 x i8> undef, i8 %n, i32 0
  %ns = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %c = icmp ult <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %ns
  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

; splat(%n) uge <0..15> on i8 lanes.
; NOTE(review): uge against <0..15> is true for lane i when i <= %n, which is
; not the same predicate as the ult test (i < %n) although the expected output
; is identical. Confirm the intended constant/predicate and regenerate.
define arm_aapcs_vfpcc <16 x i8> @vcmp_uge_v16i8(i8 %n, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: vcmp_uge_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <16 x i8> undef, i8 %n, i32 0
  %ns = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %c = icmp uge <16 x i8> %ns, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}


; <0,1> ult splat(%n) on i64 lanes.
; NOTE(review): %n is an i64, but the expected vctp.64 consumes only r0.
; Presumably %n arrives in a register pair, so values of %n >= 2^32 would not
; be represented - confirm against real llc output before trusting these
; assertions.
define arm_aapcs_vfpcc <2 x i64> @vcmp_ult_v2i64(i64 %n, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vcmp_ult_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.64 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <2 x i64> undef, i64 %n, i32 0
  %ns = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
  %c = icmp ult <2 x i64> <i64 0, i64 1>, %ns
  %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}

; splat(%n) uge <0,1> on i64 lanes.
; NOTE(review): two open questions here - (1) uge against <0,1> is true for
; lane i when i <= %n, unlike the ult test (i < %n), yet the expected output is
; identical; (2) %n is an i64 but the expected vctp.64 consumes only r0.
; Confirm both against real llc output and regenerate.
define arm_aapcs_vfpcc <2 x i64> @vcmp_uge_v2i64(i64 %n, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vcmp_uge_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.64 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %i = insertelement <2 x i64> undef, i64 %n, i32 0
  %ns = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
  %c = icmp uge <2 x i64> %ns, <i64 0, i64 1>
  %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}


; MVE tail-predicate intrinsics exercised by the first four tests.
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <2 x i1> @llvm.arm.mve.vctp64(i32)