; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s

;
; CLS
;

define <vscale x 16 x i8> @cls_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cls_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cls z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @cls_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: cls_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cls z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cls_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cls_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cls z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @cls_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cls_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cls z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; CLZ
;

define <vscale x 16 x i8> @clz_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clz_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clz z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clz_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clz_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clz z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clz_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clz_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clz z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clz_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clz_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clz z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; CNT
;

define <vscale x 16 x i8> @cnt_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cnt_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @cnt_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: cnt_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cnt_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cnt_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @cnt_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cnt_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x i16> @cnt_f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
; CHECK-LABEL: cnt_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i1> %pg,
                                                               <vscale x 8 x half> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @cnt_bf16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: cnt_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i1> %pg,
                                                                <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cnt_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
; CHECK-LABEL: cnt_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i1> %pg,
                                                               <vscale x 4 x float> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @cnt_f64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
; CHECK-LABEL: cnt_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i1> %pg,
                                                               <vscale x 2 x double> %b)
  ret <vscale x 2 x i64> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x double>)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+bf16" }