; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; ====== Scalar Tests =====

; ====== Scalar bswap.i16 Tests =====

; Byte-swap an i16 and return the i16 result directly.
define i16 @bswap_i16_to_i16_anyext(i16 %a) {
; CHECK-SD-LABEL: bswap_i16_to_i16_anyext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: rev16 w0, w0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bswap_i16_to_i16_anyext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: rev w8, w0
; CHECK-GI-NEXT: lsr w0, w8, #16
; CHECK-GI-NEXT: ret
  %swapped = call i16 @llvm.bswap.i16(i16 %a)
  ret i16 %swapped
}
declare i16 @llvm.bswap.i16(i16)

; The swapped i16 is zero-extended to i64 and then shifted left by 48.
; The zext here is optimised to an any_extend during isel.
define i64 @bswap_i16_to_i64_anyext(i16 %a) {
; CHECK-SD-LABEL: bswap_i16_to_i64_anyext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: rev16 x8, x0
; CHECK-SD-NEXT: lsl x0, x8, #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bswap_i16_to_i64_anyext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: rev w8, w0
; CHECK-GI-NEXT: lsr w8, w8, #16
; CHECK-GI-NEXT: and x8, x8, #0xffff
; CHECK-GI-NEXT: lsl x0, x8, #48
; CHECK-GI-NEXT: ret
  %swapped = call i16 @llvm.bswap.i16(i16 %a)
  %wide = zext i16 %swapped to i64
  %shifted = shl i64 %wide, 48
  ret i64 %shifted
}

; The zext here is optimised to an any_extend during isel.
define i128 @bswap_i16_to_i128_anyext(i16 %a) {
; CHECK-SD-LABEL: bswap_i16_to_i128_anyext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w8, w0
; CHECK-SD-NEXT: mov x0, xzr
; CHECK-SD-NEXT: rev w8, w8
; CHECK-SD-NEXT: lsr w8, w8, #16
; CHECK-SD-NEXT: lsl x1, x8, #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bswap_i16_to_i128_anyext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: mov x0, xzr
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: lsr w8, w8, #16
; CHECK-GI-NEXT: and x8, x8, #0xffff
; CHECK-GI-NEXT: lsl x1, x8, #48
; CHECK-GI-NEXT: ret
  %swapped = call i16 @llvm.bswap.i16(i16 %a)
  %wide = zext i16 %swapped to i128
  %shifted = shl i128 %wide, 112
  ret i128 %shifted
}

; Byte-swap an i16 and zero-extend the result to i32 (no shift afterwards).
define i32 @bswap_i16_to_i32_zext(i16 %a) {
; CHECK-LABEL: bswap_i16_to_i32_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: rev w8, w0
; CHECK-NEXT: lsr w0, w8, #16
; CHECK-NEXT: ret
  %swapped = call i16 @llvm.bswap.i16(i16 %a)
  %wide = zext i16 %swapped to i32
  ret i32 %wide
}

; ====== Other scalar bswap tests =====

; Byte-swap of a 32-bit scalar.
define i32 @bswap_i32(i32 %a) {
; CHECK-LABEL: bswap_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rev w0, w0
; CHECK-NEXT: ret
  %swapped = call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %swapped
}
declare i32 @llvm.bswap.i32(i32)

; Byte-swap of a 64-bit scalar.
define i64 @bswap_i64(i64 %a) {
; CHECK-LABEL: bswap_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: rev x0, x0
; CHECK-NEXT: ret
  %swapped = call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %swapped
}
declare i64 @llvm.bswap.i64(i64)

; Byte-swap of a 128-bit scalar.
define i128 @bswap_i128(i128 %a) {
; CHECK-LABEL: bswap_i128:
; CHECK: // %bb.0:
; CHECK-NEXT: rev x8, x1
; CHECK-NEXT: rev x1, x0
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: ret
  %swapped = call i128 @llvm.bswap.i128(i128 %a)
  ret i128 %swapped
}
declare i128 @llvm.bswap.i128(i128)

; ===== Legal Vector Type Tests =====

; Per-element byte-swap of <4 x i16>.
define <4 x i16> @bswap_v4i16(<4 x i16> %a) {
; CHECK-LABEL: bswap_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rev16 v0.8b, v0.8b
; CHECK-NEXT: ret
  %swapped = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a)
  ret <4 x i16> %swapped
}
declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)

; Per-element byte-swap of <8 x i16>.
define <8 x i16> @bswap_v8i16(<8 x i16> %a) {
; CHECK-LABEL: bswap_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rev16 v0.16b, v0.16b
; CHECK-NEXT: ret
  %swapped = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
  ret <8 x i16> %swapped
}
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)

; Per-element byte-swap of <2 x i32>.
define <2 x i32> @bswap_v2i32(<2 x i32> %a) {
; CHECK-LABEL: bswap_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rev32 v0.8b, v0.8b
; CHECK-NEXT: ret
  %swapped = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
  ret <2 x i32> %swapped
}
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)

; Per-element byte-swap of <4 x i32>.
define <4 x i32> @bswap_v4i32(<4 x i32> %a) {
; CHECK-LABEL: bswap_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rev32 v0.16b, v0.16b
; CHECK-NEXT: ret
  %swapped = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
  ret <4 x i32> %swapped
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

; Per-element byte-swap of <2 x i64>.
define <2 x i64> @bswap_v2i64(<2 x i64> %a) {
; CHECK-LABEL: bswap_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: rev64 v0.16b, v0.16b
; CHECK-NEXT: ret
  %swapped = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
  ret <2 x i64> %swapped
}
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====

; Per-element byte-swap of <2 x i16>; the two llc invocations are checked
; under separate prefixes because their output differs.
define <2 x i16> @bswap_v2i16(<2 x i16> %a) {
; CHECK-SD-LABEL: bswap_v2i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: rev32 v0.8b, v0.8b
; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bswap_v2i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: rev16 v0.8b, v0.8b
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
  %swapped = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a)
  ret <2 x i16> %swapped
}
declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)

; Per-element byte-swap of <16 x i16>.
define <16 x i16> @bswap_v16i16(<16 x i16> %a) {
; CHECK-LABEL: bswap_v16i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev16 v0.16b, v0.16b
; CHECK-NEXT: rev16 v1.16b, v1.16b
; CHECK-NEXT: ret
entry:
  %swapped = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a)
  ret <16 x i16> %swapped
}
declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)

; Byte-swap of a single-element <1 x i32> vector; the two llc invocations are
; checked under separate prefixes because their output differs.
define <1 x i32> @bswap_v1i32(<1 x i32> %a) {
; CHECK-SD-LABEL: bswap_v1i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: rev32 v0.8b, v0.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bswap_v1i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: mov v0.s[0], w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
  %swapped = call <1 x i32> @llvm.bswap.v1i32(<1 x i32> %a)
  ret <1 x i32> %swapped
}
declare <1 x i32> @llvm.bswap.v1i32(<1 x i32>)

; Per-element byte-swap of <8 x i32>.
define <8 x i32> @bswap_v8i32(<8 x i32> %a) {
; CHECK-LABEL: bswap_v8i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev32 v0.16b, v0.16b
; CHECK-NEXT: rev32 v1.16b, v1.16b
; CHECK-NEXT: ret
entry:
  %swapped = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a)
  ret <8 x i32> %swapped
}
declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)

; Per-element byte-swap of <4 x i64>.
define <4 x i64> @bswap_v4i64(<4 x i64> %a) {
; CHECK-LABEL: bswap_v4i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev64 v0.16b, v0.16b
; CHECK-NEXT: rev64 v1.16b, v1.16b
; CHECK-NEXT: ret
entry:
  %swapped = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a)
  ret <4 x i64> %swapped
}
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)

; ===== Vectors with Non-Pow 2 Widths =====

; Per-element byte-swap of <3 x i16>.
define <3 x i16> @bswap_v3i16(<3 x i16> %a) {
; CHECK-LABEL: bswap_v3i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev16 v0.8b, v0.8b
; CHECK-NEXT: ret
entry:
  %swapped = call <3 x i16> @llvm.bswap.v3i16(<3 x i16> %a)
  ret <3 x i16> %swapped
}
declare <3 x i16> @llvm.bswap.v3i16(<3 x i16>)

; Per-element byte-swap of <7 x i16>.
define <7 x i16> @bswap_v7i16(<7 x i16> %a) {
; CHECK-LABEL: bswap_v7i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev16 v0.16b, v0.16b
; CHECK-NEXT: ret
entry:
  %swapped = call <7 x i16> @llvm.bswap.v7i16(<7 x i16> %a)
  ret <7 x i16> %swapped
}
declare <7 x i16> @llvm.bswap.v7i16(<7 x i16>)

; Per-element byte-swap of <3 x i32>.
define <3 x i32> @bswap_v3i32(<3 x i32> %a) {
; CHECK-LABEL: bswap_v3i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev32 v0.16b, v0.16b
; CHECK-NEXT: ret
entry:
  %swapped = call <3 x i32> @llvm.bswap.v3i32(<3 x i32> %a)
  ret <3 x i32> %swapped
}
declare <3 x i32> @llvm.bswap.v3i32(<3 x i32>)