; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+simd128 | FileCheck %s

;; Test that SIMD extending operations can be successfully selected

target triple = "wasm32-unknown-unknown"

;; i8x16 -> i16x8: a shuffle that picks the low (lanes 0-7) or high
;; (lanes 8-15) half followed by sext/zext is expected to become a single
;; extend_low / extend_high instruction.

define <8 x i16> @extend_low_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: extend_low_i8x16_s:
; CHECK:         .functype extend_low_i8x16_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %extended = sext <8 x i8> %low to <8 x i16>
  ret <8 x i16> %extended
}

define <8 x i16> @extend_low_i8x16_u(<16 x i8> %v) {
; CHECK-LABEL: extend_low_i8x16_u:
; CHECK:         .functype extend_low_i8x16_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_u
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %extended = zext <8 x i8> %low to <8 x i16>
  ret <8 x i16> %extended
}

define <8 x i16> @extend_high_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: extend_high_i8x16_s:
; CHECK:         .functype extend_high_i8x16_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_high_i8x16_s
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %extended = sext <8 x i8> %high to <8 x i16>
  ret <8 x i16> %extended
}

define <8 x i16> @extend_high_i8x16_u(<16 x i8> %v) {
; CHECK-LABEL: extend_high_i8x16_u:
; CHECK:         .functype extend_high_i8x16_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_high_i8x16_u
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %extended = zext <8 x i8> %high to <8 x i16>
  ret <8 x i16> %extended
}

;; i16x8 -> i32x4: low half is lanes 0-3, high half is lanes 4-7.

define <4 x i32> @extend_low_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: extend_low_i16x8_s:
; CHECK:         .functype extend_low_i16x8_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = sext <4 x i16> %low to <4 x i32>
  ret <4 x i32> %extended
}

define <4 x i32> @extend_low_i16x8_u(<8 x i16> %v) {
; CHECK-LABEL: extend_low_i16x8_u:
; CHECK:         .functype extend_low_i16x8_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = zext <4 x i16> %low to <4 x i32>
  ret <4 x i32> %extended
}

define <4 x i32> @extend_high_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: extend_high_i16x8_s:
; CHECK:         .functype extend_high_i16x8_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_high_i16x8_s
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = sext <4 x i16> %high to <4 x i32>
  ret <4 x i32> %extended
}

define <4 x i32> @extend_high_i16x8_u(<8 x i16> %v) {
; CHECK-LABEL: extend_high_i16x8_u:
; CHECK:         .functype extend_high_i16x8_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_high_i16x8_u
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = zext <4 x i16> %high to <4 x i32>
  ret <4 x i32> %extended
}

;; i32x4 -> i64x2: low half is lanes 0-1, high half is lanes 2-3.

define <2 x i64> @extend_low_i32x4_s(<4 x i32> %v) {
; CHECK-LABEL: extend_low_i32x4_s:
; CHECK:         .functype extend_low_i32x4_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extend_low_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 0, i32 1>
  %extended = sext <2 x i32> %low to <2 x i64>
  ret <2 x i64> %extended
}

define <2 x i64> @extend_low_i32x4_u(<4 x i32> %v) {
; CHECK-LABEL: extend_low_i32x4_u:
; CHECK:         .functype extend_low_i32x4_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extend_low_i32x4_u
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 0, i32 1>
  %extended = zext <2 x i32> %low to <2 x i64>
  ret <2 x i64> %extended
}

define <2 x i64> @extend_high_i32x4_s(<4 x i32> %v) {
; CHECK-LABEL: extend_high_i32x4_s:
; CHECK:         .functype extend_high_i32x4_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extend_high_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 2, i32 3>
  %extended = sext <2 x i32> %high to <2 x i64>
  ret <2 x i64> %extended
}

define <2 x i64> @extend_high_i32x4_u(<4 x i32> %v) {
; CHECK-LABEL: extend_high_i32x4_u:
; CHECK:         .functype extend_high_i32x4_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extend_high_i32x4_u
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 2, i32 3>
  %extended = zext <2 x i32> %high to <2 x i64>
  ret <2 x i64> %extended
}

;; Also test that similar patterns with offsets not corresponding to
;; the low or high half are correctly expanded. The expected output first
;; moves the selected lanes into the low half with i8x16.shuffle and then
;; applies extend_low.

define <8 x i16> @extend_lowish_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: extend_lowish_i8x16_s:
; CHECK:         .functype extend_lowish_i8x16_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.shuffle 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    # fallthrough-return
  %lowish = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %extended = sext <8 x i8> %lowish to <8 x i16>
  ret <8 x i16> %extended
}

define <4 x i32> @extend_lowish_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: extend_lowish_i16x8_s:
; CHECK:         .functype extend_lowish_i16x8_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.shuffle 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    # fallthrough-return
  %lowish = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  %extended = sext <4 x i16> %lowish to <4 x i32>
  ret <4 x i32> %extended
}

;; Also test vectors that aren't full 128 bits, or might require
;; multiple extensions.

;; sext from i1 lanes is expected as a shift left by 7 followed by an
;; arithmetic shift right by 7 on i8x16 lanes.
define <16 x i8> @extend_i1x16_i8(<16 x i1> %v) {
; CHECK-LABEL: extend_i1x16_i8:
; CHECK:         .functype extend_i1x16_i8 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 7
; CHECK-NEXT:    i8x16.shl
; CHECK-NEXT:    i32.const 7
; CHECK-NEXT:    i8x16.shr_s
; CHECK-NEXT:    # fallthrough-return
  %extended = sext <16 x i1> %v to <16 x i8>
  ret <16 x i8> %extended
}

;; Same shift pair as above, preceded by a shuffle that gathers the
;; even-numbered bytes into the low eight lanes.
define <8 x i8> @extend_i1x8_i8(<8 x i1> %v) {
; CHECK-LABEL: extend_i1x8_i8:
; CHECK:         .functype extend_i1x8_i8 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i32.const 7
; CHECK-NEXT:    i8x16.shl
; CHECK-NEXT:    i32.const 7
; CHECK-NEXT:    i8x16.shr_s
; CHECK-NEXT:    # fallthrough-return
  %extended = sext <8 x i1> %v to <8 x i8>
  ret <8 x i8> %extended
}

;; zext from i1 lanes is expected as an AND with a vector of ones.
define <8 x i16> @extend_i1x8_i16(<8 x i1> %v) {
; CHECK-LABEL: extend_i1x8_i16:
; CHECK:         .functype extend_i1x8_i16 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 1, 1, 1, 1, 1, 1, 1, 1
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    # fallthrough-return
  %extended = zext <8 x i1> %v to <8 x i16>
  ret <8 x i16> %extended
}

;; i8 -> i32 is expected as two chained unsigned extend_low steps.
define <4 x i32> @extend_i8x4_i32(<4 x i8> %v) {
; CHECK-LABEL: extend_i8x4_i32:
; CHECK:         .functype extend_i8x4_i32 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_u
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    # fallthrough-return
  %extended = zext <4 x i8> %v to <4 x i32>
  ret <4 x i32> %extended
}

;; i8 -> i64 is expected as three chained signed extend_low steps.
define <2 x i64> @extend_i8x2_i64(<2 x i8> %v) {
; CHECK-LABEL: extend_i8x2_i64:
; CHECK:         .functype extend_i8x2_i64 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    i64x2.extend_low_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %extended = sext <2 x i8> %v to <2 x i64>
  ret <2 x i64> %extended
}