; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+simd128 | FileCheck %s

;; Test that the SIMD bitmask instructions can be selected.
;;
;; Every function below compares its vector argument against zero and
;; bitcasts the resulting <N x i1> mask to an integer. The expected assembly
;; shows whether that pattern is selected as a single bitmask instruction or
;; is assembled lane by lane.

target triple = "wasm32-unknown-unknown"

;; <16 x i8>: expected to select i8x16.eq followed by a single i8x16.bitmask.
define i16 @bitmask_v16i8(<16 x i8> %v) {
; CHECK-LABEL: bitmask_v16i8:
; CHECK: .functype bitmask_v16i8 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: i8x16.eq
; CHECK-NEXT: i8x16.bitmask
; CHECK-NEXT: # fallthrough-return
  %cmp = icmp eq <16 x i8> %v, zeroinitializer
  %bitmask = bitcast <16 x i1> %cmp to i16
  ret i16 %bitmask
}

;; <8 x i16>: expected to select i16x8.eq followed by a single i16x8.bitmask.
define i8 @bitmask_v8i16(<8 x i16> %v) {
; CHECK-LABEL: bitmask_v8i16:
; CHECK: .functype bitmask_v8i16 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: i16x8.eq
; CHECK-NEXT: i16x8.bitmask
; CHECK-NEXT: # fallthrough-return
  %cmp = icmp eq <8 x i16> %v, zeroinitializer
  %bitmask = bitcast <8 x i1> %cmp to i8
  ret i8 %bitmask
}

;; <4 x i32>: the IR bitcasts the mask to i4 and zero-extends it to i8. The
;; expected assembly is i32x4.eq followed by i32x4.bitmask, with no separate
;; instruction for the zero-extension.
define i8 @bitmask_v4i32(<4 x i32> %v) {
; CHECK-LABEL: bitmask_v4i32:
; CHECK: .functype bitmask_v4i32 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0, 0, 0
; CHECK-NEXT: i32x4.eq
; CHECK-NEXT: i32x4.bitmask
; CHECK-NEXT: # fallthrough-return
  %cmp = icmp eq <4 x i32> %v, zeroinitializer
  %bitmask = bitcast <4 x i1> %cmp to i4
  %ext = zext i4 %bitmask to i8
  ret i8 %ext
}

;; <2 x i64>: the IR bitcasts the mask to i2 and zero-extends it to i8. The
;; expected assembly is i64x2.eq followed by i64x2.bitmask, with no separate
;; instruction for the zero-extension.
define i8 @bitmask_v2i64(<2 x i64> %v) {
; CHECK-LABEL: bitmask_v2i64:
; CHECK: .functype bitmask_v2i64 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0
; CHECK-NEXT: i64x2.eq
; CHECK-NEXT: i64x2.bitmask
; CHECK-NEXT: # fallthrough-return
  %cmp = icmp eq <2 x i64> %v, zeroinitializer
  %bitmask = bitcast <2 x i1> %cmp to i2
  %ext = zext i2 %bitmask to i8
  ret i8 %ext
}

;; Test unusual vectors

;; <1 x i8>: no bitmask instruction is expected; lane 0 is extracted with
;; i8x16.extract_lane_u and tested with i32.eqz.
define i1 @bitmask_v1i8(<1 x i8> %v) {
; CHECK-LABEL: bitmask_v1i8:
; CHECK: .functype bitmask_v1i8 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 0
; CHECK-NEXT: i32.eqz
; CHECK-NEXT: # fallthrough-return
  %cmp = icmp eq <1 x i8> %v, zeroinitializer
  %bitmask = bitcast <1 x i1> %cmp to i1
  ret i1 %bitmask
}

;; <7 x i8>: the function type takes the seven lanes as seven i32 parameters.
;; The expected assembly rebuilds a vector with i8x16.splat / replace_lane,
;; compares it with zero, and then assembles the mask one lane at a time
;; (extract_lane_u, and, shl, or) before masking the result with 127. No
;; bitmask instruction is expected. The expected output also contains a
;; stack-pointer computation whose result is immediately dropped.
define i7 @bitmask_v7i8(<7 x i8> %v) {
; CHECK-LABEL: bitmask_v7i8:
; CHECK: .functype bitmask_v7i8 (i32, i32, i32, i32, i32, i32, i32) -> (i32)
; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get __stack_pointer
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.sub
; CHECK-NEXT: drop
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.splat
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.replace_lane 1
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i8x16.replace_lane 2
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i8x16.replace_lane 3
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i8x16.replace_lane 4
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i8x16.replace_lane 5
; CHECK-NEXT: local.get 6
; CHECK-NEXT: i8x16.replace_lane 6
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: i8x16.eq
; CHECK-NEXT: local.tee 7
; CHECK-NEXT: i16x8.extract_lane_u 0
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i16x8.extend_low_i8x16_s
; CHECK-NEXT: local.tee 7
; CHECK-NEXT: i16x8.extract_lane_u 1
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i16x8.extract_lane_u 2
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 2
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i16x8.extract_lane_u 3
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 3
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i16x8.extract_lane_u 4
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i16x8.extract_lane_u 5
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 5
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i16x8.extract_lane_u 6
; CHECK-NEXT: i32.const 6
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.const 127
; CHECK-NEXT: i32.and
; CHECK-NEXT: # fallthrough-return
  %cmp = icmp eq <7 x i8> %v, zeroinitializer
  %bitmask = bitcast <7 x i1> %cmp to i7
  ret i7 %bitmask
}

;; <8 x i8>: the i8x16.eq result is widened with i16x8.extend_low_i8x16_s and
;; a single i16x8.bitmask is then expected.
define i8 @bitmask_v8i8(<8 x i8> %v) {
; CHECK-LABEL: bitmask_v8i8:
; CHECK: .functype bitmask_v8i8 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: i8x16.eq
; CHECK-NEXT: i16x8.extend_low_i8x16_s
; CHECK-NEXT: i16x8.bitmask
; CHECK-NEXT: # fallthrough-return
  %cmp = icmp eq <8 x i8> %v, zeroinitializer
  %bitmask = bitcast <8 x i1> %cmp to i8
  ret i8 %bitmask
}

;; <32 x i8>: the function type takes the vector as two v128 parameters. Each
;; half is compared with zero and the 32-bit result is assembled one lane at a
;; time (extract_lane_u, and, shl, or): bits 0-15 from the first parameter and
;; bits 16-31 from the second. No bitmask instruction is expected. The expected
;; output also contains a stack-pointer computation whose result is
;; immediately dropped.
define i32 @bitmask_v32i8(<32 x i8> %v) {
; CHECK-LABEL: bitmask_v32i8:
; CHECK: .functype bitmask_v32i8 (v128, v128) -> (i32)
; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get __stack_pointer
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.sub
; CHECK-NEXT: drop
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: local.tee 2
; CHECK-NEXT: i8x16.eq
; CHECK-NEXT: local.tee 0
; CHECK-NEXT: i8x16.extract_lane_u 0
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 1
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 2
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 2
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 3
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 3
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 4
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 5
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 5
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 6
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 6
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 7
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 7
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 8
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 9
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 9
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 10
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 10
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 11
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 11
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 12
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 12
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 13
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 13
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 14
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 14
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 15
; CHECK-NEXT: i32.const 15
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32.and
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i8x16.eq
; CHECK-NEXT: local.tee 0
; CHECK-NEXT: i8x16.extract_lane_u 15
; CHECK-NEXT: i32.const 31
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 14
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 30
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 13
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 29
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 12
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 28
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 11
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 27
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 10
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 26
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 9
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 25
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 8
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 7
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 23
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 6
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 22
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 5
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 21
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 4
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 20
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 3
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 19
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 2
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 18
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 1
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 17
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 0
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: # fallthrough-return
  %cmp = icmp eq <32 x i8> %v, zeroinitializer
  %bitmask = bitcast <32 x i1> %cmp to i32
  ret i32 %bitmask
}