; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -passes='function(scalarizer<load-store;min-bits=16>,dce)' -S | FileCheck %s --check-prefixes=CHECK,MIN16
; RUN: opt %s -passes='function(scalarizer<load-store;min-bits=32>,dce)' -S | FileCheck %s --check-prefixes=CHECK,MIN32
;
; Runs the scalarizer (with load/store scalarization enabled) followed by dce,
; once with min-bits=16 and once with min-bits=32. The expected output below
; shows 16-bit-element vectors being split into single elements in the
; min-bits=16 run and into 32-bit-wide pieces (e.g. <2 x i16>, <2 x half>) in
; the min-bits=32 run; the shared prefix covers output common to both runs.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; <2 x i16> load/add/store: split into i16 pieces for min-bits=16, kept whole for min-bits=32.
define void @load_add_store_v2i16(ptr %pa, ptr %pb) {
; MIN16-LABEL: @load_add_store_v2i16(
; MIN16-NEXT:    [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
; MIN16-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
; MIN16-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
; MIN16-NEXT:    [[B_I0:%.*]] = load i16, ptr [[PB]], align 8
; MIN16-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
; MIN16-NEXT:    [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
; MIN16-NEXT:    store i16 [[C_I0]], ptr [[PA]], align 8
; MIN16-NEXT:    store i16 [[C_I1]], ptr [[PA_I1]], align 2
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @load_add_store_v2i16(
; MIN32-NEXT:    [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
; MIN32-NEXT:    [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 8
; MIN32-NEXT:    [[C:%.*]] = add <2 x i16> [[A]], [[B]]
; MIN32-NEXT:    store <2 x i16> [[C]], ptr [[PA]], align 8
; MIN32-NEXT:    ret void
;
  %a = load <2 x i16>, ptr %pa, align 8
  %b = load <2 x i16>, ptr %pb, align 8
  %c = add <2 x i16> %a, %b
  store <2 x i16> %c, ptr %pa, align 8
  ret void
}

; <3 x i16>: three i16 pieces for min-bits=16; a <2 x i16> piece plus an i16 remainder for min-bits=32.
define void @load_add_store_v3i16(ptr %pa, ptr %pb) {
; MIN16-LABEL: @load_add_store_v3i16(
; MIN16-NEXT:    [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
; MIN16-NEXT:    [[PB_I2:%.*]] = getelementptr i16, ptr [[PB]], i32 2
; MIN16-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
; MIN16-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
; MIN16-NEXT:    [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
; MIN16-NEXT:    [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
; MIN16-NEXT:    [[B_I0:%.*]] = load i16, ptr [[PB]], align 8
; MIN16-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
; MIN16-NEXT:    [[B_I2:%.*]] = load i16, ptr [[PB_I2]], align 4
; MIN16-NEXT:    [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
; MIN16-NEXT:    [[C_I2:%.*]] = add i16 [[A_I2]], [[B_I2]]
; MIN16-NEXT:    store i16 [[C_I0]], ptr [[PA]], align 8
; MIN16-NEXT:    store i16 [[C_I1]], ptr [[PA_I1]], align 2
; MIN16-NEXT:    store i16 [[C_I2]], ptr [[PA_I2]], align 4
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @load_add_store_v3i16(
; MIN32-NEXT:    [[PB_I1:%.*]] = getelementptr <2 x i16>, ptr [[PB:%.*]], i32 1
; MIN32-NEXT:    [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
; MIN32-NEXT:    [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1
; MIN32-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 4
; MIN32-NEXT:    [[B_I0:%.*]] = load <2 x i16>, ptr [[PB]], align 8
; MIN32-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 4
; MIN32-NEXT:    [[C_I0:%.*]] = add <2 x i16> [[A_I0]], [[B_I0]]
; MIN32-NEXT:    [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
; MIN32-NEXT:    store <2 x i16> [[C_I0]], ptr [[PA]], align 8
; MIN32-NEXT:    store i16 [[C_I1]], ptr [[PA_I1]], align 4
; MIN32-NEXT:    ret void
;
  %a = load <3 x i16>, ptr %pa, align 8
  %b = load <3 x i16>, ptr %pb, align 8
  %c = add <3 x i16> %a, %b
  store <3 x i16> %c, ptr %pa, align 8
  ret void
}

; <4 x i16>: four i16 pieces for min-bits=16; two <2 x i16> pieces for min-bits=32.
define void @load_add_store_v4i16(ptr %pa, ptr %pb) {
; MIN16-LABEL: @load_add_store_v4i16(
; MIN16-NEXT:    [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
; MIN16-NEXT:    [[PB_I2:%.*]] = getelementptr i16, ptr [[PB]], i32 2
; MIN16-NEXT:    [[PB_I3:%.*]] = getelementptr i16, ptr [[PB]], i32 3
; MIN16-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
; MIN16-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
; MIN16-NEXT:    [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
; MIN16-NEXT:    [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
; MIN16-NEXT:    [[PA_I3:%.*]] = getelementptr i16, ptr [[PA]], i32 3
; MIN16-NEXT:    [[A_I3:%.*]] = load i16, ptr [[PA_I3]], align 2
; MIN16-NEXT:    [[B_I0:%.*]] = load i16, ptr [[PB]], align 8
; MIN16-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
; MIN16-NEXT:    [[B_I2:%.*]] = load i16, ptr [[PB_I2]], align 4
; MIN16-NEXT:    [[B_I3:%.*]] = load i16, ptr [[PB_I3]], align 2
; MIN16-NEXT:    [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
; MIN16-NEXT:    [[C_I2:%.*]] = add i16 [[A_I2]], [[B_I2]]
; MIN16-NEXT:    [[C_I3:%.*]] = add i16 [[A_I3]], [[B_I3]]
; MIN16-NEXT:    store i16 [[C_I0]], ptr [[PA]], align 8
; MIN16-NEXT:    store i16 [[C_I1]], ptr [[PA_I1]], align 2
; MIN16-NEXT:    store i16 [[C_I2]], ptr [[PA_I2]], align 4
; MIN16-NEXT:    store i16 [[C_I3]], ptr [[PA_I3]], align 2
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @load_add_store_v4i16(
; MIN32-NEXT:    [[PB_I1:%.*]] = getelementptr <2 x i16>, ptr [[PB:%.*]], i32 1
; MIN32-NEXT:    [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
; MIN32-NEXT:    [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1
; MIN32-NEXT:    [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4
; MIN32-NEXT:    [[B_I0:%.*]] = load <2 x i16>, ptr [[PB]], align 8
; MIN32-NEXT:    [[B_I1:%.*]] = load <2 x i16>, ptr [[PB_I1]], align 4
; MIN32-NEXT:    [[C_I0:%.*]] = add <2 x i16> [[A_I0]], [[B_I0]]
; MIN32-NEXT:    [[C_I1:%.*]] = add <2 x i16> [[A_I1]], [[B_I1]]
; MIN32-NEXT:    store <2 x i16> [[C_I0]], ptr [[PA]], align 8
; MIN32-NEXT:    store <2 x i16> [[C_I1]], ptr [[PA_I1]], align 4
; MIN32-NEXT:    ret void
;
  %a = load <4 x i16>, ptr %pa, align 8
  %b = load <4 x i16>, ptr %pb, align 8
  %c = add <4 x i16> %a, %b
  store <4 x i16> %c, ptr %pa, align 8
  ret void
}

; <4 x i10>: the loads and the store stay whole-vector in both runs; only the add is split.
define void @load_add_store_v4i10(ptr %pa, ptr %pb) {
; MIN16-LABEL: @load_add_store_v4i10(
; MIN16-NEXT:    [[A:%.*]] = load <4 x i10>, ptr [[PA:%.*]], align 8
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x i10> [[A]], i64 0
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x i10> [[A]], i64 1
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x i10> [[A]], i64 2
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x i10> [[A]], i64 3
; MIN16-NEXT:    [[B:%.*]] = load <4 x i10>, ptr [[PB:%.*]], align 8
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <4 x i10> [[B]], i64 0
; MIN16-NEXT:    [[C_I0:%.*]] = add i10 [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <4 x i10> [[B]], i64 1
; MIN16-NEXT:    [[C_I1:%.*]] = add i10 [[A_I1]], [[B_I1]]
; MIN16-NEXT:    [[B_I2:%.*]] = extractelement <4 x i10> [[B]], i64 2
; MIN16-NEXT:    [[C_I2:%.*]] = add i10 [[A_I2]], [[B_I2]]
; MIN16-NEXT:    [[B_I3:%.*]] = extractelement <4 x i10> [[B]], i64 3
; MIN16-NEXT:    [[C_I3:%.*]] = add i10 [[A_I3]], [[B_I3]]
; MIN16-NEXT:    [[C_UPTO0:%.*]] = insertelement <4 x i10> poison, i10 [[C_I0]], i64 0
; MIN16-NEXT:    [[C_UPTO1:%.*]] = insertelement <4 x i10> [[C_UPTO0]], i10 [[C_I1]], i64 1
; MIN16-NEXT:    [[C_UPTO2:%.*]] = insertelement <4 x i10> [[C_UPTO1]], i10 [[C_I2]], i64 2
; MIN16-NEXT:    [[C:%.*]] = insertelement <4 x i10> [[C_UPTO2]], i10 [[C_I3]], i64 3
; MIN16-NEXT:    store <4 x i10> [[C]], ptr [[PA]], align 8
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @load_add_store_v4i10(
; MIN32-NEXT:    [[A:%.*]] = load <4 x i10>, ptr [[PA:%.*]], align 8
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <4 x i10> [[A]], <4 x i10> poison, <3 x i32> <i32 0, i32 1, i32 2>
; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <4 x i10> [[A]], i64 3
; MIN32-NEXT:    [[B:%.*]] = load <4 x i10>, ptr [[PB:%.*]], align 8
; MIN32-NEXT:    [[B_I0:%.*]] = shufflevector <4 x i10> [[B]], <4 x i10> poison, <3 x i32> <i32 0, i32 1, i32 2>
; MIN32-NEXT:    [[C_I0:%.*]] = add <3 x i10> [[A_I0]], [[B_I0]]
; MIN32-NEXT:    [[B_I1:%.*]] = extractelement <4 x i10> [[B]], i64 3
; MIN32-NEXT:    [[C_I1:%.*]] = add i10 [[A_I1]], [[B_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i10> [[C_I0]], <3 x i10> [[C_I0]], <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
; MIN32-NEXT:    [[C:%.*]] = insertelement <4 x i10> [[TMP1]], i10 [[C_I1]], i64 3
; MIN32-NEXT:    store <4 x i10> [[C]], ptr [[PA]], align 8
; MIN32-NEXT:    ret void
;
  %a = load <4 x i10>, ptr %pa, align 8
  %b = load <4 x i10>, ptr %pb, align 8
  %c = add <4 x i10> %a, %b
  store <4 x i10> %c, ptr %pa, align 8
  ret void
}

; select with a scalar i1 condition on <2 x half>: split for min-bits=16, unchanged for min-bits=32.
define <2 x half> @select_uniform_condition_v2f16(<2 x half> %a, <2 x half> %b, i1 %cc) {
; MIN16-LABEL: @select_uniform_condition_v2f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    ret <2 x half> [[R]]
;
; MIN32-LABEL: @select_uniform_condition_v2f16(
; MIN32-NEXT:    [[R:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A:%.*]], <2 x half> [[B:%.*]]
; MIN32-NEXT:    ret <2 x half> [[R]]
;
  %r = select i1 %cc, <2 x half> %a, <2 x half> %b
  ret <2 x half> %r
}

; select with a scalar i1 condition on <3 x half>: <2 x half> + half pieces for min-bits=32.
define <3 x half> @select_uniform_condition_v3f16(<3 x half> %a, <3 x half> %b, i1 %cc) {
; MIN16-LABEL: @select_uniform_condition_v3f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN16-NEXT:    [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = select i1 [[CC]], half [[A_I2]], half [[B_I2]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    ret <3 x half> [[R]]
;
; MIN32-LABEL: @select_uniform_condition_v3f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[B_I0:%.*]] = shufflevector <3 x half> [[B:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A_I0]], <2 x half> [[B_I0]]
; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN32-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2
; MIN32-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
; MIN32-NEXT:    ret <3 x half> [[R]]
;
  %r = select i1 %cc, <3 x half> %a, <3 x half> %b
  ret <3 x half> %r
}

; select with a scalar i1 condition on <4 x half>: two <2 x half> pieces for min-bits=32.
define <4 x half> @select_uniform_condition_v4f16(<4 x half> %a, <4 x half> %b, i1 %cc) {
; MIN16-LABEL: @select_uniform_condition_v4f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
; MIN16-NEXT:    [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = select i1 [[CC]], half [[A_I2]], half [[B_I2]]
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
; MIN16-NEXT:    [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3
; MIN16-NEXT:    [[R_I3:%.*]] = select i1 [[CC]], half [[A_I3]], half [[B_I3]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
; MIN16-NEXT:    ret <4 x half> [[R]]
;
; MIN32-LABEL: @select_uniform_condition_v4f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[B_I0:%.*]] = shufflevector <4 x half> [[B:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A_I0]], <2 x half> [[B_I0]]
; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], <2 x half> [[A_I1]], <2 x half> [[B_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT:    ret <4 x half> [[R]]
;
  %r = select i1 %cc, <4 x half> %a, <4 x half> %b
  ret <4 x half> %r
}

; select with a <4 x i1> condition: expected to be left unchanged in both runs.
define <4 x half> @select_vector_condition_v4f16(<4 x half> %a, <4 x half> %b, <4 x i1> %cc) {
; CHECK-LABEL: @select_vector_condition_v4f16(
; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[CC:%.*]], <4 x half> [[A:%.*]], <4 x half> [[B:%.*]]
; CHECK-NEXT:    ret <4 x half> [[R]]
;
  %r = select <4 x i1> %cc, <4 x half> %a, <4 x half> %b
  ret <4 x half> %r
}

; fneg on <2 x half>: split for min-bits=16, unchanged for min-bits=32.
define <2 x half> @unary_v2f16(<2 x half> %a) {
; MIN16-LABEL: @unary_v2f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fneg half [[A_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fneg half [[A_I1]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    ret <2 x half> [[R]]
;
; MIN32-LABEL: @unary_v2f16(
; MIN32-NEXT:    [[R:%.*]] = fneg <2 x half> [[A:%.*]]
; MIN32-NEXT:    ret <2 x half> [[R]]
;
  %r = fneg <2 x half> %a
  ret <2 x half> %r
}

; fneg on <3 x half>: <2 x half> + half pieces for min-bits=32.
define <3 x half> @unary_v3f16(<3 x half> %a) {
; MIN16-LABEL: @unary_v3f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fneg half [[A_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fneg half [[A_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fneg half [[A_I2]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    ret <3 x half> [[R]]
;
; MIN32-LABEL: @unary_v3f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = fneg <2 x half> [[A_I0]]
; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN32-NEXT:    [[R_I1:%.*]] = fneg half [[A_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
; MIN32-NEXT:    ret <3 x half> [[R]]
;
  %r = fneg <3 x half> %a
  ret <3 x half> %r
}

; fneg on <4 x half>: two <2 x half> pieces for min-bits=32.
define <4 x half> @unary_v4f16(<4 x half> %a) {
; MIN16-LABEL: @unary_v4f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fneg half [[A_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fneg half [[A_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fneg half [[A_I2]]
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
; MIN16-NEXT:    [[R_I3:%.*]] = fneg half [[A_I3]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
; MIN16-NEXT:    ret <4 x half> [[R]]
;
; MIN32-LABEL: @unary_v4f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = fneg <2 x half> [[A_I0]]
; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[R_I1:%.*]] = fneg <2 x half> [[A_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT:    ret <4 x half> [[R]]
;
  %r = fneg <4 x half> %a
  ret <4 x half> %r
}

; fadd on <2 x half>: split for min-bits=16, unchanged for min-bits=32.
define <2 x half> @binary_v2f16(<2 x half> %a, <2 x half> %b) {
; MIN16-LABEL: @binary_v2f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    ret <2 x half> [[R]]
;
; MIN32-LABEL: @binary_v2f16(
; MIN32-NEXT:    [[R:%.*]] = fadd <2 x half> [[A:%.*]], [[B:%.*]]
; MIN32-NEXT:    ret <2 x half> [[R]]
;
  %r = fadd <2 x half> %a, %b
  ret <2 x half> %r
}

; fadd on <3 x half>: <2 x half> + half pieces for min-bits=32.
define <3 x half> @binary_v3f16(<3 x half> %a, <3 x half> %b) {
; MIN16-LABEL: @binary_v3f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN16-NEXT:    [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fadd half [[A_I2]], [[B_I2]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    ret <3 x half> [[R]]
;
; MIN32-LABEL: @binary_v3f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[B_I0:%.*]] = shufflevector <3 x half> [[B:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = fadd <2 x half> [[A_I0]], [[B_I0]]
; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN32-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2
; MIN32-NEXT:    [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
; MIN32-NEXT:    ret <3 x half> [[R]]
;
  %r = fadd <3 x half> %a, %b
  ret <3 x half> %r
}

; fadd on <4 x half>: two <2 x half> pieces for min-bits=32.
define <4 x half> @binary_v4f16(<4 x half> %a, <4 x half> %b) {
; MIN16-LABEL: @binary_v4f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
; MIN16-NEXT:    [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fadd half [[A_I2]], [[B_I2]]
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
; MIN16-NEXT:    [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3
; MIN16-NEXT:    [[R_I3:%.*]] = fadd half [[A_I3]], [[B_I3]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
; MIN16-NEXT:    ret <4 x half> [[R]]
;
; MIN32-LABEL: @binary_v4f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[B_I0:%.*]] = shufflevector <4 x half> [[B:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = fadd <2 x half> [[A_I0]], [[B_I0]]
; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[R_I1:%.*]] = fadd <2 x half> [[A_I1]], [[B_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT:    ret <4 x half> [[R]]
;
  %r = fadd <4 x half> %a, %b
  ret <4 x half> %r
}

; fptosi <2 x half> to <2 x i16>: split for min-bits=16, unchanged for min-bits=32.
define <2 x i16> @fptosi_v2f16(<2 x half> %a) {
; MIN16-LABEL: @fptosi_v2f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fptosi half [[A_I0]] to i16
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x i16> poison, i16 [[R_I0]], i64 0
; MIN16-NEXT:    [[R:%.*]] = insertelement <2 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1
; MIN16-NEXT:    ret <2 x i16> [[R]]
;
; MIN32-LABEL: @fptosi_v2f16(
; MIN32-NEXT:    [[R:%.*]] = fptosi <2 x half> [[A:%.*]] to <2 x i16>
; MIN32-NEXT:    ret <2 x i16> [[R]]
;
  %r = fptosi <2 x half> %a to <2 x i16>
  ret <2 x i16> %r
}

; fptosi <3 x half> to <3 x i16>: a two-element piece plus a scalar for min-bits=32.
define <3 x i16> @fptosi_v3f16(<3 x half> %a) {
; MIN16-LABEL: @fptosi_v3f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fptosi half [[A_I0]] to i16
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fptosi half [[A_I2]] to i16
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x i16> poison, i16 [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1
; MIN16-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[R_UPTO1]], i16 [[R_I2]], i64 2
; MIN16-NEXT:    ret <3 x i16> [[R]]
;
; MIN32-LABEL: @fptosi_v3f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = fptosi <2 x half> [[A_I0]] to <2 x i16>
; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN32-NEXT:    [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[R_I1]], i64 2
; MIN32-NEXT:    ret <3 x i16> [[R]]
;
  %r = fptosi <3 x half> %a to <3 x i16>
  ret <3 x i16> %r
}

; fptosi <4 x half> to <4 x i16>: two two-element pieces for min-bits=32.
define <4 x i16> @fptosi_v4f16(<4 x half> %a) {
; MIN16-LABEL: @fptosi_v4f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fptosi half [[A_I0]] to i16
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fptosi half [[A_I2]] to i16
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
; MIN16-NEXT:    [[R_I3:%.*]] = fptosi half [[A_I3]] to i16
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x i16> poison, i16 [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1
; MIN16-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x i16> [[R_UPTO1]], i16 [[R_I2]], i64 2
; MIN16-NEXT:    [[R:%.*]] = insertelement <4 x i16> [[R_UPTO2]], i16 [[R_I3]], i64 3
; MIN16-NEXT:    ret <4 x i16> [[R]]
;
; MIN32-LABEL: @fptosi_v4f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = fptosi <2 x half> [[A_I0]] to <2 x i16>
; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[R_I1:%.*]] = fptosi <2 x half> [[A_I1]] to <2 x i16>
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[R_I1]], <2 x i16> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT:    ret <4 x i16> [[R]]
;
  %r = fptosi <4 x half> %a to <4 x i16>
  ret <4 x i16> %r
}

; fpext <4 x half> to <4 x float>: scalarized for min-bits=16, unchanged for min-bits=32.
define <4 x float> @fpext_v4f16(<4 x half> %a) {
; MIN16-LABEL: @fpext_v4f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fpext half [[A_I0]] to float
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fpext half [[A_I1]] to float
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fpext half [[A_I2]] to float
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
; MIN16-NEXT:    [[R_I3:%.*]] = fpext half [[A_I3]] to float
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x float> poison, float [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x float> [[R_UPTO0]], float [[R_I1]], i64 1
; MIN16-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x float> [[R_UPTO1]], float [[R_I2]], i64 2
; MIN16-NEXT:    [[R:%.*]] = insertelement <4 x float> [[R_UPTO2]], float [[R_I3]], i64 3
; MIN16-NEXT:    ret <4 x float> [[R]]
;
; MIN32-LABEL: @fpext_v4f16(
; MIN32-NEXT:    [[R:%.*]] = fpext <4 x half> [[A:%.*]] to <4 x float>
; MIN32-NEXT:    ret <4 x float> [[R]]
;
  %r = fpext <4 x half> %a to <4 x float>
  ret <4 x float> %r
}

; icmp ugt on <4 x i16> operands (despite the f16 in the name): expected unchanged in both runs.
define <4 x i1> @icmp_v4f16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: @icmp_v4f16(
; CHECK-NEXT:    [[R:%.*]] = icmp ugt <4 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret <4 x i1> [[R]]
;
  %r = icmp ugt <4 x i16> %a, %b
  ret <4 x i1> %r
}

; GEP with a scalar base and a <4 x i16> index: scalarized for min-bits=16, unchanged for min-bits=32.
define <4 x ptr> @gep1_v4(ptr %base, <4 x i16> %a) {
; MIN16-LABEL: @gep1_v4(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
; MIN16-NEXT:    [[P_I0:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i16 [[A_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1
; MIN16-NEXT:    [[P_I1:%.*]] = getelementptr i32, ptr [[BASE]], i16 [[A_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2
; MIN16-NEXT:    [[P_I2:%.*]] = getelementptr i32, ptr [[BASE]], i16 [[A_I2]]
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3
; MIN16-NEXT:    [[P_I3:%.*]] = getelementptr i32, ptr [[BASE]], i16 [[A_I3]]
; MIN16-NEXT:    [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0
; MIN16-NEXT:    [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1
; MIN16-NEXT:    [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2
; MIN16-NEXT:    [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3
; MIN16-NEXT:    ret <4 x ptr> [[P]]
;
; MIN32-LABEL: @gep1_v4(
; MIN32-NEXT:    [[P:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i16> [[A:%.*]]
; MIN32-NEXT:    ret <4 x ptr> [[P]]
;
  %p = getelementptr i32, ptr %base, <4 x i16> %a
  ret <4 x ptr> %p
}

; GEP with a <4 x ptr> base and a scalar index: scalarized identically in both runs.
define <4 x ptr> @gep2_v4(<4 x ptr> %base, i16 %a) {
; CHECK-LABEL: @gep2_v4(
; CHECK-NEXT:    [[BASE_I0:%.*]] = extractelement <4 x ptr> [[BASE:%.*]], i64 0
; CHECK-NEXT:    [[P_I0:%.*]] = getelementptr i32, ptr [[BASE_I0]], i16 [[A:%.*]]
; CHECK-NEXT:    [[BASE_I1:%.*]] = extractelement <4 x ptr> [[BASE]], i64 1
; CHECK-NEXT:    [[P_I1:%.*]] = getelementptr i32, ptr [[BASE_I1]], i16 [[A]]
; CHECK-NEXT:    [[BASE_I2:%.*]] = extractelement <4 x ptr> [[BASE]], i64 2
; CHECK-NEXT:    [[P_I2:%.*]] = getelementptr i32, ptr [[BASE_I2]], i16 [[A]]
; CHECK-NEXT:    [[BASE_I3:%.*]] = extractelement <4 x ptr> [[BASE]], i64 3
; CHECK-NEXT:    [[P_I3:%.*]] = getelementptr i32, ptr [[BASE_I3]], i16 [[A]]
; CHECK-NEXT:    [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0
; CHECK-NEXT:    [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1
; CHECK-NEXT:    [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2
; CHECK-NEXT:    [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3
; CHECK-NEXT:    ret <4 x ptr> [[P]]
;
  %p = getelementptr i32, <4 x ptr> %base, i16 %a
  ret <4 x ptr> %p
}

; GEP with a <4 x ptr> base and a <4 x i16> index: scalarized for min-bits=16, unchanged for min-bits=32.
define <4 x ptr> @gep3_v4(<4 x ptr> %base, <4 x i16> %a) {
; MIN16-LABEL: @gep3_v4(
; MIN16-NEXT:    [[BASE_I0:%.*]] = extractelement <4 x ptr> [[BASE:%.*]], i64 0
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
; MIN16-NEXT:    [[P_I0:%.*]] = getelementptr i32, ptr [[BASE_I0]], i16 [[A_I0]]
; MIN16-NEXT:    [[BASE_I1:%.*]] = extractelement <4 x ptr> [[BASE]], i64 1
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1
; MIN16-NEXT:    [[P_I1:%.*]] = getelementptr i32, ptr [[BASE_I1]], i16 [[A_I1]]
; MIN16-NEXT:    [[BASE_I2:%.*]] = extractelement <4 x ptr> [[BASE]], i64 2
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2
; MIN16-NEXT:    [[P_I2:%.*]] = getelementptr i32, ptr [[BASE_I2]], i16 [[A_I2]]
; MIN16-NEXT:    [[BASE_I3:%.*]] = extractelement <4 x ptr> [[BASE]], i64 3
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3
; MIN16-NEXT:    [[P_I3:%.*]] = getelementptr i32, ptr [[BASE_I3]], i16 [[A_I3]]
; MIN16-NEXT:    [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0
; MIN16-NEXT:    [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1
; MIN16-NEXT:    [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2
; MIN16-NEXT:    [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3
; MIN16-NEXT:    ret <4 x ptr> [[P]]
;
; MIN32-LABEL: @gep3_v4(
; MIN32-NEXT:    [[P:%.*]] = getelementptr i32, <4 x ptr> [[BASE:%.*]], <4 x i16> [[A:%.*]]
; MIN32-NEXT:    ret <4 x ptr> [[P]]
;
  %p = getelementptr i32, <4 x ptr> %base, <4 x i16> %a
  ret <4 x ptr> %p
}

; insertelement into lane 1 of <2 x i16>, then a store written without an explicit alignment.
define void @insertelement_v2i16(ptr %p, <2 x i16> %a, i16 %b) {
; MIN16-LABEL: @insertelement_v2i16(
; MIN16-NEXT:    [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <2 x i16> [[A:%.*]], i64 0
; MIN16-NEXT:    store i16 [[A_I0]], ptr [[P]], align 4
; MIN16-NEXT:    store i16 [[B:%.*]], ptr [[P_I1]], align 2
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @insertelement_v2i16(
; MIN32-NEXT:    [[R:%.*]] = insertelement <2 x i16> [[A:%.*]], i16 [[B:%.*]], i64 1
; MIN32-NEXT:    store <2 x i16> [[R]], ptr [[P:%.*]], align 4
; MIN32-NEXT:    ret void
;
  %r = insertelement <2 x i16> %a, i16 %b, i64 1
  store <2 x i16> %r, ptr %p
  ret void
}

define void @insertelement_v3i16(ptr %p, <3 x i16> %a, i16 %b) {
; MIN16-LABEL: @insertelement_v3i16(
; MIN16-NEXT:    [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
; MIN16-NEXT:    [[P_I2:%.*]] = getelementptr i16, ptr [[P]], i32 2
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <3 x i16> [[A:%.*]], i64 0
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <3 x i16> [[A]], i64 1
; MIN16-NEXT:    store i16 [[A_I0]], ptr [[P]], align 8
; MIN16-NEXT:    store i16 [[A_I1]], ptr [[P_I1]], align 2
; MIN16-NEXT:    store i16 [[B:%.*]], ptr [[P_I2]], align 4
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @insertelement_v3i16(
; MIN32-NEXT:    [[P_I1:%.*]] = getelementptr <2 x i16>, ptr [[P:%.*]], i32 1
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <3 x i16> [[A:%.*]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    store <2 x i16> [[A_I0]], ptr [[P]], align 8
; MIN32-NEXT:    store i16 [[B:%.*]], ptr [[P_I1]], align 4
; MIN32-NEXT:    ret void
;
  %r = insertelement <3 x i16> %a, i16 %b, i64 2
  store <3 x i16> %r, ptr
%p 638 ret void 639} 640 641define void @insertelement_v4i16(ptr %p, <4 x i16> %a, i16 %b) { 642; MIN16-LABEL: @insertelement_v4i16( 643; MIN16-NEXT: [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 644; MIN16-NEXT: [[P_I2:%.*]] = getelementptr i16, ptr [[P]], i32 2 645; MIN16-NEXT: [[P_I3:%.*]] = getelementptr i16, ptr [[P]], i32 3 646; MIN16-NEXT: [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0 647; MIN16-NEXT: [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1 648; MIN16-NEXT: [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2 649; MIN16-NEXT: store i16 [[A_I0]], ptr [[P]], align 8 650; MIN16-NEXT: store i16 [[A_I1]], ptr [[P_I1]], align 2 651; MIN16-NEXT: store i16 [[A_I2]], ptr [[P_I2]], align 4 652; MIN16-NEXT: store i16 [[B:%.*]], ptr [[P_I3]], align 2 653; MIN16-NEXT: ret void 654; 655; MIN32-LABEL: @insertelement_v4i16( 656; MIN32-NEXT: [[P_I1:%.*]] = getelementptr <2 x i16>, ptr [[P:%.*]], i32 1 657; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x i16> [[A:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1> 658; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> <i32 2, i32 3> 659; MIN32-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> [[A_I1]], i16 [[B:%.*]], i64 1 660; MIN32-NEXT: store <2 x i16> [[A_I0]], ptr [[P]], align 8 661; MIN32-NEXT: store <2 x i16> [[TMP1]], ptr [[P_I1]], align 4 662; MIN32-NEXT: ret void 663; 664 %r = insertelement <4 x i16> %a, i16 %b, i64 3 665 store <4 x i16> %r, ptr %p 666 ret void 667} 668 669define <2 x i16> @load_insertelement_v2i16(ptr %pa, i16 %b) { 670; MIN16-LABEL: @load_insertelement_v2i16( 671; MIN16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 4 672; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x i16> poison, i16 [[A_I0]], i64 0 673; MIN16-NEXT: [[R:%.*]] = insertelement <2 x i16> [[R_UPTO0]], i16 [[B:%.*]], i64 1 674; MIN16-NEXT: ret <2 x i16> [[R]] 675; 676; MIN32-LABEL: @load_insertelement_v2i16( 677; MIN32-NEXT: [[A:%.*]] = load <2 x i16>, ptr 
[[PA:%.*]], align 4 678; MIN32-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A]], i16 [[B:%.*]], i64 1 679; MIN32-NEXT: ret <2 x i16> [[R]] 680; 681 %a = load <2 x i16>, ptr %pa 682 %r = insertelement <2 x i16> %a, i16 %b, i64 1 683 ret <2 x i16> %r 684} 685 686define <3 x i16> @load_insertelement_v3i16(ptr %pa, i16 %b) { 687; MIN16-LABEL: @load_insertelement_v3i16( 688; MIN16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8 689; MIN16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 690; MIN16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 691; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x i16> poison, i16 [[A_I0]], i64 0 692; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x i16> [[R_UPTO0]], i16 [[A_I1]], i64 1 693; MIN16-NEXT: [[R:%.*]] = insertelement <3 x i16> [[R_UPTO1]], i16 [[B:%.*]], i64 2 694; MIN16-NEXT: ret <3 x i16> [[R]] 695; 696; MIN32-LABEL: @load_insertelement_v3i16( 697; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8 698; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <3 x i32> <i32 0, i32 1, i32 poison> 699; MIN32-NEXT: [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[B:%.*]], i64 2 700; MIN32-NEXT: ret <3 x i16> [[R]] 701; 702 %a = load <3 x i16>, ptr %pa 703 %r = insertelement <3 x i16> %a, i16 %b, i64 2 704 ret <3 x i16> %r 705} 706 707define <4 x i16> @load_insertelement_v4i16(ptr %pa, i16 %b) { 708; MIN16-LABEL: @load_insertelement_v4i16( 709; MIN16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8 710; MIN16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 711; MIN16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 712; MIN16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2 713; MIN16-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4 714; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x i16> poison, i16 [[A_I0]], i64 0 715; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x i16> [[R_UPTO0]], i16 [[A_I1]], i64 1 716; MIN16-NEXT: 
[[R_UPTO2:%.*]] = insertelement <4 x i16> [[R_UPTO1]], i16 [[A_I2]], i64 2 717; MIN16-NEXT: [[R:%.*]] = insertelement <4 x i16> [[R_UPTO2]], i16 [[B:%.*]], i64 3 718; MIN16-NEXT: ret <4 x i16> [[R]] 719; 720; MIN32-LABEL: @load_insertelement_v4i16( 721; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8 722; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1 723; MIN32-NEXT: [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4 724; MIN32-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> [[A_I1]], i16 [[B:%.*]], i64 1 725; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 726; MIN32-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 727; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 728; MIN32-NEXT: ret <4 x i16> [[R]] 729; 730 %a = load <4 x i16>, ptr %pa 731 %r = insertelement <4 x i16> %a, i16 %b, i64 3 732 ret <4 x i16> %r 733} 734 735define void @shufflevector_grow(ptr %pa, ptr %pb) { 736; MIN16-LABEL: @shufflevector_grow( 737; MIN16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA:%.*]], i32 2 738; MIN16-NEXT: [[PA_I3:%.*]] = getelementptr i16, ptr [[PA]], i32 3 739; MIN16-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1 740; MIN16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA]], align 4 741; MIN16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 742; MIN16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 743; MIN16-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 4 744; MIN16-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2 745; MIN16-NEXT: store i16 [[A_I0]], ptr [[PA]], align 8 746; MIN16-NEXT: store i16 [[A_I1]], ptr [[PA_I1]], align 2 747; MIN16-NEXT: store i16 [[B_I0]], ptr [[PA_I2]], align 4 748; MIN16-NEXT: store i16 [[B_I1]], ptr [[PA_I3]], align 2 749; MIN16-NEXT: ret 
void 750; 751; MIN32-LABEL: @shufflevector_grow( 752; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA:%.*]], i32 1 753; MIN32-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA]], align 4 754; MIN32-NEXT: [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 4 755; MIN32-NEXT: [[R:%.*]] = shufflevector <2 x i16> [[A]], <2 x i16> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 756; MIN32-NEXT: [[R_I0:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> poison, <2 x i32> <i32 0, i32 1> 757; MIN32-NEXT: store <2 x i16> [[R_I0]], ptr [[PA]], align 8 758; MIN32-NEXT: [[R_I1:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> poison, <2 x i32> <i32 2, i32 3> 759; MIN32-NEXT: store <2 x i16> [[R_I1]], ptr [[PA_I1]], align 4 760; MIN32-NEXT: ret void 761; 762 %a = load <2 x i16>, ptr %pa 763 %b = load <2 x i16>, ptr %pb 764 %r = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 765 store <4 x i16> %r, ptr %pa 766 ret void 767} 768 769define void @shufflevector_shrink(ptr %pa) { 770; MIN16-LABEL: @shufflevector_shrink( 771; MIN16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA:%.*]], i32 1 772; MIN16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 773; MIN16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2 774; MIN16-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4 775; MIN16-NEXT: store i16 [[A_I1]], ptr [[PA]], align 4 776; MIN16-NEXT: store i16 [[A_I2]], ptr [[PA_I1]], align 2 777; MIN16-NEXT: ret void 778; 779; MIN32-LABEL: @shufflevector_shrink( 780; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8 781; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1 782; MIN32-NEXT: [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4 783; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 784; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[A_I1]], <2 x i16> [[A_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 
poison> 785; MIN32-NEXT: [[A:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 786; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> <i32 1, i32 2> 787; MIN32-NEXT: store <2 x i16> [[R]], ptr [[PA]], align 4 788; MIN32-NEXT: ret void 789; 790 %a = load <4 x i16>, ptr %pa 791 %r = shufflevector <4 x i16> %a, <4 x i16> poison, <2 x i32> <i32 1, i32 2> 792 store <2 x i16> %r, ptr %pa 793 ret void 794} 795 796define void @phi_v2f16(ptr %base, i64 %bound) { 797; MIN16-LABEL: @phi_v2f16( 798; MIN16-NEXT: entry: 799; MIN16-NEXT: [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1 800; MIN16-NEXT: br label [[LOOP:%.*]] 801; MIN16: loop: 802; MIN16-NEXT: [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] 803; MIN16-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] 804; MIN16-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] 805; MIN16-NEXT: [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE]], i64 [[IDX]] 806; MIN16-NEXT: [[A_I0:%.*]] = load half, ptr [[P]], align 2 807; MIN16-NEXT: [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1 808; MIN16-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2 809; MIN16-NEXT: [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]] 810; MIN16-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]] 811; MIN16-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 812; MIN16-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] 813; MIN16-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] 814; MIN16: end: 815; MIN16-NEXT: store half [[X_NEXT_I0]], ptr [[BASE]], align 4 816; MIN16-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2 817; MIN16-NEXT: ret void 818; 819; MIN32-LABEL: @phi_v2f16( 820; MIN32-NEXT: entry: 821; MIN32-NEXT: br label [[LOOP:%.*]] 822; MIN32: loop: 823; MIN32-NEXT: [[X:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], 
[[LOOP]] ] 824; MIN32-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] 825; MIN32-NEXT: [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i64 [[IDX]] 826; MIN32-NEXT: [[A:%.*]] = load <2 x half>, ptr [[P]], align 2 827; MIN32-NEXT: [[X_NEXT]] = fadd <2 x half> [[X]], [[A]] 828; MIN32-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 829; MIN32-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] 830; MIN32-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] 831; MIN32: end: 832; MIN32-NEXT: store <2 x half> [[X_NEXT]], ptr [[BASE]], align 4 833; MIN32-NEXT: ret void 834; 835entry: 836 br label %loop 837 838loop: 839 %x = phi <2 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ] 840 %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] 841 %p = getelementptr <2 x half>, ptr %base, i64 %idx 842 %a = load <2 x half>, ptr %p, align 2 843 %x.next = fadd <2 x half> %x, %a 844 %idx.next = add i64 %idx, 1 845 %cc = icmp ult i64 %idx.next, %bound 846 br i1 %cc, label %loop, label %end 847 848end: 849 store <2 x half> %x.next, ptr %base 850 ret void 851} 852 853define void @phi_v3f16(ptr %base, i64 %bound) { 854; MIN16-LABEL: @phi_v3f16( 855; MIN16-NEXT: entry: 856; MIN16-NEXT: [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1 857; MIN16-NEXT: [[BASE_I2:%.*]] = getelementptr half, ptr [[BASE]], i32 2 858; MIN16-NEXT: br label [[LOOP:%.*]] 859; MIN16: loop: 860; MIN16-NEXT: [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] 861; MIN16-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] 862; MIN16-NEXT: [[X_I2:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I2:%.*]], [[LOOP]] ] 863; MIN16-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] 864; MIN16-NEXT: [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE]], i64 [[IDX]] 865; MIN16-NEXT: [[A_I0:%.*]] = load half, ptr [[P]], align 2 866; MIN16-NEXT: [[P_I1:%.*]] = getelementptr half, ptr 
[[P]], i32 1 867; MIN16-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2 868; MIN16-NEXT: [[P_I2:%.*]] = getelementptr half, ptr [[P]], i32 2 869; MIN16-NEXT: [[A_I2:%.*]] = load half, ptr [[P_I2]], align 2 870; MIN16-NEXT: [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]] 871; MIN16-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]] 872; MIN16-NEXT: [[X_NEXT_I2]] = fadd half [[X_I2]], [[A_I2]] 873; MIN16-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 874; MIN16-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] 875; MIN16-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] 876; MIN16: end: 877; MIN16-NEXT: store half [[X_NEXT_I0]], ptr [[BASE]], align 8 878; MIN16-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2 879; MIN16-NEXT: store half [[X_NEXT_I2]], ptr [[BASE_I2]], align 4 880; MIN16-NEXT: ret void 881; 882; MIN32-LABEL: @phi_v3f16( 883; MIN32-NEXT: entry: 884; MIN32-NEXT: [[BASE_I1:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i32 1 885; MIN32-NEXT: br label [[LOOP:%.*]] 886; MIN32: loop: 887; MIN32-NEXT: [[X_I0:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] 888; MIN32-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] 889; MIN32-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] 890; MIN32-NEXT: [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE]], i64 [[IDX]] 891; MIN32-NEXT: [[A_I0:%.*]] = load <2 x half>, ptr [[P]], align 2 892; MIN32-NEXT: [[P_I1:%.*]] = getelementptr <2 x half>, ptr [[P]], i32 1 893; MIN32-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2 894; MIN32-NEXT: [[X_NEXT_I0]] = fadd <2 x half> [[X_I0]], [[A_I0]] 895; MIN32-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]] 896; MIN32-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 897; MIN32-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] 898; MIN32-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] 899; MIN32: end: 900; MIN32-NEXT: store <2 x half> 
[[X_NEXT_I0]], ptr [[BASE]], align 8 901; MIN32-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 4 902; MIN32-NEXT: ret void 903; 904entry: 905 br label %loop 906 907loop: 908 %x = phi <3 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ] 909 %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] 910 %p = getelementptr <3 x half>, ptr %base, i64 %idx 911 %a = load <3 x half>, ptr %p, align 2 912 %x.next = fadd <3 x half> %x, %a 913 %idx.next = add i64 %idx, 1 914 %cc = icmp ult i64 %idx.next, %bound 915 br i1 %cc, label %loop, label %end 916 917end: 918 store <3 x half> %x.next, ptr %base 919 ret void 920} 921 922define void @phi_v4f16(ptr %base, i64 %bound) { 923; MIN16-LABEL: @phi_v4f16( 924; MIN16-NEXT: entry: 925; MIN16-NEXT: [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1 926; MIN16-NEXT: [[BASE_I2:%.*]] = getelementptr half, ptr [[BASE]], i32 2 927; MIN16-NEXT: [[BASE_I3:%.*]] = getelementptr half, ptr [[BASE]], i32 3 928; MIN16-NEXT: br label [[LOOP:%.*]] 929; MIN16: loop: 930; MIN16-NEXT: [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] 931; MIN16-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] 932; MIN16-NEXT: [[X_I2:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I2:%.*]], [[LOOP]] ] 933; MIN16-NEXT: [[X_I3:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I3:%.*]], [[LOOP]] ] 934; MIN16-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] 935; MIN16-NEXT: [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE]], i64 [[IDX]] 936; MIN16-NEXT: [[A_I0:%.*]] = load half, ptr [[P]], align 2 937; MIN16-NEXT: [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1 938; MIN16-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2 939; MIN16-NEXT: [[P_I2:%.*]] = getelementptr half, ptr [[P]], i32 2 940; MIN16-NEXT: [[A_I2:%.*]] = load half, ptr [[P_I2]], align 2 941; MIN16-NEXT: [[P_I3:%.*]] = getelementptr half, ptr [[P]], i32 3 942; MIN16-NEXT: 
[[A_I3:%.*]] = load half, ptr [[P_I3]], align 2 943; MIN16-NEXT: [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]] 944; MIN16-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]] 945; MIN16-NEXT: [[X_NEXT_I2]] = fadd half [[X_I2]], [[A_I2]] 946; MIN16-NEXT: [[X_NEXT_I3]] = fadd half [[X_I3]], [[A_I3]] 947; MIN16-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 948; MIN16-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] 949; MIN16-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] 950; MIN16: end: 951; MIN16-NEXT: store half [[X_NEXT_I0]], ptr [[BASE]], align 8 952; MIN16-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2 953; MIN16-NEXT: store half [[X_NEXT_I2]], ptr [[BASE_I2]], align 4 954; MIN16-NEXT: store half [[X_NEXT_I3]], ptr [[BASE_I3]], align 2 955; MIN16-NEXT: ret void 956; 957; MIN32-LABEL: @phi_v4f16( 958; MIN32-NEXT: entry: 959; MIN32-NEXT: [[BASE_I1:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i32 1 960; MIN32-NEXT: br label [[LOOP:%.*]] 961; MIN32: loop: 962; MIN32-NEXT: [[X_I0:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] 963; MIN32-NEXT: [[X_I1:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] 964; MIN32-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] 965; MIN32-NEXT: [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE]], i64 [[IDX]] 966; MIN32-NEXT: [[A_I0:%.*]] = load <2 x half>, ptr [[P]], align 2 967; MIN32-NEXT: [[P_I1:%.*]] = getelementptr <2 x half>, ptr [[P]], i32 1 968; MIN32-NEXT: [[A_I1:%.*]] = load <2 x half>, ptr [[P_I1]], align 2 969; MIN32-NEXT: [[X_NEXT_I0]] = fadd <2 x half> [[X_I0]], [[A_I0]] 970; MIN32-NEXT: [[X_NEXT_I1]] = fadd <2 x half> [[X_I1]], [[A_I1]] 971; MIN32-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 972; MIN32-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] 973; MIN32-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] 974; MIN32: end: 975; MIN32-NEXT: store <2 x half> [[X_NEXT_I0]], ptr 
[[BASE]], align 8 976; MIN32-NEXT: store <2 x half> [[X_NEXT_I1]], ptr [[BASE_I1]], align 4 977; MIN32-NEXT: ret void 978; 979entry: 980 br label %loop 981 982loop: 983 %x = phi <4 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ] 984 %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] 985 %p = getelementptr <4 x half>, ptr %base, i64 %idx 986 %a = load <4 x half>, ptr %p, align 2 987 %x.next = fadd <4 x half> %x, %a 988 %idx.next = add i64 %idx, 1 989 %cc = icmp ult i64 %idx.next, %bound 990 br i1 %cc, label %loop, label %end 991 992end: 993 store <4 x half> %x.next, ptr %base 994 ret void 995} 996 997define <2 x half> @call_v2f16(<2 x half> %a, <2 x half> %b) { 998; MIN16-LABEL: @call_v2f16( 999; MIN16-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0 1000; MIN16-NEXT: [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0 1001; MIN16-NEXT: [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]]) 1002; MIN16-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1 1003; MIN16-NEXT: [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1 1004; MIN16-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]]) 1005; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0 1006; MIN16-NEXT: [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1 1007; MIN16-NEXT: ret <2 x half> [[R]] 1008; 1009; MIN32-LABEL: @call_v2f16( 1010; MIN32-NEXT: [[R:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A:%.*]], <2 x half> [[B:%.*]]) 1011; MIN32-NEXT: ret <2 x half> [[R]] 1012; 1013 %r = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b) 1014 ret <2 x half> %r 1015} 1016 1017define <3 x half> @call_v3f16(<3 x half> %a, <3 x half> %b) { 1018; MIN16-LABEL: @call_v3f16( 1019; MIN16-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0 1020; MIN16-NEXT: [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0 1021; MIN16-NEXT: [[R_I0:%.*]] = call half 
@llvm.minnum.f16(half [[A_I0]], half [[B_I0]]) 1022; MIN16-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1 1023; MIN16-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1 1024; MIN16-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]]) 1025; MIN16-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2 1026; MIN16-NEXT: [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2 1027; MIN16-NEXT: [[R_I2:%.*]] = call half @llvm.minnum.f16(half [[A_I2]], half [[B_I2]]) 1028; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0 1029; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1 1030; MIN16-NEXT: [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2 1031; MIN16-NEXT: ret <3 x half> [[R]] 1032; 1033; MIN32-LABEL: @call_v3f16( 1034; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1> 1035; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <3 x half> [[B:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1> 1036; MIN32-NEXT: [[R_I0:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I0]], <2 x half> [[B_I0]]) 1037; MIN32-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2 1038; MIN32-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2 1039; MIN32-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]]) 1040; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison> 1041; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2 1042; MIN32-NEXT: ret <3 x half> [[R]] 1043; 1044 %r = call <3 x half> @llvm.minnum.v3f16(<3 x half> %a, <3 x half> %b) 1045 ret <3 x half> %r 1046} 1047 1048define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) { 1049; MIN16-LABEL: @call_v4f16( 1050; MIN16-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 1051; MIN16-NEXT: 
[[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0 1052; MIN16-NEXT: [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]]) 1053; MIN16-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 1054; MIN16-NEXT: [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1 1055; MIN16-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]]) 1056; MIN16-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 1057; MIN16-NEXT: [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2 1058; MIN16-NEXT: [[R_I2:%.*]] = call half @llvm.minnum.f16(half [[A_I2]], half [[B_I2]]) 1059; MIN16-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 1060; MIN16-NEXT: [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3 1061; MIN16-NEXT: [[R_I3:%.*]] = call half @llvm.minnum.f16(half [[A_I3]], half [[B_I3]]) 1062; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0 1063; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1 1064; MIN16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2 1065; MIN16-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3 1066; MIN16-NEXT: ret <4 x half> [[R]] 1067; 1068; MIN32-LABEL: @call_v4f16( 1069; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1> 1070; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <4 x half> [[B:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1> 1071; MIN32-NEXT: [[R_I0:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I0]], <2 x half> [[B_I0]]) 1072; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3> 1073; MIN32-NEXT: [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> <i32 2, i32 3> 1074; MIN32-NEXT: [[R_I1:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I1]], <2 x half> [[B_I1]]) 1075; MIN32-NEXT: [[TMP1:%.*]] = 
shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 1076; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 1077; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1078; MIN32-NEXT: ret <4 x half> [[R]] 1079; 1080 %r = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b) 1081 ret <4 x half> %r 1082} 1083 1084declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) 1085declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) 1086declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>) 1087