; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s

; Every function below takes two integer vectors apart with extractelement,
; applies a scalar llvm.{u,s}{min,max} intrinsic to each pair of lanes, and
; rebuilds the result with an insertelement chain. The check lines record what
; slp-vectorizer followed by instcombine is expected to leave behind for the
; hawaii, fiji and gfx900 invocations (GFX7, GFX8 and GFX9 prefixes
; respectively). The shared GCN prefix appears on the functions whose checks
; are common to all three invocations.
;
; NOTE(review): the function names (*_sat) and the %add.N value names do not
; match the min/max intrinsics that are actually called; presumably they were
; carried over from a saturating add/sub test. They are left untouched because
; the autogenerated check lines are derived from them.

; <2 x i16> unsigned min, one llvm.umin.i16 call per lane. The GFX7 and GFX8
; checks keep the scalar calls; the GFX9 checks expect a single
; llvm.umin.v2i16 call on the original arguments.
define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v2i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @uadd_sat_v2i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX8-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX8-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX8-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX8-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX8-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX8-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX8-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX8-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX9-LABEL: @uadd_sat_v2i16(
; GFX9-NEXT:  bb:
; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
;
bb:
  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
  %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
  ret <2 x i16> %ins.1
}

; <2 x i16> unsigned max, one llvm.umax.i16 call per lane. The GFX7 and GFX8
; checks keep the scalar calls; the GFX9 checks expect a single
; llvm.umax.v2i16 call.
define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @usub_sat_v2i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @usub_sat_v2i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX8-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX8-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX8-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX8-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX8-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX8-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX8-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX8-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX9-LABEL: @usub_sat_v2i16(
; GFX9-NEXT:  bb:
; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
;
bb:
  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
  %add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1)
  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
  ret <2 x i16> %ins.1
}

; <2 x i16> signed min, one llvm.smin.i16 call per lane. The GFX7 and GFX8
; checks keep the scalar calls; the GFX9 checks expect a single
; llvm.smin.v2i16 call.
define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @sadd_sat_v2i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @sadd_sat_v2i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX8-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX8-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX8-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX8-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX8-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX8-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX8-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX8-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX9-LABEL: @sadd_sat_v2i16(
; GFX9-NEXT:  bb:
; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
;
bb:
  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
  %add.0 = call i16 @llvm.smin.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.smin.i16(i16 %arg0.1, i16 %arg1.1)
  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
  ret <2 x i16> %ins.1
}

; <2 x i16> signed max, one llvm.smax.i16 call per lane. The GFX7 and GFX8
; checks keep the scalar calls; the GFX9 checks expect a single
; llvm.smax.v2i16 call.
define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @ssub_sat_v2i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @ssub_sat_v2i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX8-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX8-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX8-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX8-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX8-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX8-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX8-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX8-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX9-LABEL: @ssub_sat_v2i16(
; GFX9-NEXT:  bb:
; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
;
bb:
  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
  %add.0 = call i16 @llvm.smax.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.smax.i16(i16 %arg0.1, i16 %arg1.1)
  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
  ret <2 x i16> %ins.1
}

; <2 x i32> unsigned min, one llvm.umin.i32 call per lane. The checks use the
; shared GCN prefix and expect the scalar calls to remain for every
; invocation.
define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @uadd_sat_v2i32(
; GCN-NEXT:  bb:
; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.umin.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.umin.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT:    ret <2 x i32> [[INS_1]]
;
bb:
  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
  %add.0 = call i32 @llvm.umin.i32(i32 %arg0.0, i32 %arg1.0)
  %add.1 = call i32 @llvm.umin.i32(i32 %arg0.1, i32 %arg1.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
  ret <2 x i32> %ins.1
}

; <2 x i32> unsigned max, one llvm.umax.i32 call per lane. The checks use the
; shared GCN prefix and expect the scalar calls to remain.
define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @usub_sat_v2i32(
; GCN-NEXT:  bb:
; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.umax.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.umax.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT:    ret <2 x i32> [[INS_1]]
;
bb:
  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
  %add.0 = call i32 @llvm.umax.i32(i32 %arg0.0, i32 %arg1.0)
  %add.1 = call i32 @llvm.umax.i32(i32 %arg0.1, i32 %arg1.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
  ret <2 x i32> %ins.1
}

; <2 x i32> signed min, one llvm.smin.i32 call per lane. The checks use the
; shared GCN prefix and expect the scalar calls to remain.
define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @sadd_sat_v2i32(
; GCN-NEXT:  bb:
; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.smin.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.smin.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT:    ret <2 x i32> [[INS_1]]
;
bb:
  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
  %add.0 = call i32 @llvm.smin.i32(i32 %arg0.0, i32 %arg1.0)
  %add.1 = call i32 @llvm.smin.i32(i32 %arg0.1, i32 %arg1.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
  ret <2 x i32> %ins.1
}

; <2 x i32> signed max, one llvm.smax.i32 call per lane. The checks use the
; shared GCN prefix and expect the scalar calls to remain.
define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @ssub_sat_v2i32(
; GCN-NEXT:  bb:
; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.smax.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.smax.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT:    ret <2 x i32> [[INS_1]]
;
bb:
  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
  %add.0 = call i32 @llvm.smax.i32(i32 %arg0.0, i32 %arg1.0)
  %add.1 = call i32 @llvm.smax.i32(i32 %arg0.1, i32 %arg1.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
  ret <2 x i32> %ins.1
}

; <3 x i16> unsigned min, one llvm.umin.i16 call per lane. The GFX7 and GFX8
; checks keep all three scalar calls. The GFX9 checks expect a
; llvm.umin.v3i16 call on the original arguments, with lane 2 still computed
; by a scalar llvm.umin.i16 and inserted into that result.
define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v3i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX7-NEXT:    ret <3 x i16> [[INS_2]]
;
; GFX8-LABEL: @uadd_sat_v3i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
; GFX8-NEXT:    [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
; GFX8-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
; GFX8-NEXT:    [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
; GFX8-NEXT:    [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
; GFX8-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
; GFX8-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX8-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX8-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX8-NEXT:    [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
; GFX8-NEXT:    [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX8-NEXT:    ret <3 x i16> [[INS_2]]
;
; GFX9-LABEL: @uadd_sat_v3i16(
; GFX9-NEXT:  bb:
; GFX9-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
; GFX9-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
; GFX9-NEXT:    [[TMP0:%.*]] = call <3 x i16> @llvm.umin.v3i16(<3 x i16> [[ARG0]], <3 x i16> [[ARG1]])
; GFX9-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX9-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[TMP0]], i16 [[ADD_2]], i64 2
; GFX9-NEXT:    ret <3 x i16> [[INS_2]]
;
bb:
  %arg0.0 = extractelement <3 x i16> %arg0, i64 0
  %arg0.1 = extractelement <3 x i16> %arg0, i64 1
  %arg0.2 = extractelement <3 x i16> %arg0, i64 2
  %arg1.0 = extractelement <3 x i16> %arg1, i64 0
  %arg1.1 = extractelement <3 x i16> %arg1, i64 1
  %arg1.2 = extractelement <3 x i16> %arg1, i64 2
  %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
  %add.2 = call i16 @llvm.umin.i16(i16 %arg0.2, i16 %arg1.2)
  %ins.0 = insertelement <3 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1
  %ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2
  ret <3 x i16> %ins.2
}

; <4 x i16> unsigned min, one llvm.umin.i16 call per lane. The GFX7 checks
; keep all four scalar calls. The GFX8 checks keep scalar calls for lanes 0
; and 1 and take lanes 2 and 3 from a llvm.umin.v4i16 call via shufflevector.
; The GFX9 checks expect two llvm.umin.v4i16 calls whose results are blended
; with shufflevector.
define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v4i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2
; GFX7-NEXT:    [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2
; GFX7-NEXT:    [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX7-NEXT:    [[ADD_3:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX7-NEXT:    [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
; GFX7-NEXT:    ret <4 x i16> [[INS_3]]
;
; GFX8-LABEL: @uadd_sat_v4i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
; GFX8-NEXT:    [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
; GFX8-NEXT:    [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
; GFX8-NEXT:    [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
; GFX8-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX8-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX8-NEXT:    [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT:    [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
; GFX8-NEXT:    [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX8-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; GFX8-NEXT:    [[INS_31:%.*]] = shufflevector <4 x i16> [[INS_1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; GFX8-NEXT:    ret <4 x i16> [[INS_31]]
;
; GFX9-LABEL: @uadd_sat_v4i16(
; GFX9-NEXT:  bb:
; GFX9-NEXT:    [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG1:%.*]])
; GFX9-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
; GFX9-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX9-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; GFX9-NEXT:    [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; GFX9-NEXT:    ret <4 x i16> [[INS_31]]
;
bb:
  %arg0.0 = extractelement <4 x i16> %arg0, i64 0
  %arg0.1 = extractelement <4 x i16> %arg0, i64 1
  %arg0.2 = extractelement <4 x i16> %arg0, i64 2
  %arg0.3 = extractelement <4 x i16> %arg0, i64 3
  %arg1.0 = extractelement <4 x i16> %arg1, i64 0
  %arg1.1 = extractelement <4 x i16> %arg1, i64 1
  %arg1.2 = extractelement <4 x i16> %arg1, i64 2
  %arg1.3 = extractelement <4 x i16> %arg1, i64 3
  %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
  %add.2 = call i16 @llvm.umin.i16(i16 %arg0.2, i16 %arg1.2)
  %add.3 = call i16 @llvm.umin.i16(i16 %arg0.3, i16 %arg1.3)
  %ins.0 = insertelement <4 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1
  %ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2
  %ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3
  ret <4 x i16> %ins.3
}