; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s

; Each function below extracts every lane of its two vector arguments, applies
; a scalar saturating add/sub intrinsic per lane, and rebuilds the result
; vector with an insertelement chain. The checks record what slp-vectorizer
; followed by instcombine produces for the hawaii, fiji and gfx900 invocations
; (prefixes GFX7, GFX8 and GFX9 respectively, with GCN shared by all three).

; uadd.sat on <2 x i16>. Expected: scalar calls are kept for hawaii; fiji and
; gfx900 get a single <2 x i16> intrinsic call on the original arguments.
define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v2i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @uadd_sat_v2i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
;
; GFX9-LABEL: @uadd_sat_v2i16(
; GFX9-NEXT:  bb:
; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
;
bb:
  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
  %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
  ret <2 x i16> %ins.1
}

; usub.sat on <2 x i16>. Same expectations as the uadd.sat <2 x i16> case:
; scalar for hawaii, one <2 x i16> call for fiji and gfx900.
define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @usub_sat_v2i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[SUB_0:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[SUB_1:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[SUB_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[SUB_1]], i64 1
; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @usub_sat_v2i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
;
; GFX9-LABEL: @usub_sat_v2i16(
; GFX9-NEXT:  bb:
; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
;
bb:
  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
  %sub.0 = call i16 @llvm.usub.sat.i16(i16 %arg0.0, i16 %arg1.0)
  %sub.1 = call i16 @llvm.usub.sat.i16(i16 %arg0.1, i16 %arg1.1)
  %ins.0 = insertelement <2 x i16> undef, i16 %sub.0, i64 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 %sub.1, i64 1
  ret <2 x i16> %ins.1
}

; sadd.sat on <2 x i16>. Expected: scalar calls are kept for hawaii; fiji and
; gfx900 get a single <2 x i16> intrinsic call.
define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @sadd_sat_v2i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @sadd_sat_v2i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
;
; GFX9-LABEL: @sadd_sat_v2i16(
; GFX9-NEXT:  bb:
; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
;
bb:
  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
  %add.0 = call i16 @llvm.sadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.sadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
  ret <2 x i16> %ins.1
}

; ssub.sat on <2 x i16>. Expected: scalar calls are kept for hawaii; fiji and
; gfx900 get a single <2 x i16> intrinsic call.
define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @ssub_sat_v2i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[SUB_0:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[SUB_1:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[SUB_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[SUB_1]], i64 1
; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @ssub_sat_v2i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
;
; GFX9-LABEL: @ssub_sat_v2i16(
; GFX9-NEXT:  bb:
; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
;
bb:
  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
  %sub.0 = call i16 @llvm.ssub.sat.i16(i16 %arg0.0, i16 %arg1.0)
  %sub.1 = call i16 @llvm.ssub.sat.i16(i16 %arg0.1, i16 %arg1.1)
  %ins.0 = insertelement <2 x i16> undef, i16 %sub.0, i64 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 %sub.1, i64 1
  ret <2 x i16> %ins.1
}

; uadd.sat on <2 x i32>. Expected: the scalar i32 calls are kept for all three
; invocations (checked once under the shared prefix).
define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @uadd_sat_v2i32(
; GCN-NEXT:  bb:
; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT:    ret <2 x i32> [[INS_1]]
;
bb:
  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
  %add.0 = call i32 @llvm.uadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
  %add.1 = call i32 @llvm.uadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
  ret <2 x i32> %ins.1
}

; usub.sat on <2 x i32>. Expected: the scalar i32 calls are kept for all three
; invocations.
define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @usub_sat_v2i32(
; GCN-NEXT:  bb:
; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT:    [[SUB_0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT:    [[SUB_1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[SUB_0]], i64 0
; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[SUB_1]], i64 1
; GCN-NEXT:    ret <2 x i32> [[INS_1]]
;
bb:
  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
  %sub.0 = call i32 @llvm.usub.sat.i32(i32 %arg0.0, i32 %arg1.0)
  %sub.1 = call i32 @llvm.usub.sat.i32(i32 %arg0.1, i32 %arg1.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %sub.0, i64 0
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %sub.1, i64 1
  ret <2 x i32> %ins.1
}

; sadd.sat on <2 x i32>. Expected: the scalar i32 calls are kept for all three
; invocations.
define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @sadd_sat_v2i32(
; GCN-NEXT:  bb:
; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT:    ret <2 x i32> [[INS_1]]
;
bb:
  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
  %add.0 = call i32 @llvm.sadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
  %add.1 = call i32 @llvm.sadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
  ret <2 x i32> %ins.1
}

; ssub.sat on <2 x i32>. Expected: the scalar i32 calls are kept for all three
; invocations.
define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @ssub_sat_v2i32(
; GCN-NEXT:  bb:
; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT:    [[SUB_0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT:    [[SUB_1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[SUB_0]], i64 0
; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[SUB_1]], i64 1
; GCN-NEXT:    ret <2 x i32> [[INS_1]]
;
bb:
  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
  %sub.0 = call i32 @llvm.ssub.sat.i32(i32 %arg0.0, i32 %arg1.0)
  %sub.1 = call i32 @llvm.ssub.sat.i32(i32 %arg0.1, i32 %arg1.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %sub.0, i64 0
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %sub.1, i64 1
  ret <2 x i32> %ins.1
}

; uadd.sat on <3 x i16> (odd lane count). Expected: fully scalar for hawaii;
; for fiji and gfx900 lanes 0-1 become one <2 x i16> call while lane 2 stays
; a scalar call that is inserted into the widened result.
define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v3i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX7-NEXT:    ret <3 x i16> [[INS_2]]
;
; GFX8-LABEL: @uadd_sat_v3i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
; GFX8-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
; GFX8-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX8-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
; GFX8-NEXT:    ret <3 x i16> [[INS_2]]
;
; GFX9-LABEL: @uadd_sat_v3i16(
; GFX9-NEXT:  bb:
; GFX9-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
; GFX9-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
; GFX9-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX9-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX9-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX9-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX9-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
; GFX9-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
; GFX9-NEXT:    ret <3 x i16> [[INS_2]]
;
bb:
  %arg0.0 = extractelement <3 x i16> %arg0, i64 0
  %arg0.1 = extractelement <3 x i16> %arg0, i64 1
  %arg0.2 = extractelement <3 x i16> %arg0, i64 2
  %arg1.0 = extractelement <3 x i16> %arg1, i64 0
  %arg1.1 = extractelement <3 x i16> %arg1, i64 1
  %arg1.2 = extractelement <3 x i16> %arg1, i64 2
  %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
  %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
  %ins.0 = insertelement <3 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1
  %ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2
  ret <3 x i16> %ins.2
}

; uadd.sat on <4 x i16>. Expected: fully scalar for hawaii; for fiji and
; gfx900 the lanes are split into two <2 x i16> calls (lanes 0-1 and 2-3)
; whose results are concatenated with a shufflevector.
define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v4i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2
; GFX7-NEXT:    [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2
; GFX7-NEXT:    [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX7-NEXT:    [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX7-NEXT:    [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
; GFX7-NEXT:    ret <4 x i16> [[INS_3]]
;
; GFX8-LABEL: @uadd_sat_v4i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT:    [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
; GFX8-NEXT:    [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX8-NEXT:    ret <4 x i16> [[INS_31]]
;
; GFX9-LABEL: @uadd_sat_v4i16(
; GFX9-NEXT:  bb:
; GFX9-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX9-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX9-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX9-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX9-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX9-NEXT:    [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
; GFX9-NEXT:    [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX9-NEXT:    ret <4 x i16> [[INS_31]]
;
bb:
  %arg0.0 = extractelement <4 x i16> %arg0, i64 0
  %arg0.1 = extractelement <4 x i16> %arg0, i64 1
  %arg0.2 = extractelement <4 x i16> %arg0, i64 2
  %arg0.3 = extractelement <4 x i16> %arg0, i64 3
  %arg1.0 = extractelement <4 x i16> %arg1, i64 0
  %arg1.1 = extractelement <4 x i16> %arg1, i64 1
  %arg1.2 = extractelement <4 x i16> %arg1, i64 2
  %arg1.3 = extractelement <4 x i16> %arg1, i64 3
  %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
  %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
  %add.3 = call i16 @llvm.uadd.sat.i16(i16 %arg0.3, i16 %arg1.3)
  %ins.0 = insertelement <4 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1
  %ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2
  %ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3
  ret <4 x i16> %ins.3
}

; Scalar i16 saturating intrinsics called by the <N x i16> functions above.
declare i16 @llvm.uadd.sat.i16(i16, i16) #0
declare i16 @llvm.usub.sat.i16(i16, i16) #0
declare i16 @llvm.sadd.sat.i16(i16, i16) #0
declare i16 @llvm.ssub.sat.i16(i16, i16) #0

; Scalar i32 saturating intrinsics called by the <2 x i32> functions above.
declare i32 @llvm.uadd.sat.i32(i32, i32) #0
declare i32 @llvm.usub.sat.i32(i32, i32) #0
declare i32 @llvm.sadd.sat.i32(i32, i32) #0
declare i32 @llvm.ssub.sat.i32(i32, i32) #0

attributes #0 = { nounwind readnone speculatable willreturn }