; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-BE
;
; Exercises the code llc emits at -mcpu=pwr9 for extractelement on <16 x i8>,
; <8 x i16> and <4 x i32> vectors. Every function carries two expected
; instruction sequences: one for the powerpc64le triple and one for the
; powerpc64 triple. The expected sequences are autogenerated (see the first
; line of this file); regenerate them with that script instead of editing
; them by hand.

; Variable-index byte extract from <16 x i8>. The element and %c are both
; zero-extended to i32, added, and the sum is truncated back to i8 for a
; zeroext return. Expected: vextubrx for the little-endian triple and
; vextublx for the big-endian triple, then an add and a final clrldi.
define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) {
; CHECK-LE-LABEL: test_add1:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: clrldi 3, 5, 32
; CHECK-LE-NEXT: vextubrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: clrldi 3, 3, 56
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_add1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: clrldi 3, 5, 32
; CHECK-BE-NEXT: vextublx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: clrldi 3, 3, 56
; CHECK-BE-NEXT: blr
entry:
  %vecext = extractelement <16 x i8> %a, i32 %index
  %conv = zext i8 %vecext to i32
  %conv1 = zext i8 %c to i32
  %add = add nuw nsw i32 %conv, %conv1
  %conv2 = trunc i32 %add to i8
  ret i8 %conv2
}

; Same IR body as test_add1, but the return value and %c are declared
; signext. The expected sequences end in extsb instead of clrldi.
define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) {
; CHECK-LE-LABEL: test_add2:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: clrldi 3, 5, 32
; CHECK-LE-NEXT: vextubrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: extsb 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_add2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: clrldi 3, 5, 32
; CHECK-BE-NEXT: vextublx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: extsb 3, 3
; CHECK-BE-NEXT: blr
entry:
  %vecext = extractelement <16 x i8> %a, i32 %index
  %conv3 = zext i8 %vecext to i32
  %conv14 = zext i8 %c to i32
  %add = add nuw nsw i32 %conv3, %conv14
  %conv2 = trunc i32 %add to i8
  ret i8 %conv2
}

; Variable-index halfword extract from <8 x i16>, zero-extended, added to %c
; and truncated to i16 for a zeroext return. Expected: an rlwinm on the index
; (presumably turning the element index into a byte offset; confirm against
; the ISA), then vextuhrx (little-endian) or vextuhlx (big-endian).
define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) {
; CHECK-LE-LABEL: test_add3:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: clrldi 3, 5, 32
; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-LE-NEXT: vextuhrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: clrldi 3, 3, 48
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_add3:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: clrldi 3, 5, 32
; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-BE-NEXT: vextuhlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: clrldi 3, 3, 48
; CHECK-BE-NEXT: blr
entry:
  %vecext = extractelement <8 x i16> %a, i32 %index
  %conv = zext i16 %vecext to i32
  %conv1 = zext i16 %c to i32
  %add = add nuw nsw i32 %conv, %conv1
  %conv2 = trunc i32 %add to i16
  ret i16 %conv2
}

; Same IR body as test_add3, but the return value and %c are declared
; signext. The expected sequences end in extsh instead of clrldi.
define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) {
; CHECK-LE-LABEL: test_add4:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: clrldi 3, 5, 32
; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-LE-NEXT: vextuhrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: extsh 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_add4:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: clrldi 3, 5, 32
; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-BE-NEXT: vextuhlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: extsh 3, 3
; CHECK-BE-NEXT: blr
entry:
  %vecext = extractelement <8 x i16> %a, i32 %index
  %conv5 = zext i16 %vecext to i32
  %conv16 = zext i16 %c to i32
  %add = add nuw nsw i32 %conv5, %conv16
  %conv2 = trunc i32 %add to i16
  ret i16 %conv2
}

; Variable-index word extract from <4 x i32>, added directly to %c (no
; extend/truncate in the IR) for a zeroext return. Expected: an rlwinm on the
; index, then vextuwrx (little-endian) or vextuwlx (big-endian), an add and a
; final clrldi.
define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) {
; CHECK-LE-LABEL: test_add5:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: clrldi 3, 5, 32
; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: clrldi 3, 3, 32
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_add5:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: clrldi 3, 5, 32
; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: clrldi 3, 3, 32
; CHECK-BE-NEXT: blr
entry:
  %vecext = extractelement <4 x i32> %a, i32 %index
  %add = add i32 %vecext, %c
  ret i32 %add
}

; Signext counterpart of test_add5: the add carries nsw and the expected
; sequences end in extsw instead of clrldi.
define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) {
; CHECK-LE-LABEL: test_add6:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: clrldi 3, 5, 32
; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: extsw 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_add6:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: clrldi 3, 5, 32
; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: extsw 3, 3
; CHECK-BE-NEXT: blr
entry:
  %vecext = extractelement <4 x i32> %a, i32 %index
  %add = add nsw i32 %vecext, %c
  ret i32 %add
}

; When extracting word element 2 on LE, it's better to use mfvsrwz rather than vextuwrx
; Constant-index (2) word extract with a zeroext return and no arithmetic.
; Expected: a single mfvsrwz for the little-endian triple; li 3, 8 followed
; by vextuwlx for the big-endian triple.
define zeroext i32 @test7(<4 x i32> %a) {
; CHECK-LE-LABEL: test7:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: mfvsrwz 3, 34
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test7:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li 3, 8
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: blr
entry:
  %vecext = extractelement <4 x i32> %a, i32 2
  ret i32 %vecext
}

; test7 with the extracted word added to %c; the expected sequences gain an
; add and a final clrldi.
define zeroext i32 @testadd_7(<4 x i32> %a, i32 zeroext %c) {
; CHECK-LE-LABEL: testadd_7:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: mfvsrwz 3, 34
; CHECK-LE-NEXT: add 3, 3, 5
; CHECK-LE-NEXT: clrldi 3, 3, 32
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: testadd_7:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li 3, 8
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 5
; CHECK-BE-NEXT: clrldi 3, 3, 32
; CHECK-BE-NEXT: blr
entry:
  %vecext = extractelement <4 x i32> %a, i32 2
  %add = add i32 %vecext, %c
  ret i32 %add
}

; Signext counterpart of test7: same constant index 2, with an extsw expected
; after the extract for both triples.
define signext i32 @test8(<4 x i32> %a) {
; CHECK-LE-LABEL: test8:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: mfvsrwz 3, 34
; CHECK-LE-NEXT: extsw 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test8:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li 3, 8
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: extsw 3, 3
; CHECK-BE-NEXT: blr
entry:
  %vecext = extractelement <4 x i32> %a, i32 2
  ret i32 %vecext
}

; test8 with the extracted word added (nsw) to %c; the expected sequences
; place an add between the extract and the extsw.
define signext i32 @testadd_8(<4 x i32> %a, i32 signext %c) {
; CHECK-LE-LABEL: testadd_8:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: mfvsrwz 3, 34
; CHECK-LE-NEXT: add 3, 3, 5
; CHECK-LE-NEXT: extsw 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: testadd_8:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li 3, 8
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 5
; CHECK-BE-NEXT: extsw 3, 3
; CHECK-BE-NEXT: blr
entry:
  %vecext = extractelement <4 x i32> %a, i32 2
  %add = add nsw i32 %vecext, %c
  ret i32 %add
}

; When extracting word element 1 on BE, it's better to use mfvsrwz rather than vextuwlx
; Constant-index (1) word extract with a signext return. Expected: mfvsrwz
; for the big-endian triple; li 3, 4 followed by vextuwrx for the
; little-endian triple; both end in extsw.
define signext i32 @test9(<4 x i32> %a) {
; CHECK-LE-LABEL: test9:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li 3, 4
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: extsw 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test9:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mfvsrwz 3, 34
; CHECK-BE-NEXT: extsw 3, 3
; CHECK-BE-NEXT: blr
entry:
  %vecext = extractelement <4 x i32> %a, i32 1
  ret i32 %vecext
}

; test9 with the extracted word added (nsw) to %c; the expected sequences
; place an add between the extract and the extsw.
define signext i32 @testadd_9(<4 x i32> %a, i32 signext %c) {
; CHECK-LE-LABEL: testadd_9:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li 3, 4
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 5
; CHECK-LE-NEXT: extsw 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: testadd_9:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mfvsrwz 3, 34
; CHECK-BE-NEXT: add 3, 3, 5
; CHECK-BE-NEXT: extsw 3, 3
; CHECK-BE-NEXT: blr
entry:
  %vecext = extractelement <4 x i32> %a, i32 1
  %add = add nsw i32 %vecext, %c
  ret i32 %add
}