; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefixes=OPT,NONSTRESS
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefixes=OPTALL,OPT,STRESS
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefixes=OPTALL,DISABLE

; Check prefixes used in this file, as selected by the RUN lines above:
;   OPTALL    - checked by all three runs.
;   OPT       - checked by the default run and the -stress-cgp-ext-ld-promotion run.
;   NONSTRESS - checked by the default run only.
;   STRESS    - checked by the -stress-cgp-ext-ld-promotion run only.
;   DISABLE   - checked by the -disable-cgp-ext-ld-promotion run only.

; CodeGenPrepare should move the zext into the block with the load
; so that SelectionDAG can select it with the load.
; The expected output is the same for all three runs.
define void @foo(ptr %p, ptr %q) {
; OPTALL-LABEL: define void @foo(
; OPTALL-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
; OPTALL-NEXT: [[ENTRY:.*:]]
; OPTALL-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; OPTALL-NEXT: [[S:%.*]] = zext i8 [[T]] to i32
; OPTALL-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; OPTALL-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; OPTALL: [[TRUE]]:
; OPTALL-NEXT: store i32 [[S]], ptr [[Q]], align 4
; OPTALL-NEXT: ret void
; OPTALL: [[FALSE]]:
; OPTALL-NEXT: ret void
;
entry:
  %t = load i8, ptr %p
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %t to i32
  store i32 %s, ptr %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload if an operation with only one
; argument to explicitly extend is in the way.
; Make sure the operation is not promoted when the promotion pass is disabled.
define void @promoteOneArg(ptr %p, ptr %q) {
; OPT-LABEL: define void @promoteOneArg(
; OPT-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
; OPT-NEXT: [[ENTRY:.*:]]
; OPT-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; OPT-NEXT: [[PROMOTED:%.*]] = zext i8 [[T]] to i32
; OPT-NEXT: [[ADD:%.*]] = add nuw i32 [[PROMOTED]], 2
; OPT-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; OPT-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; OPT: [[TRUE]]:
; OPT-NEXT: store i32 [[ADD]], ptr [[Q]], align 4
; OPT-NEXT: ret void
; OPT: [[FALSE]]:
; OPT-NEXT: ret void
;
; DISABLE-LABEL: define void @promoteOneArg(
; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
; DISABLE-NEXT: [[ENTRY:.*:]]
; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT: [[ADD:%.*]] = add nuw i8 [[T]], 2
; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; DISABLE: [[TRUE]]:
; DISABLE-NEXT: [[S:%.*]] = zext i8 [[ADD]] to i32
; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4
; DISABLE-NEXT: ret void
; DISABLE: [[FALSE]]:
; DISABLE-NEXT: ret void
;
entry:
  %t = load i8, ptr %p
  %add = add nuw i8 %t, 2
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, ptr %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload if an operation with only one
; argument to explicitly extend is in the way.
; Version with sext.
define void @promoteOneArgSExt(ptr %p, ptr %q) {
; OPT-LABEL: define void @promoteOneArgSExt(
; OPT-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
; OPT-NEXT: [[ENTRY:.*:]]
; OPT-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; OPT-NEXT: [[PROMOTED:%.*]] = sext i8 [[T]] to i32
; OPT-NEXT: [[ADD:%.*]] = add nsw i32 [[PROMOTED]], 2
; OPT-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; OPT-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; OPT: [[TRUE]]:
; OPT-NEXT: store i32 [[ADD]], ptr [[Q]], align 4
; OPT-NEXT: ret void
; OPT: [[FALSE]]:
; OPT-NEXT: ret void
;
; DISABLE-LABEL: define void @promoteOneArgSExt(
; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
; DISABLE-NEXT: [[ENTRY:.*:]]
; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT: [[ADD:%.*]] = add nsw i8 [[T]], 2
; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; DISABLE: [[TRUE]]:
; DISABLE-NEXT: [[S:%.*]] = sext i8 [[ADD]] to i32
; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4
; DISABLE-NEXT: ret void
; DISABLE: [[FALSE]]:
; DISABLE-NEXT: ret void
;
entry:
  %t = load i8, ptr %p
  %add = add nsw i8 %t, 2
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, ptr %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload if an operation with two
; arguments to explicitly extend is in the way.
; Extending %add will create two extensions:
;   1. One for %b.
;   2. One for %t.
; #1 will not be removed as we do not know anything about %b.
; #2 may not be merged with the load because %t is used in a comparison.
; Since two extensions may be emitted in the end instead of one before the
; transformation, the regular heuristic does not apply the optimization.
define void @promoteTwoArgZext(ptr %p, ptr %q, i8 %b) {
; NONSTRESS-LABEL: define void @promoteTwoArgZext(
; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) {
; NONSTRESS-NEXT: [[ENTRY:.*:]]
; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; NONSTRESS-NEXT: [[ADD:%.*]] = add nuw i8 [[T]], [[B]]
; NONSTRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; NONSTRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; NONSTRESS: [[TRUE]]:
; NONSTRESS-NEXT: [[S:%.*]] = zext i8 [[ADD]] to i32
; NONSTRESS-NEXT: store i32 [[S]], ptr [[Q]], align 4
; NONSTRESS-NEXT: ret void
; NONSTRESS: [[FALSE]]:
; NONSTRESS-NEXT: ret void
;
; STRESS-LABEL: define void @promoteTwoArgZext(
; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) {
; STRESS-NEXT: [[ENTRY:.*:]]
; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; STRESS-NEXT: [[PROMOTED:%.*]] = zext i8 [[T]] to i32
; STRESS-NEXT: [[PROMOTED1:%.*]] = zext i8 [[B]] to i32
; STRESS-NEXT: [[ADD:%.*]] = add nuw i32 [[PROMOTED]], [[PROMOTED1]]
; STRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; STRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; STRESS: [[TRUE]]:
; STRESS-NEXT: store i32 [[ADD]], ptr [[Q]], align 4
; STRESS-NEXT: ret void
; STRESS: [[FALSE]]:
; STRESS-NEXT: ret void
;
; DISABLE-LABEL: define void @promoteTwoArgZext(
; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) {
; DISABLE-NEXT: [[ENTRY:.*:]]
; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT: [[ADD:%.*]] = add nuw i8 [[T]], [[B]]
; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; DISABLE: [[TRUE]]:
; DISABLE-NEXT: [[S:%.*]] = zext i8 [[ADD]] to i32
; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4
; DISABLE-NEXT: ret void
; DISABLE: [[FALSE]]:
; DISABLE-NEXT: ret void
;
entry:
  %t = load i8, ptr %p
  %add = add nuw i8 %t, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, ptr %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload if an operation with two
; arguments to explicitly extend is in the way.
; Version with sext.
define void @promoteTwoArgSExt(ptr %p, ptr %q, i8 %b) {
; NONSTRESS-LABEL: define void @promoteTwoArgSExt(
; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) {
; NONSTRESS-NEXT: [[ENTRY:.*:]]
; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; NONSTRESS-NEXT: [[ADD:%.*]] = add nsw i8 [[T]], [[B]]
; NONSTRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; NONSTRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; NONSTRESS: [[TRUE]]:
; NONSTRESS-NEXT: [[S:%.*]] = sext i8 [[ADD]] to i32
; NONSTRESS-NEXT: store i32 [[S]], ptr [[Q]], align 4
; NONSTRESS-NEXT: ret void
; NONSTRESS: [[FALSE]]:
; NONSTRESS-NEXT: ret void
;
; STRESS-LABEL: define void @promoteTwoArgSExt(
; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) {
; STRESS-NEXT: [[ENTRY:.*:]]
; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; STRESS-NEXT: [[PROMOTED:%.*]] = sext i8 [[T]] to i32
; STRESS-NEXT: [[PROMOTED1:%.*]] = sext i8 [[B]] to i32
; STRESS-NEXT: [[ADD:%.*]] = add nsw i32 [[PROMOTED]], [[PROMOTED1]]
; STRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; STRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; STRESS: [[TRUE]]:
; STRESS-NEXT: store i32 [[ADD]], ptr [[Q]], align 4
; STRESS-NEXT: ret void
; STRESS: [[FALSE]]:
; STRESS-NEXT: ret void
;
; DISABLE-LABEL: define void @promoteTwoArgSExt(
; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) {
; DISABLE-NEXT: [[ENTRY:.*:]]
; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT: [[ADD:%.*]] = add nsw i8 [[T]], [[B]]
; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; DISABLE: [[TRUE]]:
; DISABLE-NEXT: [[S:%.*]] = sext i8 [[ADD]] to i32
; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4
; DISABLE-NEXT: ret void
; DISABLE: [[FALSE]]:
; DISABLE-NEXT: ret void
;
entry:
  %t = load i8, ptr %p
  %add = add nsw i8 %t, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, ptr %q
  ret void
false:
  ret void
}

; Check that we do not form a zextload if we need to introduce more than
; one additional extension.
; The promotion is only expected in the stress run (STRESS checks below).
define void @promoteThreeArgZext(ptr %p, ptr %q, i8 %b, i8 %c) {
; NONSTRESS-LABEL: define void @promoteThreeArgZext(
; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
; NONSTRESS-NEXT: [[ENTRY:.*:]]
; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; NONSTRESS-NEXT: [[TMP:%.*]] = add nuw i8 [[T]], [[B]]
; NONSTRESS-NEXT: [[ADD:%.*]] = add nuw i8 [[TMP]], [[C]]
; NONSTRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; NONSTRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; NONSTRESS: [[TRUE]]:
; NONSTRESS-NEXT: [[S:%.*]] = zext i8 [[ADD]] to i32
; NONSTRESS-NEXT: store i32 [[S]], ptr [[Q]], align 4
; NONSTRESS-NEXT: ret void
; NONSTRESS: [[FALSE]]:
; NONSTRESS-NEXT: ret void
;
; STRESS-LABEL: define void @promoteThreeArgZext(
; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
; STRESS-NEXT: [[ENTRY:.*:]]
; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i32
; STRESS-NEXT: [[PROMOTED3:%.*]] = zext i8 [[B]] to i32
; STRESS-NEXT: [[TMP:%.*]] = add nuw i32 [[PROMOTED2]], [[PROMOTED3]]
; STRESS-NEXT: [[PROMOTED1:%.*]] = zext i8 [[C]] to i32
; STRESS-NEXT: [[ADD:%.*]] = add nuw i32 [[TMP]], [[PROMOTED1]]
; STRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; STRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; STRESS: [[TRUE]]:
; STRESS-NEXT: store i32 [[ADD]], ptr [[Q]], align 4
; STRESS-NEXT: ret void
; STRESS: [[FALSE]]:
; STRESS-NEXT: ret void
;
; DISABLE-LABEL: define void @promoteThreeArgZext(
; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
; DISABLE-NEXT: [[ENTRY:.*:]]
; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT: [[TMP:%.*]] = add nuw i8 [[T]], [[B]]
; DISABLE-NEXT: [[ADD:%.*]] = add nuw i8 [[TMP]], [[C]]
; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; DISABLE: [[TRUE]]:
; DISABLE-NEXT: [[S:%.*]] = zext i8 [[ADD]] to i32
; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4
; DISABLE-NEXT: ret void
; DISABLE: [[FALSE]]:
; DISABLE-NEXT: ret void
;
entry:
  %t = load i8, ptr %p
  %tmp = add nuw i8 %t, %b
  %add = add nuw i8 %tmp, %c
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, ptr %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload after promoting and merging
; two extensions.
define void @promoteMergeExtArgZExt(ptr %p, ptr %q, i16 %b) {
; NONSTRESS-LABEL: define void @promoteMergeExtArgZExt(
; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) {
; NONSTRESS-NEXT: [[ENTRY:.*:]]
; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; NONSTRESS-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i16
; NONSTRESS-NEXT: [[ADD:%.*]] = add nuw i16 [[EXT]], [[B]]
; NONSTRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; NONSTRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; NONSTRESS: [[TRUE]]:
; NONSTRESS-NEXT: [[S:%.*]] = zext i16 [[ADD]] to i32
; NONSTRESS-NEXT: store i32 [[S]], ptr [[Q]], align 4
; NONSTRESS-NEXT: ret void
; NONSTRESS: [[FALSE]]:
; NONSTRESS-NEXT: ret void
;
; STRESS-LABEL: define void @promoteMergeExtArgZExt(
; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) {
; STRESS-NEXT: [[ENTRY:.*:]]
; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i32
; STRESS-NEXT: [[PROMOTED1:%.*]] = zext i16 [[B]] to i32
; STRESS-NEXT: [[ADD:%.*]] = add nuw i32 [[PROMOTED2]], [[PROMOTED1]]
; STRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; STRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; STRESS: [[TRUE]]:
; STRESS-NEXT: store i32 [[ADD]], ptr [[Q]], align 4
; STRESS-NEXT: ret void
; STRESS: [[FALSE]]:
; STRESS-NEXT: ret void
;
; DISABLE-LABEL: define void @promoteMergeExtArgZExt(
; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) {
; DISABLE-NEXT: [[ENTRY:.*:]]
; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i16
; DISABLE-NEXT: [[ADD:%.*]] = add nuw i16 [[EXT]], [[B]]
; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; DISABLE: [[TRUE]]:
; DISABLE-NEXT: [[S:%.*]] = zext i16 [[ADD]] to i32
; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4
; DISABLE-NEXT: ret void
; DISABLE: [[FALSE]]:
; DISABLE-NEXT: ret void
;
entry:
  %t = load i8, ptr %p
  %ext = zext i8 %t to i16
  %add = add nuw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i16 %add to i32
  store i32 %s, ptr %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload after promoting and merging
; two extensions.
; Version with sext.
; NOTE(review): only the outer extension (%s) is a sext here; the load is
; still zero-extended (%ext is a zext), so the merged extension in the STRESS
; checks is a zext of the load. "sextload" above may be imprecise - confirm.
define void @promoteMergeExtArgSExt(ptr %p, ptr %q, i16 %b) {
; NONSTRESS-LABEL: define void @promoteMergeExtArgSExt(
; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) {
; NONSTRESS-NEXT: [[ENTRY:.*:]]
; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; NONSTRESS-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i16
; NONSTRESS-NEXT: [[ADD:%.*]] = add nsw i16 [[EXT]], [[B]]
; NONSTRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; NONSTRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; NONSTRESS: [[TRUE]]:
; NONSTRESS-NEXT: [[S:%.*]] = sext i16 [[ADD]] to i32
; NONSTRESS-NEXT: store i32 [[S]], ptr [[Q]], align 4
; NONSTRESS-NEXT: ret void
; NONSTRESS: [[FALSE]]:
; NONSTRESS-NEXT: ret void
;
; STRESS-LABEL: define void @promoteMergeExtArgSExt(
; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) {
; STRESS-NEXT: [[ENTRY:.*:]]
; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i32
; STRESS-NEXT: [[PROMOTED1:%.*]] = sext i16 [[B]] to i32
; STRESS-NEXT: [[ADD:%.*]] = add nsw i32 [[PROMOTED2]], [[PROMOTED1]]
; STRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; STRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; STRESS: [[TRUE]]:
; STRESS-NEXT: store i32 [[ADD]], ptr [[Q]], align 4
; STRESS-NEXT: ret void
; STRESS: [[FALSE]]:
; STRESS-NEXT: ret void
;
; DISABLE-LABEL: define void @promoteMergeExtArgSExt(
; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) {
; DISABLE-NEXT: [[ENTRY:.*:]]
; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i16
; DISABLE-NEXT: [[ADD:%.*]] = add nsw i16 [[EXT]], [[B]]
; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20
; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; DISABLE: [[TRUE]]:
; DISABLE-NEXT: [[S:%.*]] = sext i16 [[ADD]] to i32
; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4
; DISABLE-NEXT: ret void
; DISABLE: [[FALSE]]:
; DISABLE-NEXT: ret void
;
entry:
  %t = load i8, ptr %p
  %ext = zext i8 %t to i16
  %add = add nsw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i16 %add to i32
  store i32 %s, ptr %q
  ret void
false:
  ret void
}

; Check that we manage to catch all the extload opportunities that are exposed
; by the different iterations of codegen prepare.
; Moreover, check that we do not promote more than we need to.
; Here is what is happening in this test (not necessarily in this order):
; 1. We try to promote the operand of %sextadd.
;    a. This creates one sext of %ld2 and one of %zextld
;    b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
;       introduce one. This is fine with the current heuristic: neutral.
;    => We have one zext of %zextld left and we created one sext of %ld2.
; 2. We try to promote the operand of %sextaddza.
;    a. This creates one sext of %zexta and one of %zextld
;    b. The sext of %zexta can be combined with the zext of %a.
;    c. The sext of %zextld leads to %ld and can be combined with it. This is
;       done by promoting %zextld. This is fine with the current heuristic:
;       neutral.
;    => We have created a new zext of %ld and we created one sext of %zexta.
; 3. We try to promote the operand of %sextaddb.
;    a. This creates one sext of %b and one of %zextld
;    b. The sext of %b is a dead-end, nothing to be done.
;    c. Same thing as 2.c. happens.
;    => We have created a new zext of %ld and we created one sext of %b.
; 4. We try to promote the operand of the zext of %zextld introduced in #1.
;    a. Same thing as 2.c. happens.
;    b. %zextld does not have any other uses. It is removed as dead code.
;    => We have created a new zext of %ld and we removed a zext of %zextld and
;       a zext of %ld.
; Currently we do not try to reuse existing extensions, so in the end we have
; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
define void @severalPromotions(ptr %addr1, ptr %addr2, i8 %a, i32 %b) {
; OPT-LABEL: define void @severalPromotions(
; OPT-SAME: ptr [[ADDR1:%.*]], ptr [[ADDR2:%.*]], i8 [[A:%.*]], i32 [[B:%.*]]) {
; OPT-NEXT: [[LD:%.*]] = load i8, ptr [[ADDR1]], align 1
; OPT-NEXT: [[PROMOTED9:%.*]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[PROMOTED6:%.*]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[LD2:%.*]] = load i32, ptr [[ADDR2]], align 4
; OPT-NEXT: [[PROMOTED:%.*]] = sext i32 [[LD2]] to i64
; OPT-NEXT: [[PROMOTED2:%.*]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ADD:%.*]] = add nsw i64 [[PROMOTED]], [[PROMOTED2]]
; OPT-NEXT: [[PROMOTED5:%.*]] = zext i8 [[A]] to i64
; OPT-NEXT: [[ADDZA:%.*]] = add nsw i64 [[PROMOTED5]], [[PROMOTED6]]
; OPT-NEXT: [[PROMOTED7:%.*]] = sext i32 [[B]] to i64
; OPT-NEXT: [[ADDB:%.*]] = add nsw i64 [[PROMOTED7]], [[PROMOTED9]]
; OPT-NEXT: call void @dummy(i64 [[ADD]], i64 [[ADDZA]], i64 [[ADDB]])
; OPT-NEXT: ret void
;
; DISABLE-LABEL: define void @severalPromotions(
; DISABLE-SAME: ptr [[ADDR1:%.*]], ptr [[ADDR2:%.*]], i8 [[A:%.*]], i32 [[B:%.*]]) {
; DISABLE-NEXT: [[LD:%.*]] = load i8, ptr [[ADDR1]], align 1
; DISABLE-NEXT: [[ZEXTLD:%.*]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[LD2:%.*]] = load i32, ptr [[ADDR2]], align 4
; DISABLE-NEXT: [[ADD:%.*]] = add nsw i32 [[LD2]], [[ZEXTLD]]
; DISABLE-NEXT: [[SEXTADD:%.*]] = sext i32 [[ADD]] to i64
; DISABLE-NEXT: [[ZEXTA:%.*]] = zext i8 [[A]] to i32
; DISABLE-NEXT: [[ADDZA:%.*]] = add nsw i32 [[ZEXTA]], [[ZEXTLD]]
; DISABLE-NEXT: [[SEXTADDZA:%.*]] = sext i32 [[ADDZA]] to i64
; DISABLE-NEXT: [[ADDB:%.*]] = add nsw i32 [[B]], [[ZEXTLD]]
; DISABLE-NEXT: [[SEXTADDB:%.*]] = sext i32 [[ADDB]] to i64
; DISABLE-NEXT: call void @dummy(i64 [[SEXTADD]], i64 [[SEXTADDZA]], i64 [[SEXTADDB]])
; DISABLE-NEXT: ret void
;
  %ld = load i8, ptr %addr1
  %zextld = zext i8 %ld to i32
  %ld2 = load i32, ptr %addr2
  %add = add nsw i32 %ld2, %zextld
  %sextadd = sext i32 %add to i64
  %zexta = zext i8 %a to i32
  %addza = add nsw i32 %zexta, %zextld
  %sextaddza = sext i32 %addza to i64
  %addb = add nsw i32 %b, %zextld
  %sextaddb = sext i32 %addb to i64
  call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
  ret void
}

; External sink that keeps the three extended values in @severalPromotions
; alive.
declare void @dummy(i64, i64, i64)

; Make sure we do not try to promote vector types since the type promotion
; helper does not support them for now.
define void @vectorPromotion() {
; OPTALL-LABEL: define void @vectorPromotion() {
; OPTALL-NEXT: [[ENTRY:.*:]]
; OPTALL-NEXT: [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, splat (i32 8)
; OPTALL-NEXT: [[B:%.*]] = zext <2 x i32> [[A]] to <2 x i64>
; OPTALL-NEXT: ret void
;
entry:
  %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
  %b = zext <2 x i32> %a to <2 x i64>
  ret void
}

; Globals whose addresses are compared in @promotionOfArgEndsUpInValue.
@a = common global i32 0, align 4
@c = common global [2 x i32] zeroinitializer, align 4

; Make sure we support promotion of operands that produce a Value as opposed
; to an instruction.
; This used to cause a crash.
define i32 @promotionOfArgEndsUpInValue(ptr %addr) {
; OPT-LABEL: define i32 @promotionOfArgEndsUpInValue(
; OPT-SAME: ptr [[ADDR:%.*]]) {
; OPT-NEXT: [[ENTRY:.*:]]
; OPT-NEXT: [[VAL:%.*]] = load i16, ptr [[ADDR]], align 2
; OPT-NEXT: [[PROMOTED:%.*]] = sext i16 [[VAL]] to i32
; OPT-NEXT: [[CMP:%.*]] = icmp ne ptr getelementptr inbounds ([2 x i32], ptr @c, i64 0, i64 1), @a
; OPT-NEXT: [[PROMOTED2:%.*]] = zext i1 [[CMP]] to i32
; OPT-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[PROMOTED]], [[PROMOTED2]]
; OPT-NEXT: ret i32 [[ADD]]
;
; DISABLE-LABEL: define i32 @promotionOfArgEndsUpInValue(
; DISABLE-SAME: ptr [[ADDR:%.*]]) {
; DISABLE-NEXT: [[ENTRY:.*:]]
; DISABLE-NEXT: [[VAL:%.*]] = load i16, ptr [[ADDR]], align 2
; DISABLE-NEXT: [[CMP:%.*]] = icmp ne ptr getelementptr inbounds ([2 x i32], ptr @c, i64 0, i64 1), @a
; DISABLE-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i16
; DISABLE-NEXT: [[ADD:%.*]] = add nuw nsw i16 [[VAL]], [[EXT]]
; DISABLE-NEXT: [[CONV3:%.*]] = sext i16 [[ADD]] to i32
; DISABLE-NEXT: ret i32 [[CONV3]]
;
entry:
  %val = load i16, ptr %addr
  %cmp = icmp ne ptr getelementptr inbounds ([2 x i32], ptr @c, i64 0, i64 1), @a
  %ext = zext i1 %cmp to i16
  %add = add nuw nsw i16 %val, %ext
  %conv3 = sext i16 %add to i32
  ret i32 %conv3
}

; Check that we see that one zext can be derived from the other for free:
; %s extends %add2, which is itself computed from the already-extended %zextt.
define void @promoteTwoArgZextWithSourceExtendedTwice(ptr %p, ptr %q, i32 %b, ptr %addr) {
; OPT-LABEL: define void @promoteTwoArgZextWithSourceExtendedTwice(
; OPT-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
; OPT-NEXT: [[ENTRY:.*:]]
; OPT-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; OPT-NEXT: [[PROMOTED1:%.*]] = zext i8 [[T]] to i64
; OPT-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; OPT-NEXT: [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]]
; OPT-NEXT: [[ADD2:%.*]] = add nuw i64 [[PROMOTED1]], 12
; OPT-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4
; OPT-NEXT: store i64 [[ADD2]], ptr [[Q]], align 8
; OPT-NEXT: ret void
;
; DISABLE-LABEL: define void @promoteTwoArgZextWithSourceExtendedTwice(
; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
; DISABLE-NEXT: [[ENTRY:.*:]]
; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; DISABLE-NEXT: [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]]
; DISABLE-NEXT: [[ADD2:%.*]] = add nuw i32 [[ZEXTT]], 12
; DISABLE-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4
; DISABLE-NEXT: [[S:%.*]] = zext i32 [[ADD2]] to i64
; DISABLE-NEXT: store i64 [[S]], ptr [[Q]], align 8
; DISABLE-NEXT: ret void
;
entry:
  %t = load i8, ptr %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %add2 = add nuw i32 %zextt, 12
  store i32 %add, ptr %addr
  %s = zext i32 %add2 to i64
  store i64 %s, ptr %q
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we had promoted
; all the way through the load, we would end up with a free zext and a
; non-free sext (of %b).
define void @doNotPromoteFreeSExtFromAddrMode(ptr %p, i32 %b, ptr %addr) {
; NONSTRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode(
; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
; NONSTRESS-NEXT: [[ENTRY:.*:]]
; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; NONSTRESS-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; NONSTRESS-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
; NONSTRESS-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64
; NONSTRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[IDX64]]
; NONSTRESS-NEXT: store i32 [[ADD]], ptr [[STADDR]], align 4
; NONSTRESS-NEXT: ret void
;
; STRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode(
; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
; STRESS-NEXT: [[ENTRY:.*:]]
; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; STRESS-NEXT: [[PROMOTED3:%.*]] = zext i8 [[T]] to i64
; STRESS-NEXT: [[PROMOTED2:%.*]] = sext i32 [[B]] to i64
; STRESS-NEXT: [[ADD:%.*]] = add nsw i64 [[PROMOTED3]], [[PROMOTED2]]
; STRESS-NEXT: [[PROMOTED:%.*]] = trunc i64 [[ADD]] to i32
; STRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[ADD]]
; STRESS-NEXT: store i32 [[PROMOTED]], ptr [[STADDR]], align 4
; STRESS-NEXT: ret void
;
; DISABLE-LABEL: define void @doNotPromoteFreeSExtFromAddrMode(
; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
; DISABLE-NEXT: [[ENTRY:.*:]]
; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; DISABLE-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
; DISABLE-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64
; DISABLE-NEXT: [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[IDX64]]
; DISABLE-NEXT: store i32 [[ADD]], ptr [[STADDR]], align 4
; DISABLE-NEXT: ret void
;
entry:
  %t = load i8, ptr %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i32, ptr %addr, i64 %idx64
  store i32 %add, ptr %staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we had promoted
; all the way through the load, we would end up with a free zext and a
; non-free sext (of %b).
; Same as above, but the address indexes i64 elements.
define void @doNotPromoteFreeSExtFromAddrMode64(ptr %p, i32 %b, ptr %addr, i64 %stuff) {
; NONSTRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode64(
; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i64 [[STUFF:%.*]]) {
; NONSTRESS-NEXT: [[ENTRY:.*:]]
; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; NONSTRESS-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; NONSTRESS-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
; NONSTRESS-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64
; NONSTRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i64, ptr [[ADDR]], i64 [[IDX64]]
; NONSTRESS-NEXT: store i64 [[STUFF]], ptr [[STADDR]], align 8
; NONSTRESS-NEXT: ret void
;
; STRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode64(
; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i64 [[STUFF:%.*]]) {
; STRESS-NEXT: [[ENTRY:.*:]]
; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i64
; STRESS-NEXT: [[PROMOTED1:%.*]] = sext i32 [[B]] to i64
; STRESS-NEXT: [[ADD:%.*]] = add nsw i64 [[PROMOTED2]], [[PROMOTED1]]
; STRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i64, ptr [[ADDR]], i64 [[ADD]]
; STRESS-NEXT: store i64 [[STUFF]], ptr [[STADDR]], align 8
; STRESS-NEXT: ret void
;
; DISABLE-LABEL: define void @doNotPromoteFreeSExtFromAddrMode64(
; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i64 [[STUFF:%.*]]) {
; DISABLE-NEXT: [[ENTRY:.*:]]
; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; DISABLE-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
; DISABLE-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64
; DISABLE-NEXT: [[STADDR:%.*]] = getelementptr inbounds i64, ptr [[ADDR]], i64 [[IDX64]]
; DISABLE-NEXT: store i64 [[STUFF]], ptr [[STADDR]], align 8
; DISABLE-NEXT: ret void
;
entry:
  %t = load i8, ptr %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i64, ptr %addr, i64 %idx64
  store i64 %stuff, ptr %staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we had promoted
; all the way through the load, we would end up with a free zext and a
; non-free sext (of %b).
; Same as above, but the address indexes i128 elements.
define void @doNotPromoteFreeSExtFromAddrMode128(ptr %p, i32 %b, ptr %addr, i128 %stuff) {
; NONSTRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode128(
; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i128 [[STUFF:%.*]]) {
; NONSTRESS-NEXT: [[ENTRY:.*:]]
; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; NONSTRESS-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; NONSTRESS-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
; NONSTRESS-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64
; NONSTRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i128, ptr [[ADDR]], i64 [[IDX64]]
; NONSTRESS-NEXT: store i128 [[STUFF]], ptr [[STADDR]], align 16
; NONSTRESS-NEXT: ret void
;
; STRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode128(
; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i128 [[STUFF:%.*]]) {
; STRESS-NEXT: [[ENTRY:.*:]]
; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i64
; STRESS-NEXT: [[PROMOTED1:%.*]] = sext i32 [[B]] to i64
; STRESS-NEXT: [[ADD:%.*]] = add nsw i64 [[PROMOTED2]], [[PROMOTED1]]
; STRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i128, ptr [[ADDR]], i64 [[ADD]]
; STRESS-NEXT: store i128 [[STUFF]], ptr [[STADDR]], align 16
; STRESS-NEXT: ret void
;
; DISABLE-LABEL: define void @doNotPromoteFreeSExtFromAddrMode128(
; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i128 [[STUFF:%.*]]) {
; DISABLE-NEXT: [[ENTRY:.*:]]
; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; DISABLE-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
; DISABLE-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64
; DISABLE-NEXT: [[STADDR:%.*]] = getelementptr inbounds i128, ptr [[ADDR]], i64 [[IDX64]]
; DISABLE-NEXT: store i128 [[STUFF]], ptr [[STADDR]], align 16
; DISABLE-NEXT: ret void
;
entry:
  %t = load i8, ptr %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i128, ptr %addr, i64 %idx64
  store i128 %stuff, ptr %staddr
  ret void
}


; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we had promoted
; all the way through the load, we would end up with a free zext and a
; non-free sext (of %b).
; NOTE(review): unlike the i32/i64/i128 variants above, the checks for the
; i256 variant below use the OPT prefix and expect the promotion in both the
; default and the stress runs. The wording above looks copied from the
; previous tests and may not describe this case - confirm.
define void @promoteSExtFromAddrMode256(ptr %p, i32 %b, ptr %addr, i256 %stuff) {
; OPT-LABEL: define void @promoteSExtFromAddrMode256(
; OPT-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i256 [[STUFF:%.*]]) {
; OPT-NEXT:  [[ENTRY:.*:]]
; OPT-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
; OPT-NEXT:    [[PROMOTED2:%.*]] = zext i8 [[T]] to i64
; OPT-NEXT:    [[PROMOTED1:%.*]] = sext i32 [[B]] to i64
; OPT-NEXT:    [[ADD:%.*]] = add nsw i64 [[PROMOTED2]], [[PROMOTED1]]
; OPT-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i256, ptr [[ADDR]], i64 [[ADD]]
; OPT-NEXT:    store i256 [[STUFF]], ptr [[STADDR]], align 16
; OPT-NEXT:    ret void
;
; DISABLE-LABEL: define void @promoteSExtFromAddrMode256(
; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i256 [[STUFF:%.*]]) {
; DISABLE-NEXT:  [[ENTRY:.*:]]
; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; DISABLE-NEXT:    [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
; DISABLE-NEXT:    [[IDX64:%.*]] = sext i32 [[ADD]] to i64
; DISABLE-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i256, ptr [[ADDR]], i64 [[IDX64]]
; DISABLE-NEXT:    store i256 [[STUFF]], ptr [[STADDR]], align 16
; DISABLE-NEXT:    ret void
;
; NOTE(review): unlike the i128 variant, promotion does happen here outside
; of stress mode: the OPT checks (shared by the NONSTRESS and STRESS runs)
; expect the add to be performed in i64 on the extended operands, and only
; the DISABLE run keeps the sext of the i32 add. The "do not increase the
; cost" comment that precedes this function therefore does not describe what
; is checked. Presumably the sext cannot be folded into the addressing mode
; of an i256 access, so it was never free -- TODO confirm.
entry:
  %t = load i8, ptr %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i256, ptr %addr, i64 %idx64
  store i256 %stuff, ptr %staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has two free zexts.
; When we promote all the way through the load, we end up with
; a free zext and a non-free zext (of %b).
; However, the current target lowering says zext i32 to i64 is free
; so the promotion happens because the cost did not change and may
; expose more opportunities.
; This would need to be fixed at some point.
;
; This transformation should really happen only for stress mode.
;
; NOTE(review): the NONSTRESS checks below keep the i32 add followed by the
; zext to i64, so the promotion is currently expected only for the STRESS
; run. The remark above that the promotion "happens" and "would need to be
; fixed" may be stale -- TODO confirm.
define void @doNotPromoteFreeZExtFromAddrMode(ptr %p, i32 %b, ptr %addr) {
; NONSTRESS-LABEL: define void @doNotPromoteFreeZExtFromAddrMode(
; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
; NONSTRESS-NEXT:  [[ENTRY:.*:]]
; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
; NONSTRESS-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; NONSTRESS-NEXT:    [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]]
; NONSTRESS-NEXT:    [[IDX64:%.*]] = zext i32 [[ADD]] to i64
; NONSTRESS-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[IDX64]]
; NONSTRESS-NEXT:    store i32 [[ADD]], ptr [[STADDR]], align 4
; NONSTRESS-NEXT:    ret void
;
; STRESS-LABEL: define void @doNotPromoteFreeZExtFromAddrMode(
; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
; STRESS-NEXT:  [[ENTRY:.*:]]
; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
; STRESS-NEXT:    [[PROMOTED3:%.*]] = zext i8 [[T]] to i64
; STRESS-NEXT:    [[PROMOTED2:%.*]] = zext i32 [[B]] to i64
; STRESS-NEXT:    [[ADD:%.*]] = add nuw i64 [[PROMOTED3]], [[PROMOTED2]]
; STRESS-NEXT:    [[PROMOTED:%.*]] = trunc i64 [[ADD]] to i32
; STRESS-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[ADD]]
; STRESS-NEXT:    store i32 [[PROMOTED]], ptr [[STADDR]], align 4
; STRESS-NEXT:    ret void
;
; DISABLE-LABEL: define void @doNotPromoteFreeZExtFromAddrMode(
; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
; DISABLE-NEXT:  [[ENTRY:.*:]]
; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; DISABLE-NEXT:    [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]]
; DISABLE-NEXT:    [[IDX64:%.*]] = zext i32 [[ADD]] to i64
; DISABLE-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[IDX64]]
; DISABLE-NEXT:    store i32 [[ADD]], ptr [[STADDR]], align 4
; DISABLE-NEXT:    ret void
;
entry:
  %t = load i8, ptr %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %idx64 = zext i32 %add to i64
  %staddr = getelementptr inbounds i32, ptr %addr, i64 %idx64
  store i32 %add, ptr %staddr
  ret void
}

; Same pattern as the sext addressing-mode tests, except that the sign
; extended index feeds a shl by 12 instead of a getelementptr.
; The NONSTRESS and DISABLE runs expect the i32 add followed by the sext to
; be left alone; only the STRESS run expects the add to be promoted to i64.
define i64 @doNotPromoteFreeSExtFromShift(ptr %p, i32 %b) {
; NONSTRESS-LABEL: define i64 @doNotPromoteFreeSExtFromShift(
; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) {
; NONSTRESS-NEXT:  [[ENTRY:.*:]]
; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
; NONSTRESS-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; NONSTRESS-NEXT:    [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
; NONSTRESS-NEXT:    [[IDX64:%.*]] = sext i32 [[ADD]] to i64
; NONSTRESS-NEXT:    [[STADDR:%.*]] = shl i64 [[IDX64]], 12
; NONSTRESS-NEXT:    ret i64 [[STADDR]]
;
; STRESS-LABEL: define i64 @doNotPromoteFreeSExtFromShift(
; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) {
; STRESS-NEXT:  [[ENTRY:.*:]]
; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
; STRESS-NEXT:    [[PROMOTED2:%.*]] = zext i8 [[T]] to i64
; STRESS-NEXT:    [[PROMOTED1:%.*]] = sext i32 [[B]] to i64
; STRESS-NEXT:    [[ADD:%.*]] = add nsw i64 [[PROMOTED2]], [[PROMOTED1]]
; STRESS-NEXT:    [[STADDR:%.*]] = shl i64 [[ADD]], 12
; STRESS-NEXT:    ret i64 [[STADDR]]
;
; DISABLE-LABEL: define i64 @doNotPromoteFreeSExtFromShift(
; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) {
; DISABLE-NEXT:  [[ENTRY:.*:]]
; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; DISABLE-NEXT:    [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
; DISABLE-NEXT:    [[IDX64:%.*]] = sext i32 [[ADD]] to i64
; DISABLE-NEXT:    [[STADDR:%.*]] = shl i64 [[IDX64]], 12
; DISABLE-NEXT:    ret i64 [[STADDR]]
;
entry:
  %t = load i8, ptr %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = shl i64 %idx64, 12
  ret i64 %staddr
}

; Same comment as doNotPromoteFreeZExtFromAddrMode.
;
; This transformation should really happen only for stress mode.
;
; NOTE(review): as checked below, only the STRESS run expects the add to be
; promoted to i64; NONSTRESS and DISABLE expect the input to be unchanged.
define i64 @doNotPromoteFreeZExtFromShift(ptr %p, i32 %b) {
; NONSTRESS-LABEL: define i64 @doNotPromoteFreeZExtFromShift(
; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) {
; NONSTRESS-NEXT:  [[ENTRY:.*:]]
; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
; NONSTRESS-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; NONSTRESS-NEXT:    [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]]
; NONSTRESS-NEXT:    [[IDX64:%.*]] = zext i32 [[ADD]] to i64
; NONSTRESS-NEXT:    [[STADDR:%.*]] = shl i64 [[IDX64]], 12
; NONSTRESS-NEXT:    ret i64 [[STADDR]]
;
; STRESS-LABEL: define i64 @doNotPromoteFreeZExtFromShift(
; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) {
; STRESS-NEXT:  [[ENTRY:.*:]]
; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
; STRESS-NEXT:    [[PROMOTED2:%.*]] = zext i8 [[T]] to i64
; STRESS-NEXT:    [[PROMOTED1:%.*]] = zext i32 [[B]] to i64
; STRESS-NEXT:    [[ADD:%.*]] = add nuw i64 [[PROMOTED2]], [[PROMOTED1]]
; STRESS-NEXT:    [[STADDR:%.*]] = shl i64 [[ADD]], 12
; STRESS-NEXT:    ret i64 [[STADDR]]
;
; DISABLE-LABEL: define i64 @doNotPromoteFreeZExtFromShift(
; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) {
; DISABLE-NEXT:  [[ENTRY:.*:]]
; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
; DISABLE-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
; DISABLE-NEXT:    [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]]
; DISABLE-NEXT:    [[IDX64:%.*]] = zext i32 [[ADD]] to i64
; DISABLE-NEXT:    [[STADDR:%.*]] = shl i64 [[IDX64]], 12
; DISABLE-NEXT:    ret i64 [[STADDR]]
;
entry:
  %t = load i8, ptr %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %idx64 = zext i32 %add to i64
  %staddr = shl i64 %idx64, 12
  ret i64 %staddr
}

; The input has one free zext and one non-free sext.
; When we promote all the way through to the load, we end up with
; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
; However, when we generate the load pair, the free sext (%ld1) becomes
; non-free. So technically, we trade one non-free sext for two non-free
; sexts.
; This would need to be fixed at some point.
;
; This transformation should really happen only for stress mode.
define i64 @doNotPromoteBecauseOfPairedLoad(ptr %p, i32 %cst) {
; OPT-LABEL: define i64 @doNotPromoteBecauseOfPairedLoad(
; OPT-SAME: ptr [[P:%.*]], i32 [[CST:%.*]]) {
; OPT-NEXT:    [[LD0:%.*]] = load i32, ptr [[P]], align 4
; OPT-NEXT:    [[IDXLD1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
; OPT-NEXT:    [[LD1:%.*]] = load i32, ptr [[IDXLD1]], align 4
; OPT-NEXT:    [[PROMOTED:%.*]] = sext i32 [[LD1]] to i64
; OPT-NEXT:    [[PROMOTED1:%.*]] = sext i32 [[CST]] to i64
; OPT-NEXT:    [[RES:%.*]] = add nsw i64 [[PROMOTED]], [[PROMOTED1]]
; OPT-NEXT:    [[ZEXTLD0:%.*]] = zext i32 [[LD0]] to i64
; OPT-NEXT:    [[FINAL:%.*]] = add i64 [[RES]], [[ZEXTLD0]]
; OPT-NEXT:    ret i64 [[FINAL]]
;
; DISABLE-LABEL: define i64 @doNotPromoteBecauseOfPairedLoad(
; DISABLE-SAME: ptr [[P:%.*]], i32 [[CST:%.*]]) {
; DISABLE-NEXT:    [[LD0:%.*]] = load i32, ptr [[P]], align 4
; DISABLE-NEXT:    [[IDXLD1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
; DISABLE-NEXT:    [[LD1:%.*]] = load i32, ptr [[IDXLD1]], align 4
; DISABLE-NEXT:    [[RES:%.*]] = add nsw i32 [[LD1]], [[CST]]
; DISABLE-NEXT:    [[SEXTRES:%.*]] = sext i32 [[RES]] to i64
; DISABLE-NEXT:    [[ZEXTLD0:%.*]] = zext i32 [[LD0]] to i64
; DISABLE-NEXT:    [[FINAL:%.*]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
; DISABLE-NEXT:    ret i64 [[FINAL]]
;
  %ld0 = load i32, ptr %p
  %idxLd1 = getelementptr inbounds i32, ptr %p, i64 1
  %ld1 = load i32, ptr %idxLd1
  %res = add nsw i32 %ld1, %cst
  %sextres = sext i32 %res to i64
  %zextLd0 = zext i32 %ld0 to i64
  %final = add i64 %sextres, %zextLd0
  ret i64 %final
}

; The load is in the entry block; its zext, a shl nsw and a sext to i64 are
; in %if.then.
; With promotion enabled (OPT), the load is expected to be extended directly
; to i64 in the entry block, the shl to be performed in i64, and the sext to
; disappear.
; With promotion disabled (DISABLE), the zext to i32 is still expected next
; to the load in the entry block, but the i32 shl and the sext remain.
define i64 @promoteZextShl(i1 %c, ptr %P) {
; OPT-LABEL: define i64 @promoteZextShl(
; OPT-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
; OPT-NEXT:  [[ENTRY:.*:]]
; OPT-NEXT:    [[LD:%.*]] = load i16, ptr [[P]], align 2
; OPT-NEXT:    [[PROMOTED1:%.*]] = zext i16 [[LD]] to i64
; OPT-NEXT:    br i1 [[C]], label %[[END:.*]], label %[[IF_THEN:.*]]
; OPT:       [[IF_THEN]]:
; OPT-NEXT:    [[SHL2:%.*]] = shl nsw i64 [[PROMOTED1]], 1
; OPT-NEXT:    ret i64 [[SHL2]]
; OPT:       [[END]]:
; OPT-NEXT:    ret i64 0
;
; DISABLE-LABEL: define i64 @promoteZextShl(
; DISABLE-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
; DISABLE-NEXT:  [[ENTRY:.*:]]
; DISABLE-NEXT:    [[LD:%.*]] = load i16, ptr [[P]], align 2
; DISABLE-NEXT:    [[Z:%.*]] = zext i16 [[LD]] to i32
; DISABLE-NEXT:    br i1 [[C]], label %[[END:.*]], label %[[IF_THEN:.*]]
; DISABLE:       [[IF_THEN]]:
; DISABLE-NEXT:    [[SHL2:%.*]] = shl nsw i32 [[Z]], 1
; DISABLE-NEXT:    [[R:%.*]] = sext i32 [[SHL2]] to i64
; DISABLE-NEXT:    ret i64 [[R]]
; DISABLE:       [[END]]:
; DISABLE-NEXT:    ret i64 0
;
entry:
  %ld = load i16, ptr %P
  br i1 %c, label %end, label %if.then
if.then:
  %z = zext i16 %ld to i32
  %shl2 = shl nsw i32 %z, 1
  %r = sext i32 %shl2 to i64
  ret i64 %r
end:
  ret i64 0
}