; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s

; Codegen test for @main on gfx900 with "amdgpu-waves-per-eu"="10,10" (see
; attribute group #0 at the bottom of the file). The function loads several
; <8 x i32> and <4 x i32> values from the constant address space and passes
; them to image-sample intrinsics in the entry block and in each successor
; path. The expected output exercises SGPR spilling into VGPR lanes: v5
; receives the saved s30-s31 and s36-s67 and restores them before returning,
; while v7 and v6 hold spilled load results (v_writelane_b32 in one place,
; v_readlane_b32 where the value is needed again).
; NOTE(review): the regression this test was reduced from is not recorded
; here - confirm against the commit that introduced the file.
;
; The assertions inside the function are generated. Regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

define void @main(i1 %arg) #0 {
; CHECK-LABEL: main:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: v_writelane_b32 v5, s30, 0
; CHECK-NEXT: v_writelane_b32 v5, s31, 1
; CHECK-NEXT: v_writelane_b32 v5, s36, 2
; CHECK-NEXT: v_writelane_b32 v5, s37, 3
; CHECK-NEXT: v_writelane_b32 v5, s38, 4
; CHECK-NEXT: v_writelane_b32 v5, s39, 5
; CHECK-NEXT: v_writelane_b32 v5, s40, 6
; CHECK-NEXT: v_writelane_b32 v5, s41, 7
; CHECK-NEXT: v_writelane_b32 v5, s42, 8
; CHECK-NEXT: v_writelane_b32 v5, s43, 9
; CHECK-NEXT: v_writelane_b32 v5, s44, 10
; CHECK-NEXT: v_writelane_b32 v5, s45, 11
; CHECK-NEXT: v_writelane_b32 v5, s46, 12
; CHECK-NEXT: v_writelane_b32 v5, s47, 13
; CHECK-NEXT: v_writelane_b32 v5, s48, 14
; CHECK-NEXT: v_writelane_b32 v5, s49, 15
; CHECK-NEXT: s_getpc_b64 s[24:25]
; CHECK-NEXT: v_writelane_b32 v5, s50, 16
; CHECK-NEXT: s_movk_i32 s4, 0xf0
; CHECK-NEXT: s_mov_b32 s5, s24
; CHECK-NEXT: v_writelane_b32 v5, s51, 17
; CHECK-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0
; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
; CHECK-NEXT: s_mov_b64 s[4:5], 0
; CHECK-NEXT: s_load_dwordx4 s[28:31], s[4:5], 0x0
; CHECK-NEXT: s_movk_i32 s20, 0x130
; CHECK-NEXT: s_mov_b32 s21, s24
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_writelane_b32 v7, s36, 0
; CHECK-NEXT: v_writelane_b32 v7, s37, 1
; CHECK-NEXT: v_writelane_b32 v7, s38, 2
; CHECK-NEXT: v_writelane_b32 v7, s39, 3
; CHECK-NEXT: v_writelane_b32 v7, s40, 4
; CHECK-NEXT: v_writelane_b32 v7, s41, 5
; CHECK-NEXT: v_writelane_b32 v7, s42, 6
; CHECK-NEXT: v_writelane_b32 v7, s43, 7
; CHECK-NEXT: v_writelane_b32 v7, s44, 8
; CHECK-NEXT: v_writelane_b32 v7, s45, 9
; CHECK-NEXT: v_writelane_b32 v7, s46, 10
; CHECK-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0
; CHECK-NEXT: v_writelane_b32 v7, s47, 11
; CHECK-NEXT: v_writelane_b32 v7, s48, 12
; CHECK-NEXT: s_mov_b32 s20, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_writelane_b32 v7, s49, 13
; CHECK-NEXT: v_mov_b32_e32 v2, s28
; CHECK-NEXT: v_mov_b32_e32 v3, v1
; CHECK-NEXT: s_mov_b32 s21, s20
; CHECK-NEXT: s_mov_b32 s22, s20
; CHECK-NEXT: s_mov_b32 s23, s20
; CHECK-NEXT: v_writelane_b32 v7, s50, 14
; CHECK-NEXT: v_writelane_b32 v7, s51, 15
; CHECK-NEXT: image_sample_lz v3, v[2:3], s[44:51], s[20:23] dmask:0x1
; CHECK-NEXT: v_mov_b32_e32 v2, v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_writelane_b32 v7, s4, 16
; CHECK-NEXT: v_writelane_b32 v7, s5, 17
; CHECK-NEXT: v_writelane_b32 v7, s6, 18
; CHECK-NEXT: v_writelane_b32 v7, s7, 19
; CHECK-NEXT: v_writelane_b32 v7, s8, 20
; CHECK-NEXT: v_writelane_b32 v7, s9, 21
; CHECK-NEXT: image_sample_lz v4, v[1:2], s[4:11], s[20:23] dmask:0x1
; CHECK-NEXT: v_writelane_b32 v7, s10, 22
; CHECK-NEXT: v_writelane_b32 v7, s11, 23
; CHECK-NEXT: v_writelane_b32 v7, s12, 24
; CHECK-NEXT: v_writelane_b32 v7, s13, 25
; CHECK-NEXT: v_writelane_b32 v7, s14, 26
; CHECK-NEXT: v_writelane_b32 v7, s15, 27
; CHECK-NEXT: v_writelane_b32 v5, s52, 18
; CHECK-NEXT: v_writelane_b32 v7, s16, 28
; CHECK-NEXT: v_writelane_b32 v5, s53, 19
; CHECK-NEXT: v_writelane_b32 v7, s17, 29
; CHECK-NEXT: v_writelane_b32 v5, s54, 20
; CHECK-NEXT: v_writelane_b32 v7, s18, 30
; CHECK-NEXT: s_mov_b32 s26, 48
; CHECK-NEXT: s_mov_b32 s27, s24
; CHECK-NEXT: v_writelane_b32 v5, s55, 21
; CHECK-NEXT: v_writelane_b32 v7, s19, 31
; CHECK-NEXT: s_load_dwordx8 s[4:11], s[26:27], 0x0
; CHECK-NEXT: v_writelane_b32 v5, s56, 22
; CHECK-NEXT: v_writelane_b32 v5, s57, 23
; CHECK-NEXT: v_writelane_b32 v5, s58, 24
; CHECK-NEXT: v_writelane_b32 v5, s59, 25
; CHECK-NEXT: v_writelane_b32 v5, s60, 26
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_writelane_b32 v7, s4, 32
; CHECK-NEXT: v_writelane_b32 v5, s61, 27
; CHECK-NEXT: v_writelane_b32 v7, s5, 33
; CHECK-NEXT: v_writelane_b32 v5, s62, 28
; CHECK-NEXT: v_writelane_b32 v7, s6, 34
; CHECK-NEXT: v_writelane_b32 v5, s63, 29
; CHECK-NEXT: v_writelane_b32 v7, s7, 35
; CHECK-NEXT: v_writelane_b32 v5, s64, 30
; CHECK-NEXT: v_writelane_b32 v7, s8, 36
; CHECK-NEXT: v_writelane_b32 v5, s65, 31
; CHECK-NEXT: v_writelane_b32 v7, s9, 37
; CHECK-NEXT: v_writelane_b32 v5, s66, 32
; CHECK-NEXT: s_movk_i32 s28, 0x1f0
; CHECK-NEXT: s_movk_i32 s30, 0x2f0
; CHECK-NEXT: s_mov_b32 s29, s24
; CHECK-NEXT: s_mov_b32 s31, s24
; CHECK-NEXT: v_writelane_b32 v7, s10, 38
; CHECK-NEXT: v_writelane_b32 v5, s67, 33
; CHECK-NEXT: v_writelane_b32 v7, s11, 39
; CHECK-NEXT: s_load_dwordx16 s[52:67], s[28:29], 0x0
; CHECK-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; CHECK-NEXT: s_xor_b64 s[24:25], vcc, -1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_mul_f32_e32 v0, v4, v3
; CHECK-NEXT: s_and_saveexec_b64 s[26:27], s[24:25]
; CHECK-NEXT: s_xor_b64 s[26:27], exec, s[26:27]
; CHECK-NEXT: s_cbranch_execz .LBB0_3
; CHECK-NEXT: ; %bb.1: ; %bb48
; CHECK-NEXT: v_readlane_b32 s36, v7, 0
; CHECK-NEXT: v_readlane_b32 s44, v7, 8
; CHECK-NEXT: v_readlane_b32 s45, v7, 9
; CHECK-NEXT: v_readlane_b32 s46, v7, 10
; CHECK-NEXT: v_readlane_b32 s47, v7, 11
; CHECK-NEXT: v_readlane_b32 s48, v7, 12
; CHECK-NEXT: v_readlane_b32 s49, v7, 13
; CHECK-NEXT: v_readlane_b32 s50, v7, 14
; CHECK-NEXT: v_readlane_b32 s51, v7, 15
; CHECK-NEXT: s_and_b64 vcc, exec, -1
; CHECK-NEXT: v_readlane_b32 s37, v7, 1
; CHECK-NEXT: v_readlane_b32 s38, v7, 2
; CHECK-NEXT: v_readlane_b32 s39, v7, 3
; CHECK-NEXT: v_readlane_b32 s40, v7, 4
; CHECK-NEXT: image_sample_lz v3, v[1:2], s[44:51], s[20:23] dmask:0x1
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: v_readlane_b32 s41, v7, 5
; CHECK-NEXT: v_readlane_b32 s42, v7, 6
; CHECK-NEXT: v_readlane_b32 s43, v7, 7
; CHECK-NEXT: .LBB0_2: ; %bb50
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_readlane_b32 s36, v7, 32
; CHECK-NEXT: v_readlane_b32 s40, v7, 36
; CHECK-NEXT: v_readlane_b32 s41, v7, 37
; CHECK-NEXT: v_readlane_b32 s42, v7, 38
; CHECK-NEXT: v_readlane_b32 s43, v7, 39
; CHECK-NEXT: s_mov_b32 s21, s20
; CHECK-NEXT: s_mov_b32 s22, s20
; CHECK-NEXT: s_mov_b32 s23, s20
; CHECK-NEXT: v_readlane_b32 s37, v7, 33
; CHECK-NEXT: v_readlane_b32 s38, v7, 34
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: image_sample_lz v4, v[1:2], s[60:67], s[40:43] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s39, v7, 35
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_sub_f32_e32 v1, v1, v4
; CHECK-NEXT: v_mul_f32_e32 v1, v1, v0
; CHECK-NEXT: v_mul_f32_e32 v1, v1, v3
; CHECK-NEXT: s_mov_b64 vcc, vcc
; CHECK-NEXT: s_cbranch_vccnz .LBB0_2
; CHECK-NEXT: .LBB0_3: ; %Flow14
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_readlane_b32 s12, v7, 32
; CHECK-NEXT: v_readlane_b32 s13, v7, 33
; CHECK-NEXT: v_readlane_b32 s14, v7, 34
; CHECK-NEXT: v_readlane_b32 s15, v7, 35
; CHECK-NEXT: v_readlane_b32 s16, v7, 36
; CHECK-NEXT: v_readlane_b32 s17, v7, 37
; CHECK-NEXT: v_readlane_b32 s18, v7, 38
; CHECK-NEXT: v_readlane_b32 s19, v7, 39
; CHECK-NEXT: v_writelane_b32 v7, s4, 40
; CHECK-NEXT: v_writelane_b32 v7, s5, 41
; CHECK-NEXT: v_writelane_b32 v7, s6, 42
; CHECK-NEXT: v_writelane_b32 v7, s7, 43
; CHECK-NEXT: v_writelane_b32 v7, s8, 44
; CHECK-NEXT: v_writelane_b32 v7, s9, 45
; CHECK-NEXT: v_writelane_b32 v7, s10, 46
; CHECK-NEXT: v_writelane_b32 v7, s11, 47
; CHECK-NEXT: v_writelane_b32 v7, s12, 48
; CHECK-NEXT: v_writelane_b32 v7, s13, 49
; CHECK-NEXT: v_writelane_b32 v7, s14, 50
; CHECK-NEXT: v_writelane_b32 v7, s15, 51
; CHECK-NEXT: v_writelane_b32 v7, s16, 52
; CHECK-NEXT: v_writelane_b32 v7, s17, 53
; CHECK-NEXT: v_writelane_b32 v7, s18, 54
; CHECK-NEXT: v_writelane_b32 v7, s19, 55
; CHECK-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v7, s52, 56
; CHECK-NEXT: v_writelane_b32 v6, s60, 0
; CHECK-NEXT: v_writelane_b32 v7, s53, 57
; CHECK-NEXT: v_writelane_b32 v6, s61, 1
; CHECK-NEXT: v_writelane_b32 v7, s54, 58
; CHECK-NEXT: v_writelane_b32 v6, s62, 2
; CHECK-NEXT: v_writelane_b32 v7, s55, 59
; CHECK-NEXT: v_writelane_b32 v6, s63, 3
; CHECK-NEXT: v_writelane_b32 v7, s56, 60
; CHECK-NEXT: v_writelane_b32 v6, s64, 4
; CHECK-NEXT: v_writelane_b32 v7, s57, 61
; CHECK-NEXT: v_writelane_b32 v6, s65, 5
; CHECK-NEXT: v_writelane_b32 v7, s58, 62
; CHECK-NEXT: v_writelane_b32 v6, s66, 6
; CHECK-NEXT: v_writelane_b32 v7, s59, 63
; CHECK-NEXT: v_writelane_b32 v6, s67, 7
; CHECK-NEXT: s_andn2_saveexec_b64 s[20:21], s[26:27]
; CHECK-NEXT: s_cbranch_execz .LBB0_10
; CHECK-NEXT: ; %bb.4: ; %bb32
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[24:25]
; CHECK-NEXT: s_xor_b64 s[22:23], exec, s[8:9]
; CHECK-NEXT: s_cbranch_execz .LBB0_6
; CHECK-NEXT: ; %bb.5: ; %bb43
; CHECK-NEXT: s_mov_b32 s8, 0
; CHECK-NEXT: s_mov_b32 s9, s8
; CHECK-NEXT: v_mov_b32_e32 v0, s8
; CHECK-NEXT: v_readlane_b32 s36, v7, 0
; CHECK-NEXT: v_mov_b32_e32 v1, s9
; CHECK-NEXT: s_mov_b32 s10, s8
; CHECK-NEXT: s_mov_b32 s11, s8
; CHECK-NEXT: v_readlane_b32 s37, v7, 1
; CHECK-NEXT: v_readlane_b32 s38, v7, 2
; CHECK-NEXT: v_readlane_b32 s39, v7, 3
; CHECK-NEXT: v_readlane_b32 s40, v7, 4
; CHECK-NEXT: v_readlane_b32 s41, v7, 5
; CHECK-NEXT: v_readlane_b32 s42, v7, 6
; CHECK-NEXT: v_readlane_b32 s43, v7, 7
; CHECK-NEXT: v_readlane_b32 s44, v7, 8
; CHECK-NEXT: v_readlane_b32 s45, v7, 9
; CHECK-NEXT: v_readlane_b32 s46, v7, 10
; CHECK-NEXT: v_readlane_b32 s47, v7, 11
; CHECK-NEXT: v_readlane_b32 s48, v7, 12
; CHECK-NEXT: v_readlane_b32 s49, v7, 13
; CHECK-NEXT: v_readlane_b32 s50, v7, 14
; CHECK-NEXT: v_readlane_b32 s51, v7, 15
; CHECK-NEXT: image_sample_lz v2, v[0:1], s[36:43], s[8:11] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s36, v7, 16
; CHECK-NEXT: v_readlane_b32 s44, v7, 24
; CHECK-NEXT: v_readlane_b32 s45, v7, 25
; CHECK-NEXT: v_readlane_b32 s46, v7, 26
; CHECK-NEXT: v_readlane_b32 s47, v7, 27
; CHECK-NEXT: v_readlane_b32 s48, v7, 28
; CHECK-NEXT: v_readlane_b32 s49, v7, 29
; CHECK-NEXT: v_readlane_b32 s50, v7, 30
; CHECK-NEXT: v_readlane_b32 s51, v7, 31
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: v_mov_b32_e32 v4, v3
; CHECK-NEXT: v_readlane_b32 s37, v7, 17
; CHECK-NEXT: v_readlane_b32 s38, v7, 18
; CHECK-NEXT: v_readlane_b32 s39, v7, 19
; CHECK-NEXT: image_sample_lz v0, v[0:1], s[44:51], s[12:15] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s40, v7, 20
; CHECK-NEXT: v_readlane_b32 s41, v7, 21
; CHECK-NEXT: v_readlane_b32 s42, v7, 22
; CHECK-NEXT: v_readlane_b32 s43, v7, 23
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dwordx3 v[2:4], off, s[8:11], 0
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: .LBB0_6: ; %Flow12
; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[22:23]
; CHECK-NEXT: v_readlane_b32 s52, v7, 40
; CHECK-NEXT: v_readlane_b32 s53, v7, 41
; CHECK-NEXT: v_readlane_b32 s54, v7, 42
; CHECK-NEXT: v_readlane_b32 s55, v7, 43
; CHECK-NEXT: v_readlane_b32 s56, v7, 44
; CHECK-NEXT: v_readlane_b32 s57, v7, 45
; CHECK-NEXT: v_readlane_b32 s58, v7, 46
; CHECK-NEXT: v_readlane_b32 s59, v7, 47
; CHECK-NEXT: v_readlane_b32 s60, v7, 48
; CHECK-NEXT: v_readlane_b32 s61, v7, 49
; CHECK-NEXT: v_readlane_b32 s62, v7, 50
; CHECK-NEXT: v_readlane_b32 s63, v7, 51
; CHECK-NEXT: v_readlane_b32 s64, v7, 52
; CHECK-NEXT: v_readlane_b32 s65, v7, 53
; CHECK-NEXT: v_readlane_b32 s66, v7, 54
; CHECK-NEXT: v_readlane_b32 s67, v7, 55
; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_cbranch_execz .LBB0_9
; CHECK-NEXT: ; %bb.7: ; %bb33.preheader
; CHECK-NEXT: s_mov_b32 s8, 0
; CHECK-NEXT: s_mov_b32 s6, s8
; CHECK-NEXT: s_mov_b32 s7, s8
; CHECK-NEXT: v_mov_b32_e32 v1, s6
; CHECK-NEXT: v_readlane_b32 s36, v7, 56
; CHECK-NEXT: s_mov_b32 s9, s8
; CHECK-NEXT: s_mov_b32 s10, s8
; CHECK-NEXT: s_mov_b32 s11, s8
; CHECK-NEXT: v_mov_b32_e32 v2, s7
; CHECK-NEXT: v_readlane_b32 s37, v7, 57
; CHECK-NEXT: v_readlane_b32 s38, v7, 58
; CHECK-NEXT: v_readlane_b32 s39, v7, 59
; CHECK-NEXT: v_readlane_b32 s40, v7, 60
; CHECK-NEXT: v_readlane_b32 s41, v7, 61
; CHECK-NEXT: v_readlane_b32 s42, v7, 62
; CHECK-NEXT: v_readlane_b32 s43, v7, 63
; CHECK-NEXT: s_nop 4
; CHECK-NEXT: image_sample_lz v3, v[1:2], s[36:43], s[8:11] dmask:0x1
; CHECK-NEXT: image_sample_lz v4, v[1:2], s[52:59], s[8:11] dmask:0x1
; CHECK-NEXT: ; kill: killed $vgpr1_vgpr2
; CHECK-NEXT: s_mov_b64 s[12:13], s[36:37]
; CHECK-NEXT: s_and_b64 vcc, exec, 0
; CHECK-NEXT: v_readlane_b32 s44, v6, 0
; CHECK-NEXT: v_readlane_b32 s45, v6, 1
; CHECK-NEXT: v_readlane_b32 s46, v6, 2
; CHECK-NEXT: v_readlane_b32 s47, v6, 3
; CHECK-NEXT: v_readlane_b32 s48, v6, 4
; CHECK-NEXT: v_readlane_b32 s49, v6, 5
; CHECK-NEXT: v_readlane_b32 s50, v6, 6
; CHECK-NEXT: v_readlane_b32 s51, v6, 7
; CHECK-NEXT: s_mov_b64 s[14:15], s[38:39]
; CHECK-NEXT: s_mov_b64 s[16:17], s[40:41]
; CHECK-NEXT: s_mov_b64 s[18:19], s[42:43]
; CHECK-NEXT: ; kill: killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
; CHECK-NEXT: ; kill: killed $sgpr8_sgpr9_sgpr10 killed $sgpr11
; CHECK-NEXT: ; kill: killed $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_sub_f32_e32 v1, v4, v3
; CHECK-NEXT: v_mul_f32_e32 v0, v1, v0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: .LBB0_8: ; %bb33
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_add_f32_e32 v2, v1, v0
; CHECK-NEXT: v_sub_f32_e32 v1, v1, v2
; CHECK-NEXT: s_mov_b64 vcc, vcc
; CHECK-NEXT: s_cbranch_vccz .LBB0_8
; CHECK-NEXT: .LBB0_9: ; %Flow13
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: .LBB0_10: ; %UnifiedReturnBlock
; CHECK-NEXT: s_or_b64 exec, exec, s[20:21]
; CHECK-NEXT: v_readlane_b32 s67, v5, 33
; CHECK-NEXT: v_readlane_b32 s66, v5, 32
; CHECK-NEXT: v_readlane_b32 s65, v5, 31
; CHECK-NEXT: v_readlane_b32 s64, v5, 30
; CHECK-NEXT: v_readlane_b32 s63, v5, 29
; CHECK-NEXT: v_readlane_b32 s62, v5, 28
; CHECK-NEXT: v_readlane_b32 s61, v5, 27
; CHECK-NEXT: v_readlane_b32 s60, v5, 26
; CHECK-NEXT: v_readlane_b32 s59, v5, 25
; CHECK-NEXT: v_readlane_b32 s58, v5, 24
; CHECK-NEXT: v_readlane_b32 s57, v5, 23
; CHECK-NEXT: v_readlane_b32 s56, v5, 22
; CHECK-NEXT: v_readlane_b32 s55, v5, 21
; CHECK-NEXT: v_readlane_b32 s54, v5, 20
; CHECK-NEXT: v_readlane_b32 s53, v5, 19
; CHECK-NEXT: v_readlane_b32 s52, v5, 18
; CHECK-NEXT: v_readlane_b32 s51, v5, 17
; CHECK-NEXT: v_readlane_b32 s50, v5, 16
; CHECK-NEXT: v_readlane_b32 s49, v5, 15
; CHECK-NEXT: v_readlane_b32 s48, v5, 14
; CHECK-NEXT: v_readlane_b32 s47, v5, 13
; CHECK-NEXT: v_readlane_b32 s46, v5, 12
; CHECK-NEXT: v_readlane_b32 s45, v5, 11
; CHECK-NEXT: v_readlane_b32 s44, v5, 10
; CHECK-NEXT: v_readlane_b32 s43, v5, 9
; CHECK-NEXT: v_readlane_b32 s42, v5, 8
; CHECK-NEXT: v_readlane_b32 s41, v5, 7
; CHECK-NEXT: v_readlane_b32 s40, v5, 6
; CHECK-NEXT: v_readlane_b32 s39, v5, 5
; CHECK-NEXT: v_readlane_b32 s38, v5, 4
; CHECK-NEXT: v_readlane_b32 s37, v5, 3
; CHECK-NEXT: v_readlane_b32 s36, v5, 2
; CHECK-NEXT: v_readlane_b32 s31, v5, 1
; CHECK-NEXT: v_readlane_b32 s30, v5, 0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
  ; Form an addrspace(4) base pointer from the PC value: its truncated low
  ; 32 bits go into element 1 of a <2 x i32> whose element 0 stays zero, and
  ; the pair is bitcast to i64 before the inttoptr.
  %i = call i64 @llvm.amdgcn.s.getpc()
  %i1 = trunc i64 %i to i32
  %i2 = insertelement <2 x i32> zeroinitializer, i32 %i1, i64 1
  %i3 = bitcast <2 x i32> %i2 to i64
  %i4 = inttoptr i64 %i3 to ptr addrspace(4)
  ; Loads at fixed byte offsets from that base. %i6 and %i8 are later used as
  ; the <4 x i32> operand of image samples; the even-numbered values %i10
  ; through %i24 are used as the <8 x i32> operand.
  %i5 = getelementptr i8, ptr addrspace(4) %i4, i64 48
  %i6 = load <4 x i32>, ptr addrspace(4) %i5, align 16
  %i7 = getelementptr i8, ptr addrspace(4) %i4, i64 64
  %i8 = load <4 x i32>, ptr addrspace(4) %i7, align 16
  %i9 = getelementptr i8, ptr addrspace(4) %i4, i64 240
  %i10 = load <8 x i32>, ptr addrspace(4) %i9, align 32
  %i11 = getelementptr i8, ptr addrspace(4) %i4, i64 272
  %i12 = load <8 x i32>, ptr addrspace(4) %i11, align 32
  %i13 = getelementptr i8, ptr addrspace(4) %i4, i64 304
  %i14 = load <8 x i32>, ptr addrspace(4) %i13, align 32
  %i15 = getelementptr i8, ptr addrspace(4) %i4, i64 336
  %i16 = load <8 x i32>, ptr addrspace(4) %i15, align 32
  %i17 = getelementptr i8, ptr addrspace(4) %i4, i64 496
  %i18 = load <8 x i32>, ptr addrspace(4) %i17, align 32
  %i19 = getelementptr i8, ptr addrspace(4) %i4, i64 528
  %i20 = load <8 x i32>, ptr addrspace(4) %i19, align 32
  %i21 = getelementptr i8, ptr addrspace(4) %i4, i64 752
  %i22 = load <8 x i32>, ptr addrspace(4) %i21, align 32
  %i23 = getelementptr i8, ptr addrspace(4) %i4, i64 784
  %i24 = load <8 x i32>, ptr addrspace(4) %i23, align 32
  ; Load through the null addrspace(4) pointer; only element 0 is used, as the
  ; first coordinate of the %i27 sample.
  %i25 = load <4 x float>, ptr addrspace(4) null, align 16
  %i26 = extractelement <4 x float> %i25, i64 0
  %i27 = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 1, float %i26, float 0.000000e+00, <8 x i32> %i12, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
  %i28 = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %i14, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
  %i29 = extractelement <4 x float> %i28, i64 0
  ; %i30 is consumed in both loops (%bb33 and %bb50); %i31 is only used by the
  ; second buffer store in %bb43.
  %i30 = fmul float %i29, %i27
  %i31 = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %i16, <4 x i32> %i6, i1 false, i32 0, i32 0)
  ; This branch and the one in %bb32 test the same %arg.
  br i1 %arg, label %bb32, label %bb48

bb32: ; preds = %bb
  br i1 %arg, label %bb33, label %bb43

; Block that branches unconditionally back to itself (no exit edge). The two
; samples do not depend on the phi %i34.
bb33: ; preds = %bb33, %bb32
  %i34 = phi float [ %i42, %bb33 ], [ 0.000000e+00, %bb32 ]
  %i35 = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %i18, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
  %i36 = extractelement <2 x float> %i35, i64 0
  %i37 = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %i22, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
  %i38 = extractelement <2 x float> %i37, i64 0
  %i39 = fsub float %i38, %i36
  %i40 = fmul float %i39, %i30
  %i41 = fadd float %i34, %i40
  %i42 = fsub float %i34, %i41
  br label %bb33

; The only block that ends in a return: samples with %i10, then issues two raw
; buffer stores (the sample result in element 0 of a <3 x i32>, and %i31 as
; <4 x i32>), both with an all-zero <4 x i32> second operand.
bb43: ; preds = %bb32
  %i44 = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %i10, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
  %i45 = bitcast float %i44 to i32
  %i46 = insertelement <3 x i32> zeroinitializer, i32 %i45, i64 0
  call void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32> %i46, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0)
  %i47 = bitcast <4 x float> %i31 to <4 x i32>
  call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %i47, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0)
  ret void

bb48: ; preds = %bb
  %i49 = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %i12, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
  br label %bb50

; Block that branches unconditionally back to itself (no exit edge). The phi
; %i51 is the first coordinate of both samples, so they depend on the
; previous iteration.
bb50: ; preds = %bb50, %bb48
  %i51 = phi float [ 0.000000e+00, %bb48 ], [ %i58, %bb50 ]
  %i52 = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 1, float %i51, float 0.000000e+00, <8 x i32> %i20, <4 x i32> %i8, i1 false, i32 0, i32 0)
  %i53 = extractelement <2 x float> %i52, i64 0
  %i54 = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 1, float %i51, float 0.000000e+00, <8 x i32> %i24, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
  %i55 = extractelement <2 x float> %i54, i64 0
  %i56 = fsub float %i55, %i53
  %i57 = fmul float %i56, %i30
  %i58 = fmul float %i57, %i49
  br label %bb50
}

; Intrinsics used by @main.
declare i64 @llvm.amdgcn.s.getpc() #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
declare float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
declare <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32>, <4 x i32>, i32, i32, i32 immarg) #3
declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #3

; #0 is the attribute group applied to @main; its waves-per-EU string gives
; the same value (10) for both numbers.
; NOTE(review): presumably this limits the register budget enough to force the
; SGPR spills seen in the expected output - confirm against the AMDGPU docs.
attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #2 = { nocallback nofree nosync nounwind willreturn memory(read) }
attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) }