; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s

; Exercises GlobalISel selection of i8/i16 loads through kernel-argument
; pointers on three subtargets (fiji, gfx900, gfx1010).
;
; What the expected output below pins down:
;   * With "align 4" on the load, the sub-dword load is selected as a scalar
;     s_load_dword; sign/zero extension is then done with scalar ALU
;     instructions (s_sext_i32_i8 / s_sext_i32_i16 / s_and_b32).
;   * With "align 2" on the load, the load stays a byte/short vector memory
;     load (flat_load_* on fiji, global_load_* on gfx900/gfx1010).
;
; The assertion blocks are generated; after changing the IR, regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.

; i8 load from addrspace(4) with 4-byte alignment: expected to become an
; s_load_dword followed by a single byte store.
define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX8-LABEL: constant_load_i8_align4:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    flat_store_byte v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: constant_load_i8_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    global_store_byte v1, v0, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: constant_load_i8_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    global_store_byte v1, v0, s[0:1]
; GFX10-NEXT:    s_endpgm
  %ld = load i8, ptr addrspace(4) %in, align 4
  store i8 %ld, ptr addrspace(1) %out, align 4
  ret void
}

; i16 load from addrspace(4) with 4-byte alignment: expected to become an
; s_load_dword followed by a single short store.
define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX8-LABEL: constant_load_i16_align4:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    flat_store_short v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: constant_load_i16_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    global_store_short v1, v0, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: constant_load_i16_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    global_store_short v1, v0, s[0:1]
; GFX10-NEXT:    s_endpgm
  %ld = load i16, ptr addrspace(4) %in, align 4
  store i16 %ld, ptr addrspace(1) %out, align 4
  ret void
}

; i8 load from addrspace(1) with 4-byte alignment, sign-extended to i32:
; expected to become s_load_dword + s_sext_i32_i8 and one dword store.
define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: sextload_i8_to_i32_align4:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_sext_i32_i8 s2, s2
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: sextload_i8_to_i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_sext_i32_i8 s2, s2
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: sextload_i8_to_i32_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_sext_i32_i8 s2, s2
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX10-NEXT:    s_endpgm
  %load = load i8, ptr addrspace(1) %in, align 4
  %sext = sext i8 %load to i32
  store i32 %sext, ptr addrspace(1) %out, align 4
  ret void
}

; i16 load from addrspace(1) with 4-byte alignment, sign-extended to i32:
; expected to become s_load_dword + s_sext_i32_i16 and one dword store.
define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: sextload_i16_to_i32_align4:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_sext_i32_i16 s2, s2
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: sextload_i16_to_i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_sext_i32_i16 s2, s2
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: sextload_i16_to_i32_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_sext_i32_i16 s2, s2
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX10-NEXT:    s_endpgm
  %load = load i16, ptr addrspace(1) %in, align 4
  %sext = sext i16 %load to i32
  store i32 %sext, ptr addrspace(1) %out, align 4
  ret void
}

; i8 load from addrspace(1) with 4-byte alignment, zero-extended to i32:
; expected to become s_load_dword + s_and_b32 with 0xff and one dword store.
define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: zextload_i8_to_i32_align4:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_and_b32 s2, s2, 0xff
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: zextload_i8_to_i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_and_b32 s2, s2, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: zextload_i8_to_i32_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_and_b32 s2, s2, 0xff
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX10-NEXT:    s_endpgm
  %load = load i8, ptr addrspace(1) %in, align 4
  %zext = zext i8 %load to i32
  store i32 %zext, ptr addrspace(1) %out, align 4
  ret void
}

; i16 load from addrspace(1) with 4-byte alignment, zero-extended to i32:
; expected to become s_load_dword + s_and_b32 with 0xffff and one dword store.
define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: zextload_i16_to_i32_align4:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_and_b32 s2, s2, 0xffff
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: zextload_i16_to_i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_and_b32 s2, s2, 0xffff
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: zextload_i16_to_i32_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_and_b32 s2, s2, 0xffff
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX10-NEXT:    s_endpgm
  %load = load i16, ptr addrspace(1) %in, align 4
  %zext = zext i16 %load to i32
  store i32 %zext, ptr addrspace(1) %out, align 4
  ret void
}

; i8 load with only 2-byte alignment: expected to stay a byte load
; (flat_load_ubyte / global_load_ubyte) rather than a scalar dword load.
; NOTE(review): despite the "constant_" name, %in is ptr addrspace(1) here,
; not addrspace(4) as in the align4 constant tests; confirm this is intended.
define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: constant_load_i8_align2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_byte v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: constant_load_i8_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v1, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: constant_load_i8_align2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v1, v0, s[2:3]
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v0, v1, s[0:1]
; GFX10-NEXT:    s_endpgm
  %load = load i8, ptr addrspace(1) %in, align 2
  store i8 %load, ptr addrspace(1) %out, align 2
  ret void
}

; i16 load with only 2-byte alignment: expected to stay a short load
; (flat_load_ushort / global_load_ushort) rather than a scalar dword load.
; NOTE(review): %in is ptr addrspace(1) despite the "constant_" name.
define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: constant_load_i16_align2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    flat_load_ushort v2, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_short v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: constant_load_i16_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ushort v1, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: constant_load_i16_align2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ushort v1, v0, s[2:3]
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_short v0, v1, s[0:1]
; GFX10-NEXT:    s_endpgm
  %load = load i16, ptr addrspace(1) %in, align 2
  store i16 %load, ptr addrspace(1) %out, align 2
  ret void
}

; i8 load (align 2) sign-extended to i32 and stored with align 2: expected to
; use a signed byte load, with the i32 store emitted as two short stores
; (low half at offset 0, high half at offset 2).
; NOTE(review): %in is ptr addrspace(1) despite the "constant_" name.
define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: constant_sextload_i8_align2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    flat_load_sbyte v2, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    s_add_u32 s2, s0, 2
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    s_addc_u32 s3, s1, 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_short v[0:1], v2
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    flat_store_short v[0:1], v3
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: constant_sextload_i8_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_sbyte v1, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
; GFX9-NEXT:    global_store_short_d16_hi v0, v1, s[0:1] offset:2
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: constant_sextload_i8_align2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_sbyte v1, v0, s[2:3]
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_short v0, v1, s[0:1]
; GFX10-NEXT:    global_store_short_d16_hi v0, v1, s[0:1] offset:2
; GFX10-NEXT:    s_endpgm
  %load = load i8, ptr addrspace(1) %in, align 2
  %sextload = sext i8 %load to i32
  store i32 %sextload, ptr addrspace(1) %out, align 2
  ret void
}

; i8 load (align 2) zero-extended to i32 and stored with align 2: expected to
; use an unsigned byte load, with the i32 store emitted as two short stores;
; the store at offset 2 writes a register that was set to 0.
; NOTE(review): %in is ptr addrspace(1) despite the "constant_" name.
define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: constant_zextload_i8_align2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    v_mov_b32_e32 v5, 0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    flat_load_ubyte v4, v[0:1]
; GFX8-NEXT:    s_add_u32 s2, s0, 2
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    s_addc_u32 s3, s1, 0
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    v_mov_b32_e32 v3, s3
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_short v[0:1], v4
; GFX8-NEXT:    flat_store_short v[2:3], v5
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: constant_zextload_i8_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v1, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
; GFX9-NEXT:    global_store_short v0, v0, s[0:1] offset:2
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: constant_zextload_i8_align2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v1, v0, s[2:3]
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_short v0, v1, s[0:1]
; GFX10-NEXT:    global_store_short v0, v0, s[0:1] offset:2
; GFX10-NEXT:    s_endpgm
  %load = load i8, ptr addrspace(1) %in, align 2
  %zextload = zext i8 %load to i32
  store i32 %zextload, ptr addrspace(1) %out, align 2
  ret void
}

attributes #0 = { nounwind }