; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s

; This test just checks that the compiler doesn't crash.
; Each kernel below loads a value from %in, reinterprets it (where a bitcast
; is present) as another type of the same bit width, and stores the result
; to %out. Both pointers are in addrspace(1).

; Loads a <16 x i8> vector from %in and stores it unchanged to %out.
; Neither access carries an explicit alignment.
define amdgpu_kernel void @i8ptr_v16i8ptr(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; EG-LABEL: i8ptr_v16i8ptr:
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = load <16 x i8>, ptr addrspace(1) %in
  store <16 x i8> %0, ptr addrspace(1) %out
  ret void
}

; Loads a float (align 4), bitcasts it to <2 x i16>, and stores the vector
; (align 4).
define amdgpu_kernel void @f32_to_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; EG-LABEL: f32_to_v2i16:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %load = load float, ptr addrspace(1) %in, align 4
  %bc = bitcast float %load to <2 x i16>
  store <2 x i16> %bc, ptr addrspace(1) %out, align 4
  ret void
}

; Loads a <2 x i16> vector (align 4), bitcasts it to float, and stores the
; float (align 4).
define amdgpu_kernel void @v2i16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; EG-LABEL: v2i16_to_f32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %load = load <2 x i16>, ptr addrspace(1) %in, align 4
  %bc = bitcast <2 x i16> %load to float
  store float %bc, ptr addrspace(1) %out, align 4
  ret void
}

; Loads a <4 x i8> vector (align 4), bitcasts it to i32, and stores the
; integer (align 4).
define amdgpu_kernel void @v4i8_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; EG-LABEL: v4i8_to_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %load = load <4 x i8>, ptr addrspace(1) %in, align 4
  %bc = bitcast <4 x i8> %load to i32
  store i32 %bc, ptr addrspace(1) %out, align 4
  ret void
}

; Loads an i32 (align 4), bitcasts it to <4 x i8>, and stores the vector
; (align 4).
define amdgpu_kernel void @i32_to_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; EG-LABEL: i32_to_v4i8:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %load = load i32, ptr addrspace(1) %in, align 4
  %bc = bitcast i32 %load to <4 x i8>
  store <4 x i8> %bc, ptr addrspace(1) %out, align 4
  ret void
}

; Loads a <2 x i16> vector (align 4), bitcasts it to <4 x i8>, and stores the
; resulting vector (align 4).
define amdgpu_kernel void @v2i16_to_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; EG-LABEL: v2i16_to_v4i8:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %load = load <2 x i16>, ptr addrspace(1) %in, align 4
  %bc = bitcast <2 x i16> %load to <4 x i8>
  store <4 x i8> %bc, ptr addrspace(1) %out, align 4
  ret void
}

; This just checks for crash in BUILD_VECTOR/EXTRACT_ELEMENT combine
; the stack manipulation is tricky to follow
; TODO: This should only use one load
; The IR loads a <4 x i16> vector (align 2), bitcasts it to <8 x i8>,
; extracts the element at index 5, and stores that single i8 to %out.
define amdgpu_kernel void @v4i16_extract_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; EG-LABEL: v4i16_extract_i8:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 1 @6
; EG-NEXT: ALU 17, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T5.XW, T6.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T6.X, T5.X, 6, #1
; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
; EG-NEXT: ALU clause starting at 10:
; EG-NEXT: MOV * T5.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 11:
; EG-NEXT: LSHL * T0.W, T6.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: OR_INT * T0.W, PV.W, T5.X,
; EG-NEXT: MOV * T3.X, PV.W,
; EG-NEXT: MOV T0.Y, PV.X,
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: MOV * T1.W, literal.y,
; EG-NEXT: 3(4.203895e-45), 8(1.121039e-44)
; EG-NEXT: BFE_UINT T1.W, PV.Y, literal.x, PS,
; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
; EG-NEXT: 8(1.121039e-44), 3(4.203895e-45)
; EG-NEXT: LSHL T5.X, PV.W, PS,
; EG-NEXT: LSHL * T5.W, literal.x, PS,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T5.Y, 0.0,
; EG-NEXT: MOV * T5.Z, 0.0,
; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %load = load <4 x i16>, ptr addrspace(1) %in, align 2
  %bc = bitcast <4 x i16> %load to <8 x i8>
  %element = extractelement <8 x i8> %bc, i32 5
  store i8 %element, ptr addrspace(1) %out
  ret void
}

; Loads a <2 x i32> vector (align 8), bitcasts it to double, and stores the
; double (align 8).
define amdgpu_kernel void @bitcast_v2i32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; EG-LABEL: bitcast_v2i32_to_f64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %val = load <2 x i32>, ptr addrspace(1) %in, align 8
  %bc = bitcast <2 x i32> %val to double
  store double %bc, ptr addrspace(1) %out, align 8
  ret void
}