; RUN: llc -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s

; REQUIRES: asserts

; NOTE: Due to their nature the expected inserts and extracts often emit no
; instructions and so these tests verify the output of DAGCombiner directly.

target triple = "aarch64-unknown-linux-gnu"

; CHECK: Initial selection DAG: %bb.0 'insert_small_fixed_into_big_fixed:'
; CHECK: SelectionDAG has 10 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: v8i8,ch = CopyFromReg t0, Register:v8i8 %0
; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t6: v16i8 = insert_subvector undef:v16i8, t4, Constant:i64<0>
; CHECK: t8: ch,glue = CopyToReg t0, Register:v16i8 $q0, t6
; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v16i8 $q0, t8:1

; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_fixed_into_big_fixed:'
; CHECK: SelectionDAG has 9 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: v8i8,ch = CopyFromReg t0, Register:v8i8 %0
; CHECK: t10: v16i8 = insert_subvector undef:v16i8, t2, Constant:i64<0>
; CHECK: t8: ch,glue = CopyToReg t0, Register:v16i8 $q0, t10
; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v16i8 $q0, t8:1

; The v4i8 extract/insert pair folds to a single insert of the whole v8i8
; source into the v16i8 result.
define <16 x i8> @insert_small_fixed_into_big_fixed(<8 x i8> %a) #0 {
  %extract = call <4 x i8> @llvm.vector.extract(<8 x i8> %a, i64 0)
  %insert = call <16 x i8> @llvm.vector.insert(<16 x i8> undef, <4 x i8> %extract, i64 0)
  ret <16 x i8> %insert
}

; CHECK: Initial selection DAG: %bb.0 'insert_small_fixed_into_big_scalable:'
; CHECK: SelectionDAG has 10 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: v8i8,ch = CopyFromReg t0, Register:v8i8 %0
; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t6: nxv16i8 = insert_subvector undef:nxv16i8, t4, Constant:i64<0>
; CHECK: t8: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t6
; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:nxv16i8 $z0, t8:1

; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_fixed_into_big_scalable:'
; CHECK: SelectionDAG has 9 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: v8i8,ch = CopyFromReg t0, Register:v8i8 %0
; CHECK: t10: nxv16i8 = insert_subvector undef:nxv16i8, t2, Constant:i64<0>
; CHECK: t8: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t10
; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:nxv16i8 $z0, t8:1

; Same fold as above, but the v8i8 source is inserted into a scalable nxv16i8
; result.
define <vscale x 16 x i8> @insert_small_fixed_into_big_scalable(<8 x i8> %a) #0 {
  %extract = call <4 x i8> @llvm.vector.extract(<8 x i8> %a, i64 0)
  %insert = call <vscale x 16 x i8> @llvm.vector.insert(<vscale x 16 x i8> undef, <4 x i8> %extract, i64 0)
  ret <vscale x 16 x i8> %insert
}

; CHECK: Initial selection DAG: %bb.0 'insert_small_scalable_into_big_fixed:'
; CHECK: SelectionDAG has 11 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
; CHECK: t3: nxv8i8 = truncate t2
; CHECK: t5: v4i8 = extract_subvector t3, Constant:i64<0>
; CHECK: t7: v16i8 = insert_subvector undef:v16i8, t5, Constant:i64<0>
; CHECK: t9: ch,glue = CopyToReg t0, Register:v16i8 $q0, t7
; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:v16i8 $q0, t9:1

; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_scalable_into_big_fixed:'
; CHECK: SelectionDAG has 11 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
; CHECK: t3: nxv8i8 = truncate t2
; CHECK: t5: v4i8 = extract_subvector t3, Constant:i64<0>
; CHECK: t7: v16i8 = insert_subvector undef:v16i8, t5, Constant:i64<0>
; CHECK: t9: ch,glue = CopyToReg t0, Register:v16i8 $q0, t7
; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:v16i8 $q0, t9:1

; Resulting insert would not be legal, so there's no transformation.
define <16 x i8> @insert_small_scalable_into_big_fixed(<vscale x 8 x i8> %a) #0 {
  %extract = call <4 x i8> @llvm.vector.extract(<vscale x 8 x i8> %a, i64 0)
  %insert = call <16 x i8> @llvm.vector.insert(<16 x i8> undef, <4 x i8> %extract, i64 0)
  ret <16 x i8> %insert
}

; CHECK: Initial selection DAG: %bb.0 'insert_small_scalable_into_big_scalable_1:'
; CHECK: SelectionDAG has 11 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
; CHECK: t3: nxv8i8 = truncate t2
; CHECK: t5: v4i8 = extract_subvector t3, Constant:i64<0>
; CHECK: t7: nxv16i8 = insert_subvector undef:nxv16i8, t5, Constant:i64<0>
; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t7
; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv16i8 $z0, t9:1

; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_scalable_into_big_scalable_1:'
; CHECK: SelectionDAG has 10 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
; CHECK: t3: nxv8i8 = truncate t2
; CHECK: t11: nxv16i8 = insert_subvector undef:nxv16i8, t3, Constant:i64<0>
; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t11
; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv16i8 $z0, t9:1

; The intermediate subvector is fixed length (v4i8); the pair folds to a single
; insert of the nxv8i8 source into the nxv16i8 result.
define <vscale x 16 x i8> @insert_small_scalable_into_big_scalable_1(<vscale x 8 x i8> %a) #0 {
  %extract = call <4 x i8> @llvm.vector.extract(<vscale x 8 x i8> %a, i64 0)
  %insert = call <vscale x 16 x i8> @llvm.vector.insert(<vscale x 16 x i8> undef, <4 x i8> %extract, i64 0)
  ret <vscale x 16 x i8> %insert
}

; CHECK: Initial selection DAG: %bb.0 'insert_small_scalable_into_big_scalable_2:'
; CHECK: SelectionDAG has 11 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
; CHECK: t3: nxv8i8 = truncate t2
; CHECK: t5: nxv4i8 = extract_subvector t3, Constant:i64<0>
; CHECK: t7: nxv16i8 = insert_subvector undef:nxv16i8, t5, Constant:i64<0>
; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t7
; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv16i8 $z0, t9:1

; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_scalable_into_big_scalable_2:'
; CHECK: SelectionDAG has 10 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
; CHECK: t3: nxv8i8 = truncate t2
; CHECK: t11: nxv16i8 = insert_subvector undef:nxv16i8, t3, Constant:i64<0>
; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t11
; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv16i8 $z0, t9:1

; As the _1 variant, but the intermediate subvector is scalable (nxv4i8).
define <vscale x 16 x i8> @insert_small_scalable_into_big_scalable_2(<vscale x 8 x i8> %a) #0 {
  %extract = call <vscale x 4 x i8> @llvm.vector.extract(<vscale x 8 x i8> %a, i64 0)
  %insert = call <vscale x 16 x i8> @llvm.vector.insert(<vscale x 16 x i8> undef, <vscale x 4 x i8> %extract, i64 0)
  ret <vscale x 16 x i8> %insert
}

; CHECK: Initial selection DAG: %bb.0 'extract_small_fixed_from_big_fixed:'
; CHECK: SelectionDAG has 10 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t6: v8i8 = insert_subvector undef:v8i8, t4, Constant:i64<0>
; CHECK: t8: ch,glue = CopyToReg t0, Register:v8i8 $d0, t6
; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v8i8 $d0, t8:1

; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_fixed_from_big_fixed:'
; CHECK: SelectionDAG has 8 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
; CHECK: t10: v8i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t8: ch,glue = CopyToReg t0, Register:v8i8 $d0, t10
; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v8i8 $d0, t8:1

; The result (v8i8) is smaller than the source (v16i8), so the pair folds to a
; single extract of the result type from the source.
define <8 x i8> @extract_small_fixed_from_big_fixed(<16 x i8> %a) #0 {
  %extract = call <4 x i8> @llvm.vector.extract(<16 x i8> %a, i64 0)
  %insert = call <8 x i8> @llvm.vector.insert(<8 x i8> undef, <4 x i8> %extract, i64 0)
  ret <8 x i8> %insert
}

; CHECK: Initial selection DAG: %bb.0 'extract_small_scalable_from_big_fixed:'
; CHECK: SelectionDAG has 11 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t6: nxv8i8 = insert_subvector undef:nxv8i8, t4, Constant:i64<0>
; CHECK: t7: nxv8i16 = any_extend t6
; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1

; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_scalable_from_big_fixed:'
; CHECK: SelectionDAG has 11 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t6: nxv8i8 = insert_subvector undef:nxv8i8, t4, Constant:i64<0>
; CHECK: t7: nxv8i16 = any_extend t6
; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1

; Resulting extract would not be legal, so there's no transformation.
define <vscale x 8 x i8> @extract_small_scalable_from_big_fixed(<16 x i8> %a) #0 {
  %extract = call <4 x i8> @llvm.vector.extract(<16 x i8> %a, i64 0)
  %insert = call <vscale x 8 x i8> @llvm.vector.insert(<vscale x 8 x i8> undef, <4 x i8> %extract, i64 0)
  ret <vscale x 8 x i8> %insert
}

; CHECK: Initial selection DAG: %bb.0 'extract_small_fixed_from_big_scalable:'
; CHECK: SelectionDAG has 10 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t6: v8i8 = insert_subvector undef:v8i8, t4, Constant:i64<0>
; CHECK: t8: ch,glue = CopyToReg t0, Register:v8i8 $d0, t6
; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v8i8 $d0, t8:1

; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_fixed_from_big_scalable:'
; CHECK: SelectionDAG has 8 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
; CHECK: t10: v8i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t8: ch,glue = CopyToReg t0, Register:v8i8 $d0, t10
; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v8i8 $d0, t8:1

; The pair folds to a single extract of the fixed v8i8 result from the scalable
; nxv16i8 source.
define <8 x i8> @extract_small_fixed_from_big_scalable(<vscale x 16 x i8> %a) #0 {
  %extract = call <4 x i8> @llvm.vector.extract(<vscale x 16 x i8> %a, i64 0)
  %insert = call <8 x i8> @llvm.vector.insert(<8 x i8> undef, <4 x i8> %extract, i64 0)
  ret <8 x i8> %insert
}

; CHECK: Initial selection DAG: %bb.0 'extract_small_scalable_from_big_scalable_1:'
; CHECK: SelectionDAG has 11 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t6: nxv8i8 = insert_subvector undef:nxv8i8, t4, Constant:i64<0>
; CHECK: t7: nxv8i16 = any_extend t6
; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1

; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_scalable_from_big_scalable_1:'
; CHECK: SelectionDAG has 9 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
; CHECK: t11: nxv8i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t7: nxv8i16 = any_extend t11
; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1

; The intermediate subvector is fixed length (v4i8); the pair folds to a single
; extract of nxv8i8 from the nxv16i8 source.
define <vscale x 8 x i8> @extract_small_scalable_from_big_scalable_1(<vscale x 16 x i8> %a) #0 {
  %extract = call <4 x i8> @llvm.vector.extract(<vscale x 16 x i8> %a, i64 0)
  %insert = call <vscale x 8 x i8> @llvm.vector.insert(<vscale x 8 x i8> undef, <4 x i8> %extract, i64 0)
  ret <vscale x 8 x i8> %insert
}

; CHECK: Initial selection DAG: %bb.0 'extract_small_scalable_from_big_scalable_2:'
; CHECK: SelectionDAG has 11 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
; CHECK: t4: nxv4i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t6: nxv8i8 = insert_subvector undef:nxv8i8, t4, Constant:i64<0>
; CHECK: t7: nxv8i16 = any_extend t6
; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1

; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_scalable_from_big_scalable_2:'
; CHECK: SelectionDAG has 9 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
; CHECK: t11: nxv8i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t7: nxv8i16 = any_extend t11
; CHECK: t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
; CHECK: t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1

; As the _1 variant, but the intermediate subvector is scalable (nxv4i8).
define <vscale x 8 x i8> @extract_small_scalable_from_big_scalable_2(<vscale x 16 x i8> %a) #0 {
  %extract = call <vscale x 4 x i8> @llvm.vector.extract(<vscale x 16 x i8> %a, i64 0)
  %insert = call <vscale x 8 x i8> @llvm.vector.insert(<vscale x 8 x i8> undef, <vscale x 4 x i8> %extract, i64 0)
  ret <vscale x 8 x i8> %insert
}

; CHECK: Initial selection DAG: %bb.0 'extract_fixed_from_scalable:'
; CHECK: SelectionDAG has 10 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t6: v16i8 = insert_subvector undef:v16i8, t4, Constant:i64<0>
; CHECK: t8: ch,glue = CopyToReg t0, Register:v16i8 $q0, t6
; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v16i8 $q0, t8:1

; CHECK: Optimized lowered selection DAG: %bb.0 'extract_fixed_from_scalable:'
; CHECK: SelectionDAG has 8 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
; CHECK: t10: v16i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t8: ch,glue = CopyToReg t0, Register:v16i8 $q0, t10
; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:v16i8 $q0, t8:1

; A variant of insert_small_scalable_into_big_fixed whose vector types prevent
; the expected transformation because the resulting insert would not be legal.
; In this instance their matching minimum vector lengths allow us to perform the
; opposite transformation and emit an extract instead.
define <16 x i8> @extract_fixed_from_scalable(<vscale x 16 x i8> %a) #0 {
  %extract = call <4 x i8> @llvm.vector.extract(<vscale x 16 x i8> %a, i64 0)
  %insert = call <16 x i8> @llvm.vector.insert(<16 x i8> undef, <4 x i8> %extract, i64 0)
  ret <16 x i8> %insert
}

; CHECK: Initial selection DAG: %bb.0 'insert_fixed_into_scalable:'
; CHECK: SelectionDAG has 10 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
; CHECK: t4: v4i8 = extract_subvector t2, Constant:i64<0>
; CHECK: t6: nxv16i8 = insert_subvector undef:nxv16i8, t4, Constant:i64<0>
; CHECK: t8: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t6
; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:nxv16i8 $z0, t8:1

; CHECK: Optimized lowered selection DAG: %bb.0 'insert_fixed_into_scalable:'
; CHECK: SelectionDAG has 9 nodes:
; CHECK: t0: ch,glue = EntryToken
; CHECK: t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
; CHECK: t10: nxv16i8 = insert_subvector undef:nxv16i8, t2, Constant:i64<0>
; CHECK: t8: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t10
; CHECK: t9: ch = AArch64ISD::RET_GLUE t8, Register:nxv16i8 $z0, t8:1

; A variant of extract_small_scalable_from_big_fixed whose vector types prevent
; the expected transformation because the resulting extract would not be legal.
; In this instance their matching minimum vector lengths allow us to perform the
; opposite transformation and emit an insert instead.
define <vscale x 16 x i8> @insert_fixed_into_scalable(<16 x i8> %a) #0 {
  %extract = call <4 x i8> @llvm.vector.extract(<16 x i8> %a, i64 0)
  %insert = call <vscale x 16 x i8> @llvm.vector.insert(<vscale x 16 x i8> undef, <4 x i8> %extract, i64 0)
  ret <vscale x 16 x i8> %insert
}

; All tests above are compiled with SVE enabled.
attributes #0 = { "target-features"="+sve" }