; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
; RUN: llc < %s -mtriple=riscv64 \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST

; Checks the assembly llc emits for llvm.memcpy calls with small constant
; lengths (0 to 31 bytes) on riscv32 and riscv64, each with and without the
; unaligned-scalar-mem target feature.
;
; Check-prefix layout, as set up by the four llc invocations above:
;   RV32 / RV64            - default feature set only
;   RV32-FAST / RV64-FAST  - with +unaligned-scalar-mem only
;   RV32-BOTH / RV64-BOTH  - output shared by both feature sets of one triple
;
; The assertion lines are tool-generated (see the note on the first line), so
; they should be regenerated with that script rather than edited by hand.

; ----------------------------------------------------------------------
; Fully unaligned cases

; Length 0, no alignment attribute: every configuration emits only `ret`.
define void @unaligned_memcpy0(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: unaligned_memcpy0:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: unaligned_memcpy0:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 false)
  ret void
}

; Length 1: a single lbu/sb pair in every configuration.
define void @unaligned_memcpy1(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: unaligned_memcpy1:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a1, 0(a1)
; RV32-BOTH-NEXT:    sb a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: unaligned_memcpy1:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a1, 0(a1)
; RV64-BOTH-NEXT:    sb a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 false)
  ret void
}

; Length 2: two byte copies by default; one lh/sh pair with
; unaligned-scalar-mem.
define void @unaligned_memcpy2(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy2:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy2:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy2:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lh a1, 0(a1)
; RV32-FAST-NEXT:    sh a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy2:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lh a1, 0(a1)
; RV64-FAST-NEXT:    sh a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 2, i1 false)
  ret void
}

; Length 3: three byte copies by default; with unaligned-scalar-mem a byte
; copy at offset 2 followed by a halfword copy at offset 0.
define void @unaligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy3:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy3:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy3:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lbu a2, 2(a1)
; RV32-FAST-NEXT:    sb a2, 2(a0)
; RV32-FAST-NEXT:    lh a1, 0(a1)
; RV32-FAST-NEXT:    sh a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy3:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lbu a2, 2(a1)
; RV64-FAST-NEXT:    sb a2, 2(a0)
; RV64-FAST-NEXT:    lh a1, 0(a1)
; RV64-FAST-NEXT:    sh a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
  ret void
}

; Length 4: four byte copies by default; a single lw/sw pair with
; unaligned-scalar-mem.
define void @unaligned_memcpy4(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy4:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy4:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy4:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 4, i1 false)
  ret void
}

; Length 7: seven byte copies by default; with unaligned-scalar-mem two word
; copies at offsets 3 and 0, which overlap at byte 3.
define void @unaligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lbu a2, 5(a1)
; RV32-NEXT:    sb a2, 5(a0)
; RV32-NEXT:    lbu a2, 4(a1)
; RV32-NEXT:    sb a2, 4(a0)
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy7:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lbu a2, 5(a1)
; RV64-NEXT:    sb a2, 5(a0)
; RV64-NEXT:    lbu a2, 4(a1)
; RV64-NEXT:    sb a2, 4(a0)
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy7:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 3(a1)
; RV32-FAST-NEXT:    sw a2, 3(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy7:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 3(a1)
; RV64-FAST-NEXT:    sw a2, 3(a0)
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 7, i1 false)
  ret void
}

; Length 8: eight byte copies by default; with unaligned-scalar-mem two lw/sw
; pairs on riscv32 and a single ld/sd pair on riscv64.
define void @unaligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy8:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 7(a1)
; RV32-NEXT:    sb a2, 7(a0)
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lbu a2, 5(a1)
; RV32-NEXT:    sb a2, 5(a0)
; RV32-NEXT:    lbu a2, 4(a1)
; RV32-NEXT:    sb a2, 4(a0)
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy8:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 7(a1)
; RV64-NEXT:    sb a2, 7(a0)
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lbu a2, 5(a1)
; RV64-NEXT:    sb a2, 5(a0)
; RV64-NEXT:    lbu a2, 4(a1)
; RV64-NEXT:    sb a2, 4(a0)
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy8:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy8:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 8, i1 false)
  ret void
}

; Length 15: the default configurations load the length into a2 and tail-call
; memcpy. With unaligned-scalar-mem the copy is expanded inline (words on
; riscv32, doublewords on riscv64), the last access overlapping the previous.
define void @unaligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy15:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 15
; RV32-NEXT:    tail memcpy
;
; RV64-LABEL: unaligned_memcpy15:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 15
; RV64-NEXT:    tail memcpy
;
; RV32-FAST-LABEL: unaligned_memcpy15:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 11(a1)
; RV32-FAST-NEXT:    sw a2, 11(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy15:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 7(a1)
; RV64-FAST-NEXT:    sd a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 15, i1 false)
  ret void
}

; Length 16: the default configurations tail-call memcpy; with
; unaligned-scalar-mem riscv32 uses four lw/sw pairs and riscv64 two ld/sd
; pairs.
define void @unaligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy16:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    tail memcpy
;
; RV64-LABEL: unaligned_memcpy16:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    tail memcpy
;
; RV32-FAST-LABEL: unaligned_memcpy16:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 12(a1)
; RV32-FAST-NEXT:    sw a2, 12(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy16:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret void
}

; Length 31: the default configurations tail-call memcpy; with
; unaligned-scalar-mem riscv32 uses eight word copies (the one at offset 27
; overlaps the one at 24) and riscv64 four doubleword copies (the one at
; offset 23 overlaps the one at 16).
define void @unaligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy31:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 31
; RV32-NEXT:    tail memcpy
;
; RV64-LABEL: unaligned_memcpy31:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 31
; RV64-NEXT:    tail memcpy
;
; RV32-FAST-LABEL: unaligned_memcpy31:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 27(a1)
; RV32-FAST-NEXT:    sw a2, 27(a0)
; RV32-FAST-NEXT:    lw a2, 24(a1)
; RV32-FAST-NEXT:    sw a2, 24(a0)
; RV32-FAST-NEXT:    lw a2, 20(a1)
; RV32-FAST-NEXT:    sw a2, 20(a0)
; RV32-FAST-NEXT:    lw a2, 16(a1)
; RV32-FAST-NEXT:    sw a2, 16(a0)
; RV32-FAST-NEXT:    lw a2, 12(a1)
; RV32-FAST-NEXT:    sw a2, 12(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy31:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 23(a1)
; RV64-FAST-NEXT:    sd a2, 23(a0)
; RV64-FAST-NEXT:    ld a2, 16(a1)
; RV64-FAST-NEXT:    sd a2, 16(a0)
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 31, i1 false)
  ret void
}

; ----------------------------------------------------------------------
; Fully aligned cases

; Length 0 with both pointers marked align 8: every configuration emits only
; `ret`.
define void @aligned_memcpy0(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy0:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy0:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 0, i1 false)
  ret void
}

; Length 1, align 8: a single lbu/sb pair in every configuration.
define void @aligned_memcpy1(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy1:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a1, 0(a1)
; RV32-BOTH-NEXT:    sb a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy1:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a1, 0(a1)
; RV64-BOTH-NEXT:    sb a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 1, i1 false)
  ret void
}

; Length 2, align 8: a single lh/sh pair in every configuration.
define void @aligned_memcpy2(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy2:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lh a1, 0(a1)
; RV32-BOTH-NEXT:    sh a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy2:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lh a1, 0(a1)
; RV64-BOTH-NEXT:    sh a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 2, i1 false)
  ret void
}

; Length 3, align 8: a byte copy at offset 2 plus a halfword copy at offset 0
; in every configuration.
define void @aligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy3:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a2, 2(a1)
; RV32-BOTH-NEXT:    sb a2, 2(a0)
; RV32-BOTH-NEXT:    lh a1, 0(a1)
; RV32-BOTH-NEXT:    sh a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy3:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a2, 2(a1)
; RV64-BOTH-NEXT:    sb a2, 2(a0)
; RV64-BOTH-NEXT:    lh a1, 0(a1)
; RV64-BOTH-NEXT:    sh a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 3, i1 false)
  ret void
}

; Length 4, align 8: a single lw/sw pair in every configuration.
define void @aligned_memcpy4(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy4:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy4:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lw a1, 0(a1)
; RV64-BOTH-NEXT:    sw a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 4, i1 false)
  ret void
}

; Length 7, align 8: by default a byte (offset 6), halfword (offset 4) and
; word (offset 0) copy; with unaligned-scalar-mem two word copies at offsets
; 3 and 0, which overlap at byte 3.
define void @aligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lh a2, 4(a1)
; RV32-NEXT:    sh a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy7:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lh a2, 4(a1)
; RV64-NEXT:    sh a2, 4(a0)
; RV64-NEXT:    lw a1, 0(a1)
; RV64-NEXT:    sw a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy7:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 3(a1)
; RV32-FAST-NEXT:    sw a2, 3(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy7:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 3(a1)
; RV64-FAST-NEXT:    sw a2, 3(a0)
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 7, i1 false)
  ret void
}

; Length 8, align 8: two lw/sw pairs on riscv32 and a single ld/sd pair on
; riscv64, independent of unaligned-scalar-mem.
define void @aligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy8:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 4(a1)
; RV32-BOTH-NEXT:    sw a2, 4(a0)
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy8:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ld a1, 0(a1)
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 8, i1 false)
  ret void
}

; Length 15, align 8: by default a byte (offset 14) and halfword (offset 12)
; tail after naturally aligned word/doubleword copies; with
; unaligned-scalar-mem the tail is one overlapping word (riscv32, offset 11)
; or doubleword (riscv64, offset 7) copy instead.
define void @aligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy15:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 14(a1)
; RV32-NEXT:    sb a2, 14(a0)
; RV32-NEXT:    lh a2, 12(a1)
; RV32-NEXT:    sh a2, 12(a0)
; RV32-NEXT:    lw a2, 8(a1)
; RV32-NEXT:    sw a2, 8(a0)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy15:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 14(a1)
; RV64-NEXT:    sb a2, 14(a0)
; RV64-NEXT:    lh a2, 12(a1)
; RV64-NEXT:    sh a2, 12(a0)
; RV64-NEXT:    lw a2, 8(a1)
; RV64-NEXT:    sw a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy15:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 11(a1)
; RV32-FAST-NEXT:    sw a2, 11(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy15:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 7(a1)
; RV64-FAST-NEXT:    sd a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 15, i1 false)
  ret void
}

; Length 16, align 8: four lw/sw pairs on riscv32 and two ld/sd pairs on
; riscv64, independent of unaligned-scalar-mem.
define void @aligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy16:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 12(a1)
; RV32-BOTH-NEXT:    sw a2, 12(a0)
; RV32-BOTH-NEXT:    lw a2, 8(a1)
; RV32-BOTH-NEXT:    sw a2, 8(a0)
; RV32-BOTH-NEXT:    lw a2, 4(a1)
; RV32-BOTH-NEXT:    sw a2, 4(a0)
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy16:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ld a2, 8(a1)
; RV64-BOTH-NEXT:    sd a2, 8(a0)
; RV64-BOTH-NEXT:    ld a1, 0(a1)
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 16, i1 false)
  ret void
}

; Length 31, align 8: default riscv32 tail-calls memcpy, while default
; riscv64 expands inline (byte, halfword, word, then three doublewords). With
; unaligned-scalar-mem both triples expand inline with an overlapping final
; access (offset 27 on riscv32, offset 23 on riscv64).
define void @aligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy31:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 31
; RV32-NEXT:    tail memcpy
;
; RV64-LABEL: aligned_memcpy31:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 30(a1)
; RV64-NEXT:    sb a2, 30(a0)
; RV64-NEXT:    lh a2, 28(a1)
; RV64-NEXT:    sh a2, 28(a0)
; RV64-NEXT:    lw a2, 24(a1)
; RV64-NEXT:    sw a2, 24(a0)
; RV64-NEXT:    ld a2, 16(a1)
; RV64-NEXT:    sd a2, 16(a0)
; RV64-NEXT:    ld a2, 8(a1)
; RV64-NEXT:    sd a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy31:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 27(a1)
; RV32-FAST-NEXT:    sw a2, 27(a0)
; RV32-FAST-NEXT:    lw a2, 24(a1)
; RV32-FAST-NEXT:    sw a2, 24(a0)
; RV32-FAST-NEXT:    lw a2, 20(a1)
; RV32-FAST-NEXT:    sw a2, 20(a0)
; RV32-FAST-NEXT:    lw a2, 16(a1)
; RV32-FAST-NEXT:    sw a2, 16(a0)
; RV32-FAST-NEXT:    lw a2, 12(a1)
; RV32-FAST-NEXT:    sw a2, 12(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy31:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 23(a1)
; RV64-FAST-NEXT:    sd a2, 23(a0)
; RV64-FAST-NEXT:    ld a2, 16(a1)
; RV64-FAST-NEXT:    sd a2, 16(a0)
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 31, i1 false)
  ret void
}

; ------------------------------------------------------------------------
; A few partially aligned cases


; Length 16 with both pointers marked align 4, using the i32-length form of
; the intrinsic: four lw/sw pairs on riscv32 (both feature sets) and on
; default riscv64; two ld/sd pairs on riscv64 with unaligned-scalar-mem.
define void @memcpy16_align4(ptr nocapture %dest, ptr nocapture %src) nounwind {
; RV32-BOTH-LABEL: memcpy16_align4:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 12(a1)
; RV32-BOTH-NEXT:    sw a2, 12(a0)
; RV32-BOTH-NEXT:    lw a2, 8(a1)
; RV32-BOTH-NEXT:    sw a2, 8(a0)
; RV32-BOTH-NEXT:    lw a2, 4(a1)
; RV32-BOTH-NEXT:    sw a2, 4(a0)
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-LABEL: memcpy16_align4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lw a2, 12(a1)
; RV64-NEXT:    sw a2, 12(a0)
; RV64-NEXT:    lw a2, 8(a1)
; RV64-NEXT:    sw a2, 8(a0)
; RV64-NEXT:    lw a2, 4(a1)
; RV64-NEXT:    sw a2, 4(a0)
; RV64-NEXT:    lw a1, 0(a1)
; RV64-NEXT:    sw a1, 0(a0)
; RV64-NEXT:    ret
;
; RV64-FAST-LABEL: memcpy16_align4:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 16, i1 false)
  ret void
}

; Length 11 with both pointers marked align 8, in a function that returns
; i32 0 after a plain (non-tail) call and is not marked nounwind. Every
; configuration expands the copy inline and then materialises the return
; value with `li a0, 0`; the unaligned-scalar-mem variants finish the copy
; with a word access at offset 7 that overlaps the preceding one.
define i32 @memcpy11_align8(ptr nocapture %dest, ptr %src) {
; RV32-LABEL: memcpy11_align8:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 10(a1)
; RV32-NEXT:    sb a2, 10(a0)
; RV32-NEXT:    lh a2, 8(a1)
; RV32-NEXT:    sh a2, 8(a0)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: memcpy11_align8:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 10(a1)
; RV64-NEXT:    sb a2, 10(a0)
; RV64-NEXT:    lh a2, 8(a1)
; RV64-NEXT:    sh a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: memcpy11_align8:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 7(a1)
; RV32-FAST-NEXT:    sw a2, 7(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    li a0, 0
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: memcpy11_align8:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 7(a1)
; RV64-FAST-NEXT:    sw a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    li a0, 0
; RV64-FAST-NEXT:    ret
entry:
  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 11, i1 false)
  ret i32 0
}

; Declarations of the two memcpy intrinsic overloads used above (i32 and i64
; length operands).
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind