; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+v \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST

; ----------------------------------------------------------------------
; Fully unaligned cases


define void @unaligned_memcpy1(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: unaligned_memcpy1:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a1, 0(a1)
; RV32-BOTH-NEXT:    sb a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: unaligned_memcpy1:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a1, 0(a1)
; RV64-BOTH-NEXT:    sb a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 false)
  ret void
}

define void @unaligned_memcpy2(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy2:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy2:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy2:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lh a1, 0(a1)
; RV32-FAST-NEXT:    sh a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy2:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lh a1, 0(a1)
; RV64-FAST-NEXT:    sh a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 2, i1 false)
  ret void
}

define void @unaligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy3:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy3:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy3:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lbu a2, 2(a1)
; RV32-FAST-NEXT:    sb a2, 2(a0)
; RV32-FAST-NEXT:    lh a1, 0(a1)
; RV32-FAST-NEXT:    sh a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy3:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lbu a2, 2(a1)
; RV64-FAST-NEXT:    sb a2, 2(a0)
; RV64-FAST-NEXT:    lh a1, 0(a1)
; RV64-FAST-NEXT:    sh a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
  ret void
}

define void @unaligned_memcpy4(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy4:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy4:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy4:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 4, i1 false)
  ret void
}

define void @unaligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    addi a1, a1, 4
; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy7:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    addi a1, a1, 4
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy7:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 3(a1)
; RV32-FAST-NEXT:    sw a2, 3(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy7:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 3(a1)
; RV64-FAST-NEXT:    sw a2, 3(a0)
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 7, i1 false)
  ret void
}

define void @unaligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy8:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy8:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy8:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-FAST-NEXT:    vle32.v v8, (a1)
; RV32-FAST-NEXT:    vse32.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy8:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 8, i1 false)
  ret void
}

define void @unaligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy15:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 14(a1)
; RV32-NEXT:    sb a2, 14(a0)
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a2, a1, 12
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT:    vle8.v v8, (a2)
; RV32-NEXT:    addi a2, a0, 12
; RV32-NEXT:    vse8.v v8, (a2)
; RV32-NEXT:    addi a1, a1, 8
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy15:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 14(a1)
; RV64-NEXT:    sb a2, 14(a0)
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a2, a1, 12
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vle8.v v8, (a2)
; RV64-NEXT:    addi a2, a0, 12
; RV64-NEXT:    vse8.v v8, (a2)
; RV64-NEXT:    addi a1, a1, 8
; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy15:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 11(a1)
; RV32-FAST-NEXT:    sw a2, 11(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-FAST-NEXT:    vle32.v v8, (a1)
; RV32-FAST-NEXT:    vse32.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy15:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 7(a1)
; RV64-FAST-NEXT:    sd a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 15, i1 false)
  ret void
}

define void @unaligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy16:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy16:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy16:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy16:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret void
}

define void @unaligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy31:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    addi a1, a1, 15
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a0, a0, 15
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy31:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    addi a1, a1, 15
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a0, a0, 15
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy31:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    addi a1, a1, 15
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    addi a0, a0, 15
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy31:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    addi a1, a1, 15
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    addi a0, a0, 15
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 31, i1 false)
  ret void
}

define void @unaligned_memcpy32(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy32:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy32:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy32:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy32:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 32, i1 false)
  ret void
}

define void @unaligned_memcpy64(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy64:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 64
; RV32-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy64:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 64
; RV64-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy64:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy64:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 64, i1 false)
  ret void
}

define void @unaligned_memcpy96(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy96:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 64
; RV32-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a1, a1, 64
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a0, a0, 64
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy96:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 64
; RV64-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a1, a1, 64
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a0, a0, 64
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy96:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    addi a1, a1, 64
; RV32-FAST-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    addi a0, a0, 64
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy96:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    addi a1, a1, 64
; RV64-FAST-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    addi a0, a0, 64
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 96, i1 false)
  ret void
}

define void @unaligned_memcpy128(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy128:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 128
; RV32-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy128:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 128
; RV64-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy128:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy128:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 128, i1 false)
  ret void
}

define void @unaligned_memcpy196(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy196:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 128
; RV32-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a2, a1, 128
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    li a3, 64
; RV32-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
; RV32-NEXT:    vle8.v v8, (a2)
; RV32-NEXT:    addi a2, a0, 128
; RV32-NEXT:    vse8.v v8, (a2)
; RV32-NEXT:    lbu a2, 195(a1)
; RV32-NEXT:    sb a2, 195(a0)
; RV32-NEXT:    lbu a2, 194(a1)
; RV32-NEXT:    sb a2, 194(a0)
; RV32-NEXT:    lbu a2, 193(a1)
; RV32-NEXT:    sb a2, 193(a0)
; RV32-NEXT:    lbu a1, 192(a1)
; RV32-NEXT:    sb a1, 192(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy196:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 128
; RV64-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a2, a1, 128
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    li a3, 64
; RV64-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
; RV64-NEXT:    vle8.v v8, (a2)
; RV64-NEXT:    addi a2, a0, 128
; RV64-NEXT:    vse8.v v8, (a2)
; RV64-NEXT:    lbu a2, 195(a1)
; RV64-NEXT:    sb a2, 195(a0)
; RV64-NEXT:    lbu a2, 194(a1)
; RV64-NEXT:    sb a2, 194(a0)
; RV64-NEXT:    lbu a2, 193(a1)
; RV64-NEXT:    sb a2, 193(a0)
; RV64-NEXT:    lbu a1, 192(a1)
; RV64-NEXT:    sb a1, 192(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy196:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 192(a1)
; RV32-FAST-NEXT:    sw a2, 192(a0)
; RV32-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    addi a1, a1, 128
; RV32-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    addi a0, a0, 128
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy196:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 192(a1)
; RV64-FAST-NEXT:    sw a2, 192(a0)
; RV64-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    addi a1, a1, 128
; RV64-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    addi a0, a0, 128
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 196, i1 false)
  ret void
}

define void @unaligned_memcpy256(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy256:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 128
; RV32-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    addi a1, a1, 128
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a0, a0, 128
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy256:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 128
; RV64-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    addi a1, a1, 128
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a0, a0, 128
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy256:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    addi a1, a1, 128
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    addi a0, a0, 128
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy256:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    addi a1, a1, 128
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    addi a0, a0, 128
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 256, i1 false)
  ret void
}


; ----------------------------------------------------------------------
; Fully aligned cases

define void @aligned_memcpy2(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy2:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lh a1, 0(a1)
; RV32-BOTH-NEXT:    sh a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy2:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lh a1, 0(a1)
; RV64-BOTH-NEXT:    sh a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 2, i1 false)
  ret void
}

define void @aligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy3:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a2, 2(a1)
; RV32-BOTH-NEXT:    sb a2, 2(a0)
; RV32-BOTH-NEXT:    lh a1, 0(a1)
; RV32-BOTH-NEXT:    sh a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy3:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a2, 2(a1)
; RV64-BOTH-NEXT:    sb a2, 2(a0)
; RV64-BOTH-NEXT:    lh a1, 0(a1)
; RV64-BOTH-NEXT:    sh a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 3, i1 false)
  ret void
}

define void @aligned_memcpy4(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy4:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy4:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lw a1, 0(a1)
; RV64-BOTH-NEXT:    sw a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 4, i1 false)
  ret void
}

define void @aligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lh a2, 4(a1)
; RV32-NEXT:    sh a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy7:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lh a2, 4(a1)
; RV64-NEXT:    sh a2, 4(a0)
; RV64-NEXT:    lw a1, 0(a1)
; RV64-NEXT:    sw a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy7:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 3(a1)
; RV32-FAST-NEXT:    sw a2, 3(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy7:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 3(a1)
; RV64-FAST-NEXT:    sw a2, 3(a0)
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 7, i1 false)
  ret void
}

define void @aligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy8:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-BOTH-NEXT:    vle32.v v8, (a1)
; RV32-BOTH-NEXT:    vse32.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy8:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ld a1, 0(a1)
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 8, i1 false)
  ret void
}

define void @aligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy15:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 14(a1)
; RV32-NEXT:    sb a2, 14(a0)
; RV32-NEXT:    lh a2, 12(a1)
; RV32-NEXT:    sh a2, 12(a0)
; RV32-NEXT:    lw a2, 8(a1)
; RV32-NEXT:    sw a2, 8(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vle32.v v8, (a1)
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy15:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 14(a1)
; RV64-NEXT:    sb a2, 14(a0)
; RV64-NEXT:    lh a2, 12(a1)
; RV64-NEXT:    sh a2, 12(a0)
; RV64-NEXT:    lw a2, 8(a1)
; RV64-NEXT:    sw a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy15:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 11(a1)
; RV32-FAST-NEXT:    sw a2, 11(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-FAST-NEXT:    vle32.v v8, (a1)
; RV32-FAST-NEXT:    vse32.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy15:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 7(a1)
; RV64-FAST-NEXT:    sd a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 15, i1 false)
  ret void
}

define void @aligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy16:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy16:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 16, i1 false)
  ret void
}

define void @aligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy31:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi a1, a1, 15
; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a0, a0, 15
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy31:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    addi a1, a1, 15
; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a0, a0, 15
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy31:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    addi a1, a1, 15
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    addi a0, a0, 15
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy31:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    addi a1, a1, 15
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    addi a0, a0, 15
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 31, i1 false)
  ret void
}

define void @aligned_memcpy32(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy32:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy32:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 32, i1 false)
  ret void
}

define void @aligned_memcpy64(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy64:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy64:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 64, i1 false)
  ret void
}

define void @aligned_memcpy96(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy96:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    addi a1, a1, 64
; RV32-BOTH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    addi a0, a0, 64
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy96:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    addi a1, a1, 64
; RV64-BOTH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    addi a0, a0, 64
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 96, i1 false)
  ret void
}

define void @aligned_memcpy128(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy128:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy128:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 128, i1 false)
  ret void
}
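
; A 196-byte copy doesn't fit a power-of-two chunk, so (as in the unaligned
; variant above) the expansion below decomposes it as 128 + 64 + 4: two
; vector copies followed by a scalar tail.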

define void @aligned_memcpy196(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy196:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 192(a1)
; RV32-BOTH-NEXT:    sw a2, 192(a0)
; RV32-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    addi a1, a1, 128
; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    addi a0, a0, 128
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy196:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lw a2, 192(a1)
; RV64-BOTH-NEXT:    sw a2, 192(a0)
; RV64-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    addi a1, a1, 128
; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    addi a0, a0, 128
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 196, i1 false)
  ret void
}

define void @aligned_memcpy256(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy256:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    addi a1, a1, 128
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    addi a0, a0, 128
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy256:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    addi a1, a1, 128
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    addi a0, a0, 128
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 256, i1 false)
  ret void
}

; ------------------------------------------------------------------------
; A few partially aligned cases


define void @memcpy16_align4(ptr nocapture %dest, ptr nocapture %src) nounwind {
; RV32-LABEL: memcpy16_align4:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a1)
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: memcpy16_align4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vle32.v v8, (a1)
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: memcpy16_align4:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: memcpy16_align4:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 16, i1 false)
  ret void
}

define i32 @memcpy11_align8(ptr nocapture %dest, ptr %src) {
; RV32-LABEL: memcpy11_align8:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 10(a1)
; RV32-NEXT:    sb a2, 10(a0)
; RV32-NEXT:    lh a2, 8(a1)
; RV32-NEXT:    sh a2, 8(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vle32.v v8, (a1)
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: memcpy11_align8:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 10(a1)
; RV64-NEXT:    sb a2, 10(a0)
; RV64-NEXT:    lh a2, 8(a1)
; RV64-NEXT:    sh a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: memcpy11_align8:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 7(a1)
; RV32-FAST-NEXT:    sw a2, 7(a0)
; RV32-FAST-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-FAST-NEXT:    vle32.v v8, (a1)
; RV32-FAST-NEXT:    vse32.v v8, (a0)
; RV32-FAST-NEXT:    li a0, 0
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: memcpy11_align8:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 7(a1)
; RV64-FAST-NEXT:    sw a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    li a0, 0
; RV64-FAST-NEXT:    ret
entry:
  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 11, i1 false)
  ret i32 0
}


declare void @llvm.memcpy.inline.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
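
; Note that, unlike plain llvm.memcpy, the llvm.memcpy.inline intrinsic
; guarantees the expansion stays inline: the backend may not fall back to a
; memcpy libcall, which is why even the 256-byte cases above lower to
; explicit load/store sequences.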