; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s

; Lowering of a fixed-length <512 x i8> add on RV32 and RV64 (both RUN lines
; share one set of assertions). Every function has the same IR body and differs
; only in the vscale_range attribute group attached to it (defined at the end
; of the file). The expected assembly differs in the LMUL that is selected and
; in whether operands/results are loaded from and stored to memory.
; The assertions are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Attribute group #0. The expected code reserves 48 * vlenb bytes of stack,
; spills and reloads whole m8 register groups, and works in four e8/m8 pieces
; with an AVL of 128.
define <512 x i8> @vadd_v512i8_zvl128(<512 x i8> %a, <512 x i8> %b) #0 {
; CHECK-LABEL: vadd_v512i8_zvl128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    li a4, 48
; CHECK-NEXT:    mul a2, a2, a4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 5
; CHECK-NEXT:    add a2, sp, a2
; CHECK-NEXT:    addi a2, a2, 16
; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    li a4, 40
; CHECK-NEXT:    mul a2, a2, a4
; CHECK-NEXT:    add a2, sp, a2
; CHECK-NEXT:    addi a2, a2, 16
; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    li a2, 128
; CHECK-NEXT:    addi a4, a3, 128
; CHECK-NEXT:    addi a5, a3, 384
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a5)
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    li a5, 24
; CHECK-NEXT:    mul a2, a2, a5
; CHECK-NEXT:    add a2, sp, a2
; CHECK-NEXT:    addi a2, a2, 16
; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a2, a1, 128
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a3, 256
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v8, (a2)
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v24, (a4)
; CHECK-NEXT:    vle8.v v0, (a3)
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a2, 24
; CHECK-NEXT:    mul a1, a1, a2
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vadd.vv v16, v16, v8
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 5
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vadd.vv v24, v8, v24
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a2, 40
; CHECK-NEXT:    mul a1, a1, a2
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vadd.vv v0, v8, v0
; CHECK-NEXT:    vse8.v v0, (a0)
; CHECK-NEXT:    addi a1, a0, 384
; CHECK-NEXT:    vse8.v v16, (a1)
; CHECK-NEXT:    addi a1, a0, 256
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    add a2, sp, a2
; CHECK-NEXT:    addi a2, a2, 16
; CHECK-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vse8.v v24, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 48
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %c = add <512 x i8> %a, %b
  ret <512 x i8> %c
}

; Attribute group #1. The expected code loads two m8 groups from memory
; (a0 and a0 + 256) and performs two e8/m8 adds with an AVL of 256; no stack
; frame is set up.
define <512 x i8> @vadd_v512i8_zvl256(<512 x i8> %a, <512 x i8> %b) #1 {
; CHECK-LABEL: vadd_v512i8_zvl256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, 256
; CHECK-NEXT:    li a2, 256
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v24, (a0)
; CHECK-NEXT:    vle8.v v0, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v24
; CHECK-NEXT:    vadd.vv v16, v16, v0
; CHECK-NEXT:    ret
  %c = add <512 x i8> %a, %b
  ret <512 x i8> %c
}

; Attribute group #2. The expected code is a single e8/m8 add (v8 + v16) with
; an AVL of 512.
define <512 x i8> @vadd_v512i8_zvl512(<512 x i8> %a, <512 x i8> %b) #2 {
; CHECK-LABEL: vadd_v512i8_zvl512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 512
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    ret
  %c = add <512 x i8> %a, %b
  ret <512 x i8> %c
}

; Attribute group #3. The expected code is a single e8/m4 add (v8 + v12).
define <512 x i8> @vadd_v512i8_zvl1024(<512 x i8> %a, <512 x i8> %b) #3 {
; CHECK-LABEL: vadd_v512i8_zvl1024:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 512
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    ret
  %c = add <512 x i8> %a, %b
  ret <512 x i8> %c
}

; Attribute group #4. The expected code is a single e8/m2 add (v8 + v10).
define <512 x i8> @vadd_v512i8_zvl2048(<512 x i8> %a, <512 x i8> %b) #4 {
; CHECK-LABEL: vadd_v512i8_zvl2048:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 512
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %c = add <512 x i8> %a, %b
  ret <512 x i8> %c
}

; Attribute group #5. The expected code is a single e8/m1 add (v8 + v9).
define <512 x i8> @vadd_v512i8_zvl4096(<512 x i8> %a, <512 x i8> %b) #5 {
; CHECK-LABEL: vadd_v512i8_zvl4096:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 512
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %c = add <512 x i8> %a, %b
  ret <512 x i8> %c
}

; One attribute group per function; the vscale_range lower bound doubles from
; #0 to #5 while the upper bound stays at 1024.
; NOTE(review): the _zvlN suffix of each function name presumably denotes the
; matching minimum VLEN in bits (N = 64 * lower bound) - confirm against the
; RISC-V backend.
attributes #0 = { vscale_range(2,1024) }
attributes #1 = { vscale_range(4,1024) }
attributes #2 = { vscale_range(8,1024) }
attributes #3 = { vscale_range(16,1024) }
attributes #4 = { vscale_range(32,1024) }
attributes #5 = { vscale_range(64,1024) }