; RUN: opt -mtriple=nvptx64-nvidia-cuda -passes=load-store-vectorizer -S -o - %s | FileCheck %s

; Vectorize and emit valid code (Issue #54896).
;
; Every function below loads a run of consecutive i8 values starting at %ptr
; and stores them back to the same bytes in a permuted order. The function
; name encodes the shape: int8x<N>a<A> handles N bytes through a pointer
; argument declared with `align A`. The FileCheck directives at the end of
; each body list the load/store widths expected in the pass output.

; 3 bytes, pointer aligned to 2. Expects one <2 x i8> access plus one scalar
; i8 access for the loads and likewise for the stores; the directives are the
; order-insensitive (DAG) form.
define void @int8x3a2(ptr nocapture align 2 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2

  %l0 = load i8, ptr %ptr0, align 2
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2

  ; Write the three bytes back in reverse order.
  store i8 %l2, ptr %ptr0, align 2
  store i8 %l1, ptr %ptr1, align 1
  store i8 %l0, ptr %ptr2, align 2

  ret void

; CHECK-LABEL: @int8x3a2
; CHECK-DAG: load <2 x i8>
; CHECK-DAG: load i8
; CHECK-DAG: store <2 x i8>
; CHECK-DAG: store i8
}

; 3 bytes, pointer aligned to 4. Expects, in this order, a <2 x i8> load, a
; scalar i8 load, a <2 x i8> store and a scalar i8 store.
define void @int8x3a4(ptr nocapture align 4 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2

  %l0 = load i8, ptr %ptr0, align 4
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2

  ; Write the three bytes back in reverse order. The store to %ptr0 states
  ; align 2, which is weaker than the argument's align 4 but still valid.
  store i8 %l2, ptr %ptr0, align 2
  store i8 %l1, ptr %ptr1, align 1
  ; %ptr2 is %ptr + 2 and %ptr is 4-byte aligned, so this address is exactly
  ; 2-byte aligned. Stating a larger alignment here would make the store
  ; undefined behaviour, so it matches the align 2 used by the load above.
  store i8 %l0, ptr %ptr2, align 2

  ret void

; CHECK-LABEL: @int8x3a4
; CHECK: load <2 x i8>
; CHECK: load i8
; CHECK: store <2 x i8>
; CHECK: store i8
}

; 12 bytes, pointer aligned to 4. Expects three <4 x i8> loads followed by
; three <4 x i8> stores.
define void @int8x12a4(ptr nocapture align 4 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2
  %ptr3 = getelementptr i8, ptr %ptr, i64 3
  %ptr4 = getelementptr i8, ptr %ptr, i64 4
  %ptr5 = getelementptr i8, ptr %ptr, i64 5
  %ptr6 = getelementptr i8, ptr %ptr, i64 6
  %ptr7 = getelementptr i8, ptr %ptr, i64 7
  %ptr8 = getelementptr i8, ptr %ptr, i64 8
  %ptr9 = getelementptr i8, ptr %ptr, i64 9
  %ptra = getelementptr i8, ptr %ptr, i64 10
  %ptrb = getelementptr i8, ptr %ptr, i64 11

  %l0 = load i8, ptr %ptr0, align 4
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2
  %l3 = load i8, ptr %ptr3, align 1
  %l4 = load i8, ptr %ptr4, align 4
  %l5 = load i8, ptr %ptr5, align 1
  %l6 = load i8, ptr %ptr6, align 2
  %l7 = load i8, ptr %ptr7, align 1
  %l8 = load i8, ptr %ptr8, align 4
  %l9 = load i8, ptr %ptr9, align 1
  %la = load i8, ptr %ptra, align 2
  %lb = load i8, ptr %ptrb, align 1

  ; Write all twelve bytes back in reverse order.
  store i8 %lb, ptr %ptr0, align 4
  store i8 %la, ptr %ptr1, align 1
  store i8 %l9, ptr %ptr2, align 2
  store i8 %l8, ptr %ptr3, align 1
  store i8 %l7, ptr %ptr4, align 4
  store i8 %l6, ptr %ptr5, align 1
  store i8 %l5, ptr %ptr6, align 2
  store i8 %l4, ptr %ptr7, align 1
  store i8 %l3, ptr %ptr8, align 4
  store i8 %l2, ptr %ptr9, align 1
  store i8 %l1, ptr %ptra, align 2
  store i8 %l0, ptr %ptrb, align 1

  ret void

; CHECK-LABEL: @int8x12a4
; CHECK: load <4 x i8>
; CHECK: load <4 x i8>
; CHECK: load <4 x i8>
; CHECK: store <4 x i8>
; CHECK: store <4 x i8>
; CHECK: store <4 x i8>
}


; 16 bytes, pointer aligned to 4. Expects four <4 x i8> loads followed by
; four <4 x i8> stores.
define void @int8x16a4(ptr nocapture align 4 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2
  %ptr3 = getelementptr i8, ptr %ptr, i64 3
  %ptr4 = getelementptr i8, ptr %ptr, i64 4
  %ptr5 = getelementptr i8, ptr %ptr, i64 5
  %ptr6 = getelementptr i8, ptr %ptr, i64 6
  %ptr7 = getelementptr i8, ptr %ptr, i64 7
  %ptr8 = getelementptr i8, ptr %ptr, i64 8
  %ptr9 = getelementptr i8, ptr %ptr, i64 9
  %ptra = getelementptr i8, ptr %ptr, i64 10
  %ptrb = getelementptr i8, ptr %ptr, i64 11
  %ptrc = getelementptr i8, ptr %ptr, i64 12
  %ptrd = getelementptr i8, ptr %ptr, i64 13
  %ptre = getelementptr i8, ptr %ptr, i64 14
  %ptrf = getelementptr i8, ptr %ptr, i64 15

  %l0 = load i8, ptr %ptr0, align 4
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2
  %l3 = load i8, ptr %ptr3, align 1
  %l4 = load i8, ptr %ptr4, align 4
  %l5 = load i8, ptr %ptr5, align 1
  %l6 = load i8, ptr %ptr6, align 2
  %l7 = load i8, ptr %ptr7, align 1
  %l8 = load i8, ptr %ptr8, align 4
  %l9 = load i8, ptr %ptr9, align 1
  %la = load i8, ptr %ptra, align 2
  %lb = load i8, ptr %ptrb, align 1
  %lc = load i8, ptr %ptrc, align 4
  %ld = load i8, ptr %ptrd, align 1
  %le = load i8, ptr %ptre, align 2
  %lf = load i8, ptr %ptrf, align 1

  ; Unlike the other functions this is not a plain full reversal: the stores
  ; to bytes 12..15 are issued first and reverse those four bytes among
  ; themselves, then bytes 0..11 are reversed among themselves.
  store i8 %lf, ptr %ptrc, align 4
  store i8 %le, ptr %ptrd, align 1
  store i8 %ld, ptr %ptre, align 2
  store i8 %lc, ptr %ptrf, align 1
  store i8 %lb, ptr %ptr0, align 4
  store i8 %la, ptr %ptr1, align 1
  store i8 %l9, ptr %ptr2, align 2
  store i8 %l8, ptr %ptr3, align 1
  store i8 %l7, ptr %ptr4, align 4
  store i8 %l6, ptr %ptr5, align 1
  store i8 %l5, ptr %ptr6, align 2
  store i8 %l4, ptr %ptr7, align 1
  store i8 %l3, ptr %ptr8, align 4
  store i8 %l2, ptr %ptr9, align 1
  store i8 %l1, ptr %ptra, align 2
  store i8 %l0, ptr %ptrb, align 1

  ret void

; CHECK-LABEL: @int8x16a4
; CHECK: load <4 x i8>
; CHECK: load <4 x i8>
; CHECK: load <4 x i8>
; CHECK: load <4 x i8>
; CHECK: store <4 x i8>
; CHECK: store <4 x i8>
; CHECK: store <4 x i8>
; CHECK: store <4 x i8>
}

; 8 bytes, pointer aligned to 8. Expects a single <8 x i8> load followed by a
; single <8 x i8> store.
define void @int8x8a8(ptr nocapture align 8 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2
  %ptr3 = getelementptr i8, ptr %ptr, i64 3
  %ptr4 = getelementptr i8, ptr %ptr, i64 4
  %ptr5 = getelementptr i8, ptr %ptr, i64 5
  %ptr6 = getelementptr i8, ptr %ptr, i64 6
  %ptr7 = getelementptr i8, ptr %ptr, i64 7

  %l0 = load i8, ptr %ptr0, align 8
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2
  %l3 = load i8, ptr %ptr3, align 1
  %l4 = load i8, ptr %ptr4, align 4
  %l5 = load i8, ptr %ptr5, align 1
  %l6 = load i8, ptr %ptr6, align 2
  %l7 = load i8, ptr %ptr7, align 1

  ; Write all eight bytes back in reverse order.
  store i8 %l7, ptr %ptr0, align 8
  store i8 %l6, ptr %ptr1, align 1
  store i8 %l5, ptr %ptr2, align 2
  store i8 %l4, ptr %ptr3, align 1
  store i8 %l3, ptr %ptr4, align 4
  store i8 %l2, ptr %ptr5, align 1
  store i8 %l1, ptr %ptr6, align 2
  store i8 %l0, ptr %ptr7, align 1

  ret void

; CHECK-LABEL: @int8x8a8
; CHECK: load <8 x i8>
; CHECK: store <8 x i8>
}

; 12 bytes, pointer aligned to 8. Expects one <8 x i8> access plus one
; <4 x i8> access for the loads and likewise for the stores; the directives
; are the order-insensitive (DAG) form.
define void @int8x12a8(ptr nocapture align 8 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2
  %ptr3 = getelementptr i8, ptr %ptr, i64 3
  %ptr4 = getelementptr i8, ptr %ptr, i64 4
  %ptr5 = getelementptr i8, ptr %ptr, i64 5
  %ptr6 = getelementptr i8, ptr %ptr, i64 6
  %ptr7 = getelementptr i8, ptr %ptr, i64 7
  %ptr8 = getelementptr i8, ptr %ptr, i64 8
  %ptr9 = getelementptr i8, ptr %ptr, i64 9
  %ptra = getelementptr i8, ptr %ptr, i64 10
  %ptrb = getelementptr i8, ptr %ptr, i64 11

  %l0 = load i8, ptr %ptr0, align 8
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2
  %l3 = load i8, ptr %ptr3, align 1
  %l4 = load i8, ptr %ptr4, align 4
  %l5 = load i8, ptr %ptr5, align 1
  %l6 = load i8, ptr %ptr6, align 2
  %l7 = load i8, ptr %ptr7, align 1
  %l8 = load i8, ptr %ptr8, align 8
  %l9 = load i8, ptr %ptr9, align 1
  %la = load i8, ptr %ptra, align 2
  %lb = load i8, ptr %ptrb, align 1

  ; Write all twelve bytes back in reverse order.
  store i8 %lb, ptr %ptr0, align 8
  store i8 %la, ptr %ptr1, align 1
  store i8 %l9, ptr %ptr2, align 2
  store i8 %l8, ptr %ptr3, align 1
  store i8 %l7, ptr %ptr4, align 4
  store i8 %l6, ptr %ptr5, align 1
  store i8 %l5, ptr %ptr6, align 2
  store i8 %l4, ptr %ptr7, align 1
  store i8 %l3, ptr %ptr8, align 8
  store i8 %l2, ptr %ptr9, align 1
  store i8 %l1, ptr %ptra, align 2
  store i8 %l0, ptr %ptrb, align 1

  ret void

; CHECK-LABEL: @int8x12a8
; CHECK-DAG: load <8 x i8>
; CHECK-DAG: load <4 x i8>
; CHECK-DAG: store <8 x i8>
; CHECK-DAG: store <4 x i8>
}


; 16 bytes, pointer aligned to 8. Expects two <8 x i8> loads followed by two
; <8 x i8> stores.
define void @int8x16a8(ptr nocapture align 8 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2
  %ptr3 = getelementptr i8, ptr %ptr, i64 3
  %ptr4 = getelementptr i8, ptr %ptr, i64 4
  %ptr5 = getelementptr i8, ptr %ptr, i64 5
  %ptr6 = getelementptr i8, ptr %ptr, i64 6
  %ptr7 = getelementptr i8, ptr %ptr, i64 7
  %ptr8 = getelementptr i8, ptr %ptr, i64 8
  %ptr9 = getelementptr i8, ptr %ptr, i64 9
  %ptra = getelementptr i8, ptr %ptr, i64 10
  %ptrb = getelementptr i8, ptr %ptr, i64 11
  %ptrc = getelementptr i8, ptr %ptr, i64 12
  %ptrd = getelementptr i8, ptr %ptr, i64 13
  %ptre = getelementptr i8, ptr %ptr, i64 14
  %ptrf = getelementptr i8, ptr %ptr, i64 15

  %l0 = load i8, ptr %ptr0, align 8
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2
  %l3 = load i8, ptr %ptr3, align 1
  %l4 = load i8, ptr %ptr4, align 4
  %l5 = load i8, ptr %ptr5, align 1
  %l6 = load i8, ptr %ptr6, align 2
  %l7 = load i8, ptr %ptr7, align 1
  %l8 = load i8, ptr %ptr8, align 8
  %l9 = load i8, ptr %ptr9, align 1
  %la = load i8, ptr %ptra, align 2
  %lb = load i8, ptr %ptrb, align 1
  %lc = load i8, ptr %ptrc, align 4
  %ld = load i8, ptr %ptrd, align 1
  %le = load i8, ptr %ptre, align 2
  %lf = load i8, ptr %ptrf, align 1

  ; Write all sixteen bytes back in reverse order.
  store i8 %lf, ptr %ptr0, align 8
  store i8 %le, ptr %ptr1, align 1
  store i8 %ld, ptr %ptr2, align 2
  store i8 %lc, ptr %ptr3, align 1
  store i8 %lb, ptr %ptr4, align 4
  store i8 %la, ptr %ptr5, align 1
  store i8 %l9, ptr %ptr6, align 2
  store i8 %l8, ptr %ptr7, align 1
  store i8 %l7, ptr %ptr8, align 8
  store i8 %l6, ptr %ptr9, align 1
  store i8 %l5, ptr %ptra, align 2
  store i8 %l4, ptr %ptrb, align 1
  store i8 %l3, ptr %ptrc, align 4
  store i8 %l2, ptr %ptrd, align 1
  store i8 %l1, ptr %ptre, align 2
  store i8 %l0, ptr %ptrf, align 1

  ret void

; CHECK-LABEL: @int8x16a8
; CHECK: load <8 x i8>
; CHECK: load <8 x i8>
; CHECK: store <8 x i8>
; CHECK: store <8 x i8>
}