I usually forget how to load vectors with Advanced SIMD (NEON) intrinsics, since I don't use them often. I also tend to forget that SSHL with negated shift values is how you do an arithmetic right shift on a per-vector-element basis, which, in hindsight, makes sense, since SSHR only has an immediate variant.
Now I have a place to look if I forget again.
#include <arm_neon.h>

#include <cstdint>
#include <cstdio>
#include <cstring>
// Demonstrates a per-lane arithmetic right shift with Advanced SIMD intrinsics.
//
// vshlq_s32 (SSHL) shifts every lane of its first operand by the signed count
// held in the matching lane of its second operand; a negative count therefore
// acts as an arithmetic right shift by that many bits.
//
// Prints the four result lanes as hex, lane 3 first, and returns 0.
int main() {
    // Input lanes (lane 0 first): sign bit only, 1, a negative value, and 0.
    const std::int32_t vals[4] = {
        static_cast<std::int32_t>(0x80000000u),
        0x00000001,
        static_cast<std::int32_t>(0xFF000000u),
        0,
    };
    // Negative counts: shift right by 1, 2, 8 and 10 bits respectively.
    const std::int32_t shifts[4] = {-1, -2, -8, -10};

    // vld1q_s32 loads four consecutive 32-bit integers into a 128-bit vector.
    const int32x4_t values = vld1q_s32(vals);
    const int32x4_t shift = vld1q_s32(shifts);
    const int32x4_t result = vshlq_s32(values, shift);

    // Store the lanes as unsigned so they match the %X conversion below;
    // the reinterpret only changes the lane type, not the bits.
    std::uint32_t out[4];
    vst1q_u32(out, vreinterpretq_u32_s32(result));

    // Expected output (lane 3 first):
    // 0x00000000 | 0xFFFF0000 | 0x00000000 | 0xC0000000
    // The casts guarantee `unsigned int` arguments for %X even on targets
    // where uint32_t is a different underlying type.
    std::printf("0x%08X | 0x%08X | 0x%08X | 0x%08X\n",
                static_cast<unsigned>(out[3]), static_cast<unsigned>(out[2]),
                static_cast<unsigned>(out[1]), static_cast<unsigned>(out[0]));
    return 0;
}