@HardWareMan `movep` works 100% perfectly; you just have to point it directly at the first valid byte.
Proof:
Code: Select all
// Copies `size` bytes from a byte-wide peripheral whose valid bytes sit on
// every other address (source, source + 2, source + 4, ...) into contiguous
// memory, using movep to gather four peripheral bytes per instruction.
//
// destination: start of the contiguous output buffer; may be odd or even.
// source:      address of the FIRST VALID peripheral byte (movep reads that
//              address and then every second address after it).
// size:        number of bytes stored at `destination`, i.e. the size in
//              working RAM, not the span of the peripheral (SRAM) address
//              range, which is twice as large.
// Returns `destination`.
//
// Limit: the block loop counts with dbra, which only uses the low 16 bits of
// its counter register, so `size` must stay below 0x200000 bytes (2 MiB).
STATIC void* memcpyFromPeripheral( void* const destination, void const* const source, u32 size )
{
    if( size > 0 )
    {
        u8* dest = destination;
        u8 const* src = source;

        // The move.l / move.w stores below need an even destination address,
        // so peel off one byte by hand when the destination starts odd.
        if( ((u32)dest) & 1 )
        {
            *dest++ = *src;
            src += 2;
            size--;
        }

        u32 data;

        // Number of full 32-byte passes through the unrolled loop.
        s32 loops = size >> 5;

        // Each unrolled line (movep.l + move.l + addq.l) is 8 bytes of code
        // and copies 4 bytes of data. ((size >> 2) & 7) leftover longwords
        // times 8 bytes of code == (size << 1) & (7 << 3); negated, it is the
        // offset back from label 2 at which to enter the unrolled body.
        s32 jump = -((size << 1) & (7 << 3));

        // Jump into the middle of the unrolled loop to handle the leftover
        // longwords first, then let dbra run the full 8-longword passes.
        // src, dest and loops are read-write operands ("+"). The "memory"
        // clobber is required because the asm reads through src and writes
        // through dest, which the operand list alone does not tell the
        // compiler; "cc" is listed because move/addq change the condition codes.
        __asm volatile(
            " jmp (2f, %%pc, %4)\n"
            "1: movep.l 0(%1), %0; move.l %0, (%2)+; addq.l #8, %1\n"
            " movep.l 0(%1), %0; move.l %0, (%2)+; addq.l #8, %1\n"
            " movep.l 0(%1), %0; move.l %0, (%2)+; addq.l #8, %1\n"
            " movep.l 0(%1), %0; move.l %0, (%2)+; addq.l #8, %1\n"
            " movep.l 0(%1), %0; move.l %0, (%2)+; addq.l #8, %1\n"
            " movep.l 0(%1), %0; move.l %0, (%2)+; addq.l #8, %1\n"
            " movep.l 0(%1), %0; move.l %0, (%2)+; addq.l #8, %1\n"
            " movep.l 0(%1), %0; move.l %0, (%2)+; addq.l #8, %1\n"
            "2: dbra %3, 1b"
            : "=d"(data), "+a"(src), "+a"(dest), "+d"(loops)
            : "d"(jump)
            : "cc", "memory"
        );

        // Remaining 2-byte piece: movep.w gathers two peripheral bytes
        // (src and src + 2), so the source advances by 4.
        if( size & 2 )
        {
            __asm volatile(
                "movep.w 0(%1), %0; move.w %0, (%2)+; addq.l #4, %1\n"
                : "=d"(data), "+a"(src), "+a"(dest)
                :
                : "cc", "memory"
            );
        }

        // Final odd byte, if any.
        if( size & 1 )
        {
            *dest++ = *src;
        }
    }
    return destination;
}
The main loop takes 66 bytes of code to copy 32 bytes of data, while a basic C function takes 8 bytes of code to move 1 byte of data.
HELP. Spanish TVs are brain washing people to be hostile to me.