|
fast memory copy code(2) mov ecx, 2048 shr ecx, 6 loopMemToL1: prefetchnta 64[ESI] // Prefetch next loop, non-temporal prefetchnta 96[ESI] movq mm1, 0[ESI] // Read in source data movq mm2, 8[ESI] movq mm3, 16[ESI] movq mm4, 24[ESI] movq mm5, 32[ESI] movq mm6, 40[ESI] movq mm7, 48[ESI] movq mm0, 56[ESI] movq 0[EDI], mm1 // Store into L1 movq 8[EDI], mm2 movq 16[EDI], mm3 movq 24[EDI], mm4 movq 32[EDI], mm5 movq 40[EDI], mm6 movq 48[EDI], mm7 movq 56[EDI], mm0 add esi, 64 add edi, 64 dec ecx jnz loopMemToL1 pop edi // Now copy from L1 to system memory push esi mov esi, tbuf mov ecx, 2048 shr ecx, 6 loopL1ToMem: movq mm1, 0[ESI] // Read in source data from L1 movq mm2, 8[ESI] movq mm3, 16[ESI] movq mm4, 24[ESI]
|