/*
* Copyright (C) 2004 Thomas Hellstrom, All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sub license,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE CODE SUPPLIER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/* Thomas' original, gutted for Mesa by Keith Whitwell
*/
#include "via_tex.h"
#if defined( USE_SSE_ASM )
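/* The copy uses prefetchnta, which fetches source data with a
 * non-temporal hint to minimize cache pollution, and movntps
 * non-temporal stores.  Non-temporal stores are weakly ordered, so
 * the copy must end with an sfence to order them with later stores.
 */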
#define SSE_PREFETCH " prefetchnta "
#define FENCE __asm__ __volatile__ ("sfence":::"memory");
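/* Warm up the cache: prefetch the first ten 32-byte lines (320 bytes)
 * of the source.  The main loop in SSE_CPY continues the pattern by
 * prefetching 320 and 352 bytes ahead of the current read position.
 */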
#define PREFETCH1(arch_prefetch,from)                     \
    __asm__ __volatile__ (                                \
        "1: " arch_prefetch "(%0)\n"                      \
        arch_prefetch "32(%0)\n"                          \
        arch_prefetch "64(%0)\n"                          \
        arch_prefetch "96(%0)\n"                          \
        arch_prefetch "128(%0)\n"                         \
        arch_prefetch "160(%0)\n"                         \
        arch_prefetch "192(%0)\n"                         \
        arch_prefetch "224(%0)\n"                         \
        arch_prefetch "256(%0)\n"                         \
        arch_prefetch "288(%0)\n"                         \
        "2:\n"                                            \
        : : "r" (from) );
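/* Copy the sub-64-byte tail: whole dwords via rep movsl, then one
 * movsw if bit 1 of the byte count is set and one movsb if bit 0 is
 * set.
 */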
#define small_memcpy(to,from,n)                           \
{                                                         \
    __asm__ __volatile__(                                 \
        "movl %2,%%ecx\n\t"                               \
        "sarl $2,%%ecx\n\t"                               \
        "rep ; movsl\n\t"                                 \
        "testb $2,%b2\n\t"                                \
        "je 1f\n\t"                                       \
        "movsw\n"                                         \
        "1:\ttestb $1,%b2\n\t"                            \
        "je 2f\n\t"                                       \
        "movsb\n"                                         \
        "2:"                                              \
        :"=&D" (to), "=&S" (from)                         \
        :"q" (n),"0" ((long) to),"1" ((long) from)        \
        : "%ecx","memory");                               \
}
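/* Copy lcnt 64-byte chunks from "from" to "to".  Loads use movups or
 * movaps depending on source alignment; stores are always movntps,
 * which is non-temporal and requires a 16-byte-aligned destination.
 * Passing "#" as the prefetch string turns the prefetch lines into
 * assembler comments, disabling prefetch for the final chunks.
 */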
#define SSE_CPY(prefetch,from,to,dummy,lcnt)              \
    if ((unsigned long) from & 15) {                      \
        __asm__ __volatile__ (                            \
            "1:\n"                                        \
            prefetch "320(%1)\n"                          \
            " movups (%1), %%xmm0\n"                      \
            " movups 16(%1), %%xmm1\n"                    \
            " movntps %%xmm0, (%0)\n"                     \
            " movntps %%xmm1, 16(%0)\n"                   \
            prefetch "352(%1)\n"                          \
            " movups 32(%1), %%xmm2\n"                    \
            " movups 48(%1), %%xmm3\n"                    \
            " movntps %%xmm2, 32(%0)\n"                   \
            " movntps %%xmm3, 48(%0)\n"                   \
            " addl $64,%0\n"                              \
            " addl $64,%1\n"                              \
            " decl %2\n"                                  \
            " jne 1b\n"                                   \
            :"=&D"(to), "=&S"(from), "=&r"(dummy)         \
            :"0" (to), "1" (from), "2" (lcnt): "memory"); \
    } else {                                              \
        __asm__ __volatile__ (                            \
            "2:\n"                                        \
            prefetch "320(%1)\n"                          \
            " movaps (%1), %%xmm0\n"                      \
            " movaps 16(%1), %%xmm1\n"                    \
            " movntps %%xmm0, (%0)\n"                     \
            " movntps %%xmm1, 16(%0)\n"                   \
            prefetch "352(%1)\n"                          \
            " movaps 32(%1), %%xmm2\n"                    \
            " movaps 48(%1), %%xmm3\n"                    \
            " movntps %%xmm2, 32(%0)\n"                   \
            " movntps %%xmm3, 48(%0)\n"                   \
            " addl $64,%0\n"                              \
            " addl $64,%1\n"                              \
            " decl %2\n"                                  \
            " jne 2b\n"                                   \
            :"=&D"(to), "=&S"(from), "=&r"(dummy)         \
            :"0" (to), "1" (from), "2" (lcnt): "memory"); \
    }
/* Copy sz bytes from "from" to "to" using non-temporal SSE stores,
 * suitable for uncached / write-combined destinations such as the
 * framebuffer or AGP texture space.
 */
void via_sse_memcpy(void *to,
                    const void *from,
                    size_t sz)
{
   int dummy;
   int lcnt = sz >> 6;          /* number of 64-byte chunks */
   int rest = sz & 63;          /* tail of less than 64 bytes */

   PREFETCH1(SSE_PREFETCH, from);

   /* Copy all but the last five chunks with in-loop prefetching.  The
    * loop reads up to 352 bytes ahead of the current position, so the
    * final five chunks (320 bytes) are copied with prefetching
    * disabled to avoid prefetching past the end of the source buffer.
    */
   if (lcnt > 5) {
      lcnt -= 5;
      SSE_CPY(SSE_PREFETCH, from, to, dummy, lcnt);
      lcnt = 5;
   }
   if (lcnt) {
      SSE_CPY("#", from, to, dummy, lcnt);
   }
   if (rest)
      small_memcpy(to, from, rest);
   FENCE;
}
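/* A minimal usage sketch (not from the original driver): the caller
 * must guarantee a 16-byte-aligned destination, since movntps faults
 * on unaligned addresses.  "dst", "src" and "nbytes" are hypothetical
 * names used only for illustration.
 *
 *     if (((unsigned long) dst & 15) == 0)
 *         via_sse_memcpy(dst, src, nbytes);
 *     else
 *         memcpy(dst, src, nbytes);    /​* fall back to libc *​/
 */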
#endif /* defined( USE_SSE_ASM ) */