| /* |
| * Copyright (C) 2004 Thomas Hellstrom, All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sub license, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the |
| * next paragraph) shall be included in all copies or substantial portions |
| * of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
| * THE CODE SUPPLIER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| * DEALINGS IN THE SOFTWARE. |
| */ |
| |
/* Thomas' original code, gutted for Mesa by Keith Whitwell
 */
| |
| #include "via_tex.h" |
| |
| #if defined( USE_SSE_ASM ) |
| |
/* Non-temporal prefetch: pulls the line close to the CPU while minimizing
 * cache pollution -- appropriate for data that is streamed through once. */
#define SSE_PREFETCH " prefetchnta "

/* Store fence: makes all preceding non-temporal (movntps) stores globally
 * visible before any later stores.  Must follow every movntps copy loop. */
#define FENCE __asm__ __volatile__ ("sfence":::"memory");
| |
| |
/* Warm the cache with the first ~320 bytes of the source buffer before the
 * copy loop starts.  'arch_prefetch' is the prefetch instruction string to
 * use (e.g. SSE_PREFETCH); 'from' is the source pointer.
 *
 * Fix: the original ladder prefetched 0,32,...,192 and then 256,288,
 * skipping offset 224 -- a 32-byte gap in the otherwise contiguous
 * every-32-bytes coverage.  The 224 line is restored below.
 */
#define PREFETCH1(arch_prefetch,from)                   \
    __asm__ __volatile__ (                              \
                          "1:  " arch_prefetch "(%0)\n" \
                          arch_prefetch "32(%0)\n"      \
                          arch_prefetch "64(%0)\n"      \
                          arch_prefetch "96(%0)\n"      \
                          arch_prefetch "128(%0)\n"     \
                          arch_prefetch "160(%0)\n"     \
                          arch_prefetch "192(%0)\n"     \
                          arch_prefetch "224(%0)\n"     \
                          arch_prefetch "256(%0)\n"     \
                          arch_prefetch "288(%0)\n"     \
                          "2:\n"                        \
                          : : "r" (from) );
| |
| |
| |
/* Copy n bytes (intended for the sub-64-byte tail) from 'from' to 'to'
 * using x86 string instructions: n/4 dword moves, then an optional word
 * move and an optional byte move based on the low two bits of n.
 * Advances both pointers in place; clobbers ecx.
 *
 * NOTE(review): movl/sarl on the count and the (long) pointer casts assume
 * a 32-bit target -- this path is i386-only (file is guarded by
 * USE_SSE_ASM); confirm before building for 64-bit.
 */
#define small_memcpy(to,from,n)                                           \
{                                                                         \
    __asm__ __volatile__(                                                 \
        "movl %2,%%ecx\n\t"                                               \
        "sarl $2,%%ecx\n\t"      /* ecx = n / 4 dwords */                 \
        "rep ; movsl\n\t"                                                 \
        "testb $2,%b2\n\t"       /* trailing word if bit 1 of n is set */ \
        "je 1f\n\t"                                                       \
        "movsw\n"                                                         \
        "1:\ttestb $1,%b2\n\t"   /* trailing byte if bit 0 of n is set */ \
        "je 2f\n\t"                                                       \
        "movsb\n"                                                         \
        "2:"                                                              \
        :"=&D" (to), "=&S" (from)                                         \
        :"q" (n),"0" ((long) to),"1" ((long) from)                        \
        : "%ecx","memory");                                               \
}
| |
| |
/* Copy 'lcnt' 64-byte blocks from 'from' to 'to' with SSE.  Loads use
 * movups or movaps depending on whether 'from' is 16-byte aligned; stores
 * are always non-temporal (movntps), bypassing the cache.  'prefetch' is
 * either a prefetch instruction string (keeps fetching 320..352 bytes
 * ahead of the load cursor) or "#", which turns those lines into assembler
 * comments so no prefetch is issued near the end of the buffer.
 * Advances 'from' and 'to' by 64*lcnt in place; 'dummy' absorbs the
 * clobbered loop counter.
 *
 * NOTE(review): movntps requires a 16-byte-aligned destination, but only
 * the *source* alignment is tested here -- presumably callers guarantee
 * 'to' is aligned; confirm before reusing this macro elsewhere.
 */
#define SSE_CPY(prefetch,from,to,dummy,lcnt)                        \
    if ((unsigned long) from & 15) {                                \
        /* unaligned source: movups loads */                        \
        __asm__ __volatile__ (                                      \
                              "1:\n"                                \
                              prefetch "320(%1)\n"                  \
                              "  movups (%1), %%xmm0\n"             \
                              "  movups 16(%1), %%xmm1\n"           \
                              "  movntps %%xmm0, (%0)\n"            \
                              "  movntps %%xmm1, 16(%0)\n"          \
                              prefetch "352(%1)\n"                  \
                              "  movups 32(%1), %%xmm2\n"           \
                              "  movups 48(%1), %%xmm3\n"           \
                              "  movntps %%xmm2, 32(%0)\n"          \
                              "  movntps %%xmm3, 48(%0)\n"          \
                              "  addl $64,%0\n"                     \
                              "  addl $64,%1\n"                     \
                              "  decl %2\n"                         \
                              "  jne 1b\n"                          \
                              :"=&D"(to), "=&S"(from), "=&r"(dummy) \
                              :"0" (to), "1" (from), "2" (lcnt): "memory"); \
    } else {                                                        \
        /* aligned source: movaps loads */                          \
        __asm__ __volatile__ (                                      \
                              "2:\n"                                \
                              prefetch "320(%1)\n"                  \
                              "  movaps (%1), %%xmm0\n"             \
                              "  movaps 16(%1), %%xmm1\n"           \
                              "  movntps %%xmm0, (%0)\n"            \
                              "  movntps %%xmm1, 16(%0)\n"          \
                              prefetch "352(%1)\n"                  \
                              "  movaps 32(%1), %%xmm2\n"           \
                              "  movaps 48(%1), %%xmm3\n"           \
                              "  movntps %%xmm2, 32(%0)\n"          \
                              "  movntps %%xmm3, 48(%0)\n"          \
                              "  addl $64,%0\n"                     \
                              "  addl $64,%1\n"                     \
                              "  decl %2\n"                         \
                              "  jne 2b\n"                          \
                              :"=&D"(to), "=&S"(from), "=&r"(dummy) \
                              :"0" (to), "1" (from), "2" (lcnt): "memory"); \
    }
| |
| |
| |
/* Copy sz bytes from 'from' to 'to' using SSE non-temporal stores,
 * minimizing cache pollution for large streaming uploads.  The bulk is
 * moved in 64-byte blocks (SSE_CPY); any sub-64-byte tail goes through
 * small_memcpy; the final sfence orders the movntps stores.
 *
 * NOTE(review): sz-derived counts are narrowed into int -- fine for
 * texture-sized copies, but confirm callers never pass sz large enough to
 * overflow lcnt.
 */
void via_sse_memcpy(void *to,
                    const void *from,
                    size_t sz)

{
   int dummy;
   int lcnt = sz >> 6;   /* number of whole 64-byte blocks */
   int rest = sz & 63;   /* trailing bytes, < 64 */

   /* Warm the cache with the start of the source buffer. */
   PREFETCH1(SSE_PREFETCH,from);

   if (lcnt > 5) {
      /* Copy all but the last 5 blocks (320 bytes) while prefetching
       * 320..352 bytes ahead; stopping 5 blocks early keeps those
       * prefetches from running past the end of the source. */
      lcnt -= 5;
      SSE_CPY(SSE_PREFETCH,from,to,dummy,lcnt);
      lcnt = 5;
   }
   if (lcnt) {
      /* Remaining blocks without prefetch: "#" turns the prefetch
       * lines into assembler comments inside SSE_CPY. */
      SSE_CPY("#",from,to,dummy,lcnt);
   }
   if (rest) small_memcpy(to, from, rest);  /* sub-64-byte tail */
   FENCE;  /* flush/order the non-temporal stores before returning */
}
| |
| #endif /* defined( USE_SSE_ASM ) */ |