diff options
| author | Ted Gould <ted@gould.cx> | 2010-05-15 18:08:17 +0000 |
|---|---|---|
| committer | Ted Gould <ted@gould.cx> | 2010-05-15 18:08:17 +0000 |
| commit | 2d8c2dfd832ce207aef3895e702bff4098ab7136 (patch) | |
| tree | 642a37c6e3ca05d5e991ffe868f03c9cc58e51bc /src/libnr | |
| parent | Merge from trunk (diff) | |
| parent | Minor tweaks to text toolbar. (diff) | |
| download | inkscape-2d8c2dfd832ce207aef3895e702bff4098ab7136.tar.gz inkscape-2d8c2dfd832ce207aef3895e702bff4098ab7136.zip | |
Updating to trunk
(bzr r8254.1.54)
Diffstat (limited to 'src/libnr')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/libnr/Makefile_insert | 8 |
| -rw-r--r-- | src/libnr/have_mmx.S | 47 |
| -rw-r--r-- | src/libnr/nr-compose-transform.cpp | 118 |
| -rw-r--r-- | src/libnr/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S | 125 |
| -rw-r--r-- | src/libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S | 231 |
| -rw-r--r-- | src/libnr/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S | 227 |
6 files changed, 66 insertions, 690 deletions
diff --git a/src/libnr/Makefile_insert b/src/libnr/Makefile_insert index 4b19028f9..8dd3c46e3 100644 --- a/src/libnr/Makefile_insert +++ b/src/libnr/Makefile_insert @@ -1,13 +1,5 @@ ## Makefile.am fragment sourced by src/Makefile.am. -if USE_MMX -libnr_mmx_sources = \ - libnr/have_mmx.S \ - libnr/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S \ - libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S \ - libnr/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S -endif - ink_common_sources += \ libnr/in-svg-plane.h \ libnr/nr-blit.cpp \ diff --git a/src/libnr/have_mmx.S b/src/libnr/have_mmx.S deleted file mode 100644 index d6428191e..000000000 --- a/src/libnr/have_mmx.S +++ /dev/null @@ -1,47 +0,0 @@ - .file "have_mmx.S" - -# Ensure Inkscape is execshield protected - .section .note.GNU-stack - .previous - - .version "01.01" -gcc2_compiled.: -.text - .align 16 -.globl nr_have_mmx - .type nr_have_mmx,@function - -nr_have_mmx: - push %ebx - -# Check if bit 21 in flags word is writeable - - pushfl - popl %eax - movl %eax,%ebx - xorl $0x00200000, %eax - pushl %eax - popfl - pushfl - popl %eax - - cmpl %eax, %ebx - - je .notfound - -# OK, we have CPUID - - movl $1, %eax - cpuid - - test $0x00800000, %edx - jz .notfound - - movl $1, %eax - jmp .out - -.notfound: - movl $0, %eax -.out: - popl %ebx - ret diff --git a/src/libnr/nr-compose-transform.cpp b/src/libnr/nr-compose-transform.cpp index 6e03faf2f..e7c286569 100644 --- a/src/libnr/nr-compose-transform.cpp +++ b/src/libnr/nr-compose-transform.cpp @@ -66,18 +66,25 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, if (alpha == 0) return; + // Both alpha and color components are stored temporarily with a range of [0,255^2], so more supersampling and we get an overflow + if (xd+yd>16) { + xd = 8; + yd = 8; + } + xsize = (1 << xd); ysize = (1 << yd); size = xsize * ysize; dbits = xd + yd; + unsigned int rounding_fix = size/2; /* Set up fixed point matrix */ - FFs_x_x = (long) (d2s[0] * (1 << FBITS) + 0.5); - FFs_x_y = (long) 
(d2s[1] * (1 << FBITS) + 0.5); - FFs_y_x = (long) (d2s[2] * (1 << FBITS) + 0.5); - FFs_y_y = (long) (d2s[3] * (1 << FBITS) + 0.5); - FFs__x = (long) (d2s[4] * (1 << FBITS) + 0.5); - FFs__y = (long) (d2s[5] * (1 << FBITS) + 0.5); + FFs_x_x = (long) floor(d2s[0] * (1 << FBITS) + 0.5); + FFs_x_y = (long) floor(d2s[1] * (1 << FBITS) + 0.5); + FFs_y_x = (long) floor(d2s[2] * (1 << FBITS) + 0.5); + FFs_y_y = (long) floor(d2s[3] * (1 << FBITS) + 0.5); + FFs__x = (long) floor(d2s[4] * (1 << FBITS) + 0.5); + FFs__y = (long) floor(d2s[5] * (1 << FBITS) + 0.5); FFs_x_x_S = FFs_x_x >> xd; FFs_x_y_S = FFs_x_y >> xd; @@ -114,35 +121,40 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, sy = (FFsy + FF_sy_S[i]) >> FBITS; if ((sy >= 0) && (sy < sh)) { const unsigned char *s; - unsigned int ca; s = spx + sy * srs + sx * 4; - ca = NR_PREMUL_112 (s[3], alpha); - r += NR_PREMUL_121 (s[0], ca); - g += NR_PREMUL_121 (s[1], ca); - b += NR_PREMUL_121 (s[2], ca); - a += NR_NORMALIZE_21(ca); + r += NR_PREMUL_112 (s[0], s[3]); + g += NR_PREMUL_112 (s[1], s[3]); + b += NR_PREMUL_112 (s[2], s[3]); + a += s[3]; } } } - a >>= dbits; + a = (a*alpha + rounding_fix) >> dbits; + // Compare to nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P if (a != 0) { - r = r >> dbits; - g = g >> dbits; - b = b >> dbits; - if (a == 255) { - /* Transparent BG, premul src */ - d[0] = r; - d[1] = g; - d[2] = b; - d[3] = a; + r = (r + rounding_fix) >> dbits; + g = (g + rounding_fix) >> dbits; + b = (b + rounding_fix) >> dbits; + if (a == 255*255) { + /* Full coverage, demul src */ + d[0] = NR_NORMALIZE_21(r); + d[1] = NR_NORMALIZE_21(g); + d[2] = NR_NORMALIZE_21(b); + d[3] = NR_NORMALIZE_21(a); + } else if (d[3] == 0) { + /* Only foreground, demul src */ + d[0] = NR_DEMUL_221(r,a); + d[1] = NR_DEMUL_221(g,a); + d[2] = NR_DEMUL_221(b,a); + d[3] = NR_NORMALIZE_21(a); } else { unsigned int ca; /* Full composition */ - ca = NR_COMPOSEA_112(a, d[3]); - d[0] = NR_COMPOSENNN_111121 (r, a, d[0], d[3], ca); 
- d[1] = NR_COMPOSENNN_111121 (g, a, d[1], d[3], ca); - d[2] = NR_COMPOSENNN_111121 (b, a, d[2], d[3], ca); - d[3] = NR_NORMALIZE_21(ca); + ca = NR_COMPOSEA_213(a, d[3]); + d[0] = NR_COMPOSEPNN_221131 (r, a, d[0], d[3], ca); + d[1] = NR_COMPOSEPNN_221131 (g, a, d[1], d[3], ca); + d[2] = NR_COMPOSEPNN_221131 (b, a, d[2], d[3], ca); + d[3] = NR_NORMALIZE_31(ca); } } /* Advance pointers */ @@ -227,9 +239,7 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h int x, y; size = (1 << dbits); - unsigned alpha_rounding_fix = size * 255; - unsigned rgb_rounding_fix = size * (255 * 256); - if (alpha > 127) ++alpha; + unsigned int rounding_fix = size/2; d0 = px; FFsx0 = FFd2s[4]; @@ -252,32 +262,30 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h sy = (long (FFsy >> (FBITS_HP - FBITS)) + FF_S[2 * i + 1]) >> FBITS; if ((sy >= 0) && (sy < sh)) { const unsigned char *s; - unsigned int ca; s = spx + sy * srs + sx * 4; - ca = NR_PREMUL_112(s[3], alpha); - r += NR_PREMUL_123(s[0], ca); - g += NR_PREMUL_123(s[1], ca); - b += NR_PREMUL_123(s[2], ca); - a += ca; + r += NR_PREMUL_112(s[0], s[3]); + g += NR_PREMUL_112(s[1], s[3]); + b += NR_PREMUL_112(s[2], s[3]); + a += s[3]; } } } - a = (a + alpha_rounding_fix) >> (8 + dbits); + a = (a*alpha + rounding_fix) >> dbits; if (a != 0) { - r = (r + rgb_rounding_fix) >> (16 + dbits); - g = (g + rgb_rounding_fix) >> (16 + dbits); - b = (b + rgb_rounding_fix) >> (16 + dbits); - if ((a == 255) || (d[3] == 0)) { + r = (r + rounding_fix) >> dbits; + g = (g + rounding_fix) >> dbits; + b = (b + rounding_fix) >> dbits; + if ((a == 255*255) || (d[3] == 0)) { /* Transparent BG, premul src */ - d[0] = r; - d[1] = g; - d[2] = b; - d[3] = a; + d[0] = NR_NORMALIZE_21(r); + d[1] = NR_NORMALIZE_21(g); + d[2] = NR_NORMALIZE_21(b); + d[3] = NR_NORMALIZE_21(a); } else { - d[0] = NR_COMPOSEPPP_1111 (r, a, d[0]); - d[1] = NR_COMPOSEPPP_1111 (g, a, d[1]); - d[2] = NR_COMPOSEPPP_1111 (b, a, d[2]); - 
d[3] = NR_COMPOSEA_111(a, d[3]); + d[0] = NR_COMPOSEPPP_2211 (r, a, d[0]); + d[1] = NR_COMPOSEPPP_2211 (g, a, d[1]); + d[2] = NR_COMPOSEPPP_2211 (b, a, d[2]); + d[3] = NR_COMPOSEA_211(a, d[3]); } } /* Advance pointers */ @@ -302,11 +310,17 @@ void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, in if (alpha == 0) return; - dbits = xd + yd; + // Both alpha and color components are stored temporarily with a range of [0,255^2], so more supersampling and we get an overflow + if (xd+yd>16) { + xd = 8; + yd = 8; + } + + dbits = xd + yd; for (i = 0; i < 6; i++) { - FFd2s[i] = (long) (d2s[i] * (1 << FBITS) + 0.5); - FFd2s_HP[i] = (long long) (d2s[i] * (1 << FBITS_HP) + 0.5);; + FFd2s[i] = (long) floor(d2s[i] * (1 << FBITS) + 0.5); + FFd2s_HP[i] = (long long) floor(d2s[i] * (1 << FBITS_HP) + 0.5);; } if (dbits == 0) { diff --git a/src/libnr/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S b/src/libnr/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S deleted file mode 100644 index db2cbec5a..000000000 --- a/src/libnr/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S +++ /dev/null @@ -1,125 +0,0 @@ - .file "nr-compose.c" - -# Ensure Inkscape is execshield protected - .section .note.GNU-stack - .previous - - .text - .align 2 -.globl nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP - .type nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP,@function - -/* - * This code is in public domain - * - * c 32(%ebp) - * srs 28(%ebp) - * spx 24(%ebp) - * rs 20(%ebp) - * h 16(%ebp) - * w 12(%ebp) - * px 8(%ebp) - * r -8(%ebp) - * g -12(%ebp) - * b -16(%ebp) - * a -20(%ebp) - * s -24(%ebp) -> %esi - * d -28(%ebp) -> %edi - * x -32(%ebp) -> %ebx - * y -36(%ebp) - * ca -40(%ebp) - * - * mm0 Fg - * mm1 FgA - * mm2 FgPre - * mm3 - * mm4 - * mm5 - * mm6 128 - * mm7 0 - * -*/ - -nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP: - pushl %ebp - movl %esp, %ebp - pushl %ebx - subl $36, %esp - pushl %edi - pushl %esi - -/* Load %mm7 with [0 0 0 0] */ - movl $0, %eax - movd %eax, %mm7 - -/* Load %mm6 with [128 128 128 128] */ - movl $0x80808080, %eax - movd %eax, %mm6 
- punpcklbw %mm7, %mm6 - -/* FgC -> %mm0 */ - movl 32(%ebp), %eax - movd (%eax), %mm0 - punpcklbw %mm7, %mm0 - -/* for (y = ...) */ - movl 16(%ebp), %ecx -.fory: - -/* d = px */ -/* s = spx */ - movl 8(%ebp), %edi - movl 24(%ebp), %esi - -/* for (x = ...) */ - movl 12(%ebp), %ebx -.forx: - -/* [m m m m] -> %mm1 */ - movzbl (%esi), %eax - testb $0xff, %al - jz .clip - movd %eax, %mm1 - punpcklwd %mm1, %mm1 - punpckldq %mm1, %mm1 - -/* Fg -> mm2 */ - movq %mm0, %mm2 - pmullw %mm1, %mm2 - paddw %mm6, %mm2 - movq %mm2, %mm3 - psrlw $8, %mm3 - paddw %mm3, %mm2 - psrlw $8, %mm2 - -/* Store pixel */ - packuswb %mm2, %mm2 - movd %mm2, (%edi) - -.clip: - addl $4, %edi - incl %esi - - decl %ebx - jnz .forx - - movl 20(%ebp), %eax - addl %eax, 8(%ebp) - movl 28(%ebp), %eax - addl %eax, 24(%ebp) - - decl %ecx - jnz .fory - -.exit: - emms - popl %esi - popl %edi - addl $36, %esp - popl %ebx - popl %ebp - ret - -.Lfe1: - .size nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP,.Lfe1-nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP - .ident "GCC: (GNU) 3.2" diff --git a/src/libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S b/src/libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S deleted file mode 100644 index fe1d9be57..000000000 --- a/src/libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S +++ /dev/null @@ -1,231 +0,0 @@ - .file "nr-compose.c" - -# Ensure Inkscape is execshield protected - .section .note.GNU-stack - .previous - - .text - .align 2 -.globl nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP - .type nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP,@function - -/* - * This code is in public domain - * - * c 32(%ebp) - * srs 28(%ebp) - * spx 24(%ebp) - * rs 20(%ebp) - * h 16(%ebp) - * w 12(%ebp) - * px 8(%ebp) - * r -8(%ebp) - * g -12(%ebp) - * b -16(%ebp) - * a -20(%ebp) - * s -24(%ebp) -> %esi - * d -28(%ebp) -> %edi - * x -32(%ebp) -> %ebx - * y -36(%ebp) - * ca -40(%ebp) - * - * mm0 Fg - * mm1 MMMM - * mm2 FgM - * mm3 - * mm4 - * mm5 255 - * mm6 128 - * mm7 0 - * -*/ - -nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP: - pushl %ebp - movl %esp, 
%ebp - pushl %ebx - subl $36, %esp - pushl %edi - pushl %esi - -/* Load %mm7 with [0 0 0 0] */ - movl $0, %eax - movd %eax, %mm7 - -/* Load %mm6 with [128 128 128 128] */ - movl $0x80808080, %eax - movd %eax, %mm6 - punpcklbw %mm7, %mm6 - -/* Load %mm5 with [255 255 255 255] */ - movl $0xffffffff, %eax - movd %eax, %mm5 - punpcklbw %mm7, %mm5 - -/* FgC -> %mm0 */ - movl 32(%ebp), %eax - movd (%eax), %mm0 - punpcklbw %mm7, %mm0 - -/* Check full opacity */ - cmpb $0xff, %al - jz .opaque - -/* for (y = ...) */ - movl 16(%ebp), %ecx -.fory: - -/* d = px */ -/* s = spx */ - movl 8(%ebp), %edi - movl 24(%ebp), %esi - -/* for (x = ...) */ - movl 12(%ebp), %ebx -.forx: - -/* [m m m m] -> %mm1 */ - movzbl (%esi), %eax - testb $0xff, %al - jz .clip - movd %eax, %mm1 - punpcklwd %mm1, %mm1 - punpckldq %mm1, %mm1 - -/* Fg -> mm2 */ - movq %mm0, %mm2 - pmullw %mm1, %mm2 - paddw %mm6, %mm2 - movq %mm2, %mm3 - psrlw $8, %mm3 - paddw %mm3, %mm2 - psrlw $8, %mm2 - -/* [255 - FgA] -> mm1 */ - movq %mm2, %mm1 - punpckhwd %mm1, %mm1 - punpckhdq %mm1, %mm1 - pxor %mm5, %mm1 - -/* Bg -> mm3 */ - movd (%edi), %mm3 - punpcklbw %mm7, %mm3 - -/* Fg + ((255 - FgA) * Bg) / 255 */ - pmullw %mm1, %mm3 - paddw %mm6, %mm3 - movq %mm3, %mm4 - psrlw $8, %mm4 - paddw %mm4, %mm3 - psrlw $8, %mm3 - paddw %mm2, %mm3 - -/* Store pixel */ - packuswb %mm3, %mm3 - movd %mm3, (%edi) - -.clip: - addl $4, %edi - incl %esi - - decl %ebx - jnz .forx - - movl 20(%ebp), %eax - addl %eax, 8(%ebp) - movl 28(%ebp), %eax - addl %eax, 24(%ebp) - - decl %ecx - jnz .fory - -.exit: - emms - popl %esi - popl %edi - addl $36, %esp - popl %ebx - popl %ebp - ret - -.opaque: -/* for (y = ...) */ - movl 16(%ebp), %ecx -.o_fory: - -/* d = px */ -/* s = spx */ - movl 8(%ebp), %edi - movl 24(%ebp), %esi - -/* for (x = ...) 
*/ - movl 12(%ebp), %ebx -.o_forx: - -/* [m m m m] -> %mm1 */ - movzbl (%esi), %eax - testb $0xff, %al - jz .o_clip - cmpb $0xff, %al - jz .o_full - movd %eax, %mm1 - punpcklwd %mm1, %mm1 - punpckldq %mm1, %mm1 - -/* Fg -> mm2 */ - movq %mm0, %mm2 - pmullw %mm1, %mm2 - paddw %mm6, %mm2 - movq %mm2, %mm3 - psrlw $8, %mm3 - paddw %mm3, %mm2 - psrlw $8, %mm2 - -/* [255 - FgA] -> mm1 */ - movq %mm2, %mm1 - punpckhwd %mm1, %mm1 - punpckhdq %mm1, %mm1 - pxor %mm5, %mm1 - -/* Bg -> mm3 */ - movd (%edi), %mm3 - punpcklbw %mm7, %mm3 - -/* Fg + ((255 - FgA) * Bg) / 255 */ - pmullw %mm1, %mm3 - paddw %mm6, %mm3 - movq %mm3, %mm4 - psrlw $8, %mm4 - paddw %mm4, %mm3 - psrlw $8, %mm3 - paddw %mm2, %mm3 - - jmp .o_store - -.o_full: - movq %mm0, %mm3 - -.o_store: -/* Store pixel */ - packuswb %mm3, %mm3 - movd %mm3, (%edi) - -.o_clip: - addl $4, %edi - incl %esi - - decl %ebx - jnz .o_forx - - movl 20(%ebp), %eax - addl %eax, 8(%ebp) - movl 28(%ebp), %eax - addl %eax, 24(%ebp) - - decl %ecx - jnz .o_fory - jmp .exit - -.Lfe1: - .size nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP,.Lfe1-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP - .ident "GCC: (GNU) 3.2" diff --git a/src/libnr/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S b/src/libnr/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S deleted file mode 100644 index 37261e572..000000000 --- a/src/libnr/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S +++ /dev/null @@ -1,227 +0,0 @@ - .file "nr-compose.c" - -# Ensure Inkscape is execshield protected - .section .note.GNU-stack - .previous - - .text - .align 2 -.globl nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P - .type nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P,@function - -/* - * This code is in public domain - * - * alpha 32(%ebp) - * srs 28(%ebp) - * spx 24(%ebp) - * rs 20(%ebp) - * h 16(%ebp) - * w 12(%ebp) - * px 8(%ebp) - * r -8(%ebp) - * g -12(%ebp) - * b -16(%ebp) - * a -20(%ebp) - * s -24(%ebp) -> %esi - * d -28(%ebp) -> %edi - * x -32(%ebp) -> %ebx - * y -36(%ebp) - * ca -40(%ebp) - * - * mm0 A - * mm1 FgA - * mm2 FgPre - * mm3 - * mm4 - * mm5 255 - * mm6 
128 - * mm7 0 - * -*/ - -nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P: - pushl %ebp - movl %esp, %ebp - pushl %ebx - subl $36, %esp - pushl %edi - pushl %esi - -/* Load %mm7 with [0 0 0 0] */ - movl $0, %eax - movd %eax, %mm7 - -/* Load %mm6 with [128 128 128 128] */ - movl $0x80808080, %eax - movd %eax, %mm6 - punpcklbw %mm7, %mm6 - -/* Load %mm5 with [255 255 255 255] */ - movl $0xffffffff, %eax - movd %eax, %mm5 - punpcklbw %mm7, %mm5 - -/* Load %mm0 with [a a a a] */ -/* Check full opacity */ - movzbl 32(%ebp), %eax - cmpb $0xff, %al - jz .opaque - movd %eax, %mm0 - punpcklwd %mm0, %mm0 - punpckldq %mm0, %mm0 - -/* for (y = ...) */ - movl 16(%ebp), %ecx -.fory: - -/* d = px */ -/* s = spx */ - movl 8(%ebp), %edi - movl 24(%ebp), %esi - -/* for (x = ...) */ - movl 12(%ebp), %ebx -.forx: - -/* Fg -> %mm1 */ -/* fixme: Do we have to bother about alignment here? (Lauris) */ - movl (%esi), %eax - testl $0xff000000, %eax - jz .clip - movd %eax, %mm1 - punpcklbw %mm7, %mm1 - -/* [Fg * a] -> mm1 */ - pmullw %mm0, %mm1 - paddw %mm6, %mm1 - movq %mm1, %mm2 - psrlw $8, %mm2 - paddw %mm2, %mm1 - psrlw $8, %mm1 - -/* [255 - FgA] -> mm2 */ - movq %mm1, %mm2 - punpckhwd %mm2, %mm2 - punpckhdq %mm2, %mm2 - pxor %mm5, %mm2 - -/* Bg -> mm3 */ - movd (%edi), %mm3 - punpcklbw %mm7, %mm3 - -/* Fg + ((255 - FgA) * Bg) / 255 */ - pmullw %mm2, %mm3 - paddw %mm6, %mm3 - movq %mm3, %mm4 - psrlw $8, %mm4 - paddw %mm4, %mm3 - psrlw $8, %mm3 - paddw %mm1, %mm3 - -/* Store pixel */ - packuswb %mm3, %mm3 - movd %mm3, %eax - movb %al, 0(%edi) - shrl $8, %eax - movb %al, 1(%edi) - shrl $8, %eax - movb %al, 2(%edi) - -.clip: - addl $3, %edi - addl $4, %esi - - decl %ebx - jnz .forx - - movl 20(%ebp), %eax - addl %eax, 8(%ebp) - movl 28(%ebp), %eax - addl %eax, 24(%ebp) - - decl %ecx - jnz .fory - -.exit: - emms - popl %esi - popl %edi - addl $36, %esp - popl %ebx - popl %ebp - ret - -.opaque: -/* for (y = ...) 
*/ - movl 16(%ebp), %ecx -.o_fory: - -/* d = px */ -/* s = spx */ - movl 8(%ebp), %edi - movl 24(%ebp), %esi - -/* for (x = ...) */ - movl 12(%ebp), %ebx -.o_forx: - -/* Fg -> %mm1 */ -/* fixme: Do we have to bother about alignment here? (Lauris) */ - movl (%esi), %eax - testl $0xff000000, %eax - jz .o_clip - cmpl $0xff000000, %eax - jnb .o_store - movd %eax, %mm1 - punpcklbw %mm7, %mm1 - -/* [255 - FgA] -> mm2 */ - movq %mm1, %mm2 - punpckhwd %mm2, %mm2 - punpckhdq %mm2, %mm2 - pxor %mm5, %mm2 - -/* Bg -> mm3 */ - movd (%edi), %mm3 - punpcklbw %mm7, %mm3 - -/* Fg + ((255 - FgA) * Bg) / 255 */ - pmullw %mm2, %mm3 - paddw %mm6, %mm3 - movq %mm3, %mm4 - psrlw $8, %mm4 - paddw %mm4, %mm3 - psrlw $8, %mm3 - paddw %mm1, %mm3 - -/* Store pixel */ - packuswb %mm3, %mm3 - movd %mm3, %eax -.o_store: - movb %al, 0(%edi) - shrl $8, %eax - movb %al, 1(%edi) - shrl $8, %eax - movb %al, 2(%edi) - -.o_clip: - addl $3, %edi - addl $4, %esi - - decl %ebx - jnz .o_forx - - movl 20(%ebp), %eax - addl %eax, 8(%ebp) - movl 28(%ebp), %eax - addl %eax, 24(%ebp) - - decl %ecx - jnz .o_fory - - jmp .exit - -.Lfe1: - .size nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P,.Lfe1-nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P - .ident "GCC: (GNU) 3.2" |
