summary refs log tree commit diff stats
path: root/src/libnr
diff options
context:
space:
mode:
author Ted Gould <ted@gould.cx> 2010-05-15 18:08:17 +0000
committer Ted Gould <ted@gould.cx> 2010-05-15 18:08:17 +0000
commit 2d8c2dfd832ce207aef3895e702bff4098ab7136 (patch)
tree 642a37c6e3ca05d5e991ffe868f03c9cc58e51bc /src/libnr
parent Merge from trunk (diff)
parent Minor tweaks to text toolbar. (diff)
download inkscape-2d8c2dfd832ce207aef3895e702bff4098ab7136.tar.gz
inkscape-2d8c2dfd832ce207aef3895e702bff4098ab7136.zip
Updating to trunk
(bzr r8254.1.54)
Diffstat (limited to 'src/libnr')
-rw-r--r-- src/libnr/Makefile_insert 8
-rw-r--r-- src/libnr/have_mmx.S 47
-rw-r--r-- src/libnr/nr-compose-transform.cpp 118
-rw-r--r-- src/libnr/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S 125
-rw-r--r-- src/libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S 231
-rw-r--r-- src/libnr/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S 227
6 files changed, 66 insertions, 690 deletions
diff --git a/src/libnr/Makefile_insert b/src/libnr/Makefile_insert
index 4b19028f9..8dd3c46e3 100644
--- a/src/libnr/Makefile_insert
+++ b/src/libnr/Makefile_insert
@@ -1,13 +1,5 @@
## Makefile.am fragment sourced by src/Makefile.am.
-if USE_MMX
-libnr_mmx_sources = \
- libnr/have_mmx.S \
- libnr/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S \
- libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S \
- libnr/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S
-endif
-
ink_common_sources += \
libnr/in-svg-plane.h \
libnr/nr-blit.cpp \
diff --git a/src/libnr/have_mmx.S b/src/libnr/have_mmx.S
deleted file mode 100644
index d6428191e..000000000
--- a/src/libnr/have_mmx.S
+++ /dev/null
@@ -1,47 +0,0 @@
- .file "have_mmx.S"
-
-# Ensure Inkscape is execshield protected
- .section .note.GNU-stack
- .previous
-
- .version "01.01"
-gcc2_compiled.:
-.text
- .align 16
-.globl nr_have_mmx
- .type nr_have_mmx,@function
-
-nr_have_mmx:
- push %ebx
-
-# Check if bit 21 in flags word is writeable
-
- pushfl
- popl %eax
- movl %eax,%ebx
- xorl $0x00200000, %eax
- pushl %eax
- popfl
- pushfl
- popl %eax
-
- cmpl %eax, %ebx
-
- je .notfound
-
-# OK, we have CPUID
-
- movl $1, %eax
- cpuid
-
- test $0x00800000, %edx
- jz .notfound
-
- movl $1, %eax
- jmp .out
-
-.notfound:
- movl $0, %eax
-.out:
- popl %ebx
- ret
diff --git a/src/libnr/nr-compose-transform.cpp b/src/libnr/nr-compose-transform.cpp
index 6e03faf2f..e7c286569 100644
--- a/src/libnr/nr-compose-transform.cpp
+++ b/src/libnr/nr-compose-transform.cpp
@@ -66,18 +66,25 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h,
if (alpha == 0) return;
+ // Both alpha and color components are stored temporarily with a range of [0,255^2], so more supersampling and we get an overflow
+ if (xd+yd>16) {
+ xd = 8;
+ yd = 8;
+ }
+
xsize = (1 << xd);
ysize = (1 << yd);
size = xsize * ysize;
dbits = xd + yd;
+ unsigned int rounding_fix = size/2;
/* Set up fixed point matrix */
- FFs_x_x = (long) (d2s[0] * (1 << FBITS) + 0.5);
- FFs_x_y = (long) (d2s[1] * (1 << FBITS) + 0.5);
- FFs_y_x = (long) (d2s[2] * (1 << FBITS) + 0.5);
- FFs_y_y = (long) (d2s[3] * (1 << FBITS) + 0.5);
- FFs__x = (long) (d2s[4] * (1 << FBITS) + 0.5);
- FFs__y = (long) (d2s[5] * (1 << FBITS) + 0.5);
+ FFs_x_x = (long) floor(d2s[0] * (1 << FBITS) + 0.5);
+ FFs_x_y = (long) floor(d2s[1] * (1 << FBITS) + 0.5);
+ FFs_y_x = (long) floor(d2s[2] * (1 << FBITS) + 0.5);
+ FFs_y_y = (long) floor(d2s[3] * (1 << FBITS) + 0.5);
+ FFs__x = (long) floor(d2s[4] * (1 << FBITS) + 0.5);
+ FFs__y = (long) floor(d2s[5] * (1 << FBITS) + 0.5);
FFs_x_x_S = FFs_x_x >> xd;
FFs_x_y_S = FFs_x_y >> xd;
@@ -114,35 +121,40 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h,
sy = (FFsy + FF_sy_S[i]) >> FBITS;
if ((sy >= 0) && (sy < sh)) {
const unsigned char *s;
- unsigned int ca;
s = spx + sy * srs + sx * 4;
- ca = NR_PREMUL_112 (s[3], alpha);
- r += NR_PREMUL_121 (s[0], ca);
- g += NR_PREMUL_121 (s[1], ca);
- b += NR_PREMUL_121 (s[2], ca);
- a += NR_NORMALIZE_21(ca);
+ r += NR_PREMUL_112 (s[0], s[3]);
+ g += NR_PREMUL_112 (s[1], s[3]);
+ b += NR_PREMUL_112 (s[2], s[3]);
+ a += s[3];
}
}
}
- a >>= dbits;
+ a = (a*alpha + rounding_fix) >> dbits;
+ // Compare to nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P
if (a != 0) {
- r = r >> dbits;
- g = g >> dbits;
- b = b >> dbits;
- if (a == 255) {
- /* Transparent BG, premul src */
- d[0] = r;
- d[1] = g;
- d[2] = b;
- d[3] = a;
+ r = (r + rounding_fix) >> dbits;
+ g = (g + rounding_fix) >> dbits;
+ b = (b + rounding_fix) >> dbits;
+ if (a == 255*255) {
+ /* Full coverage, demul src */
+ d[0] = NR_NORMALIZE_21(r);
+ d[1] = NR_NORMALIZE_21(g);
+ d[2] = NR_NORMALIZE_21(b);
+ d[3] = NR_NORMALIZE_21(a);
+ } else if (d[3] == 0) {
+ /* Only foreground, demul src */
+ d[0] = NR_DEMUL_221(r,a);
+ d[1] = NR_DEMUL_221(g,a);
+ d[2] = NR_DEMUL_221(b,a);
+ d[3] = NR_NORMALIZE_21(a);
} else {
unsigned int ca;
/* Full composition */
- ca = NR_COMPOSEA_112(a, d[3]);
- d[0] = NR_COMPOSENNN_111121 (r, a, d[0], d[3], ca);
- d[1] = NR_COMPOSENNN_111121 (g, a, d[1], d[3], ca);
- d[2] = NR_COMPOSENNN_111121 (b, a, d[2], d[3], ca);
- d[3] = NR_NORMALIZE_21(ca);
+ ca = NR_COMPOSEA_213(a, d[3]);
+ d[0] = NR_COMPOSEPNN_221131 (r, a, d[0], d[3], ca);
+ d[1] = NR_COMPOSEPNN_221131 (g, a, d[1], d[3], ca);
+ d[2] = NR_COMPOSEPNN_221131 (b, a, d[2], d[3], ca);
+ d[3] = NR_NORMALIZE_31(ca);
}
}
/* Advance pointers */
@@ -227,9 +239,7 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h
int x, y;
size = (1 << dbits);
- unsigned alpha_rounding_fix = size * 255;
- unsigned rgb_rounding_fix = size * (255 * 256);
- if (alpha > 127) ++alpha;
+ unsigned int rounding_fix = size/2;
d0 = px;
FFsx0 = FFd2s[4];
@@ -252,32 +262,30 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h
sy = (long (FFsy >> (FBITS_HP - FBITS)) + FF_S[2 * i + 1]) >> FBITS;
if ((sy >= 0) && (sy < sh)) {
const unsigned char *s;
- unsigned int ca;
s = spx + sy * srs + sx * 4;
- ca = NR_PREMUL_112(s[3], alpha);
- r += NR_PREMUL_123(s[0], ca);
- g += NR_PREMUL_123(s[1], ca);
- b += NR_PREMUL_123(s[2], ca);
- a += ca;
+ r += NR_PREMUL_112(s[0], s[3]);
+ g += NR_PREMUL_112(s[1], s[3]);
+ b += NR_PREMUL_112(s[2], s[3]);
+ a += s[3];
}
}
}
- a = (a + alpha_rounding_fix) >> (8 + dbits);
+ a = (a*alpha + rounding_fix) >> dbits;
if (a != 0) {
- r = (r + rgb_rounding_fix) >> (16 + dbits);
- g = (g + rgb_rounding_fix) >> (16 + dbits);
- b = (b + rgb_rounding_fix) >> (16 + dbits);
- if ((a == 255) || (d[3] == 0)) {
+ r = (r + rounding_fix) >> dbits;
+ g = (g + rounding_fix) >> dbits;
+ b = (b + rounding_fix) >> dbits;
+ if ((a == 255*255) || (d[3] == 0)) {
/* Transparent BG, premul src */
- d[0] = r;
- d[1] = g;
- d[2] = b;
- d[3] = a;
+ d[0] = NR_NORMALIZE_21(r);
+ d[1] = NR_NORMALIZE_21(g);
+ d[2] = NR_NORMALIZE_21(b);
+ d[3] = NR_NORMALIZE_21(a);
} else {
- d[0] = NR_COMPOSEPPP_1111 (r, a, d[0]);
- d[1] = NR_COMPOSEPPP_1111 (g, a, d[1]);
- d[2] = NR_COMPOSEPPP_1111 (b, a, d[2]);
- d[3] = NR_COMPOSEA_111(a, d[3]);
+ d[0] = NR_COMPOSEPPP_2211 (r, a, d[0]);
+ d[1] = NR_COMPOSEPPP_2211 (g, a, d[1]);
+ d[2] = NR_COMPOSEPPP_2211 (b, a, d[2]);
+ d[3] = NR_COMPOSEA_211(a, d[3]);
}
}
/* Advance pointers */
@@ -302,11 +310,17 @@ void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, in
if (alpha == 0) return;
- dbits = xd + yd;
+ // Both alpha and color components are stored temporarily with a range of [0,255^2], so more supersampling and we get an overflow
+ if (xd+yd>16) {
+ xd = 8;
+ yd = 8;
+ }
+
+ dbits = xd + yd;
for (i = 0; i < 6; i++) {
- FFd2s[i] = (long) (d2s[i] * (1 << FBITS) + 0.5);
- FFd2s_HP[i] = (long long) (d2s[i] * (1 << FBITS_HP) + 0.5);;
+ FFd2s[i] = (long) floor(d2s[i] * (1 << FBITS) + 0.5);
+ FFd2s_HP[i] = (long long) floor(d2s[i] * (1 << FBITS_HP) + 0.5);;
}
if (dbits == 0) {
diff --git a/src/libnr/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S b/src/libnr/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S
deleted file mode 100644
index db2cbec5a..000000000
--- a/src/libnr/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S
+++ /dev/null
@@ -1,125 +0,0 @@
- .file "nr-compose.c"
-
-# Ensure Inkscape is execshield protected
- .section .note.GNU-stack
- .previous
-
- .text
- .align 2
-.globl nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP
- .type nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP,@function
-
-/*
- * This code is in public domain
- *
- * c 32(%ebp)
- * srs 28(%ebp)
- * spx 24(%ebp)
- * rs 20(%ebp)
- * h 16(%ebp)
- * w 12(%ebp)
- * px 8(%ebp)
- * r -8(%ebp)
- * g -12(%ebp)
- * b -16(%ebp)
- * a -20(%ebp)
- * s -24(%ebp) -> %esi
- * d -28(%ebp) -> %edi
- * x -32(%ebp) -> %ebx
- * y -36(%ebp)
- * ca -40(%ebp)
- *
- * mm0 Fg
- * mm1 FgA
- * mm2 FgPre
- * mm3
- * mm4
- * mm5
- * mm6 128
- * mm7 0
- *
-*/
-
-nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP:
- pushl %ebp
- movl %esp, %ebp
- pushl %ebx
- subl $36, %esp
- pushl %edi
- pushl %esi
-
-/* Load %mm7 with [0 0 0 0] */
- movl $0, %eax
- movd %eax, %mm7
-
-/* Load %mm6 with [128 128 128 128] */
- movl $0x80808080, %eax
- movd %eax, %mm6
- punpcklbw %mm7, %mm6
-
-/* FgC -> %mm0 */
- movl 32(%ebp), %eax
- movd (%eax), %mm0
- punpcklbw %mm7, %mm0
-
-/* for (y = ...) */
- movl 16(%ebp), %ecx
-.fory:
-
-/* d = px */
-/* s = spx */
- movl 8(%ebp), %edi
- movl 24(%ebp), %esi
-
-/* for (x = ...) */
- movl 12(%ebp), %ebx
-.forx:
-
-/* [m m m m] -> %mm1 */
- movzbl (%esi), %eax
- testb $0xff, %al
- jz .clip
- movd %eax, %mm1
- punpcklwd %mm1, %mm1
- punpckldq %mm1, %mm1
-
-/* Fg -> mm2 */
- movq %mm0, %mm2
- pmullw %mm1, %mm2
- paddw %mm6, %mm2
- movq %mm2, %mm3
- psrlw $8, %mm3
- paddw %mm3, %mm2
- psrlw $8, %mm2
-
-/* Store pixel */
- packuswb %mm2, %mm2
- movd %mm2, (%edi)
-
-.clip:
- addl $4, %edi
- incl %esi
-
- decl %ebx
- jnz .forx
-
- movl 20(%ebp), %eax
- addl %eax, 8(%ebp)
- movl 28(%ebp), %eax
- addl %eax, 24(%ebp)
-
- decl %ecx
- jnz .fory
-
-.exit:
- emms
- popl %esi
- popl %edi
- addl $36, %esp
- popl %ebx
- popl %ebp
- ret
-
-.Lfe1:
- .size nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP,.Lfe1-nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP
- .ident "GCC: (GNU) 3.2"
diff --git a/src/libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S b/src/libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S
deleted file mode 100644
index fe1d9be57..000000000
--- a/src/libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S
+++ /dev/null
@@ -1,231 +0,0 @@
- .file "nr-compose.c"
-
-# Ensure Inkscape is execshield protected
- .section .note.GNU-stack
- .previous
-
- .text
- .align 2
-.globl nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP
- .type nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP,@function
-
-/*
- * This code is in public domain
- *
- * c 32(%ebp)
- * srs 28(%ebp)
- * spx 24(%ebp)
- * rs 20(%ebp)
- * h 16(%ebp)
- * w 12(%ebp)
- * px 8(%ebp)
- * r -8(%ebp)
- * g -12(%ebp)
- * b -16(%ebp)
- * a -20(%ebp)
- * s -24(%ebp) -> %esi
- * d -28(%ebp) -> %edi
- * x -32(%ebp) -> %ebx
- * y -36(%ebp)
- * ca -40(%ebp)
- *
- * mm0 Fg
- * mm1 MMMM
- * mm2 FgM
- * mm3
- * mm4
- * mm5 255
- * mm6 128
- * mm7 0
- *
-*/
-
-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP:
- pushl %ebp
- movl %esp, %ebp
- pushl %ebx
- subl $36, %esp
- pushl %edi
- pushl %esi
-
-/* Load %mm7 with [0 0 0 0] */
- movl $0, %eax
- movd %eax, %mm7
-
-/* Load %mm6 with [128 128 128 128] */
- movl $0x80808080, %eax
- movd %eax, %mm6
- punpcklbw %mm7, %mm6
-
-/* Load %mm5 with [255 255 255 255] */
- movl $0xffffffff, %eax
- movd %eax, %mm5
- punpcklbw %mm7, %mm5
-
-/* FgC -> %mm0 */
- movl 32(%ebp), %eax
- movd (%eax), %mm0
- punpcklbw %mm7, %mm0
-
-/* Check full opacity */
- cmpb $0xff, %al
- jz .opaque
-
-/* for (y = ...) */
- movl 16(%ebp), %ecx
-.fory:
-
-/* d = px */
-/* s = spx */
- movl 8(%ebp), %edi
- movl 24(%ebp), %esi
-
-/* for (x = ...) */
- movl 12(%ebp), %ebx
-.forx:
-
-/* [m m m m] -> %mm1 */
- movzbl (%esi), %eax
- testb $0xff, %al
- jz .clip
- movd %eax, %mm1
- punpcklwd %mm1, %mm1
- punpckldq %mm1, %mm1
-
-/* Fg -> mm2 */
- movq %mm0, %mm2
- pmullw %mm1, %mm2
- paddw %mm6, %mm2
- movq %mm2, %mm3
- psrlw $8, %mm3
- paddw %mm3, %mm2
- psrlw $8, %mm2
-
-/* [255 - FgA] -> mm1 */
- movq %mm2, %mm1
- punpckhwd %mm1, %mm1
- punpckhdq %mm1, %mm1
- pxor %mm5, %mm1
-
-/* Bg -> mm3 */
- movd (%edi), %mm3
- punpcklbw %mm7, %mm3
-
-/* Fg + ((255 - FgA) * Bg) / 255 */
- pmullw %mm1, %mm3
- paddw %mm6, %mm3
- movq %mm3, %mm4
- psrlw $8, %mm4
- paddw %mm4, %mm3
- psrlw $8, %mm3
- paddw %mm2, %mm3
-
-/* Store pixel */
- packuswb %mm3, %mm3
- movd %mm3, (%edi)
-
-.clip:
- addl $4, %edi
- incl %esi
-
- decl %ebx
- jnz .forx
-
- movl 20(%ebp), %eax
- addl %eax, 8(%ebp)
- movl 28(%ebp), %eax
- addl %eax, 24(%ebp)
-
- decl %ecx
- jnz .fory
-
-.exit:
- emms
- popl %esi
- popl %edi
- addl $36, %esp
- popl %ebx
- popl %ebp
- ret
-
-.opaque:
-/* for (y = ...) */
- movl 16(%ebp), %ecx
-.o_fory:
-
-/* d = px */
-/* s = spx */
- movl 8(%ebp), %edi
- movl 24(%ebp), %esi
-
-/* for (x = ...) */
- movl 12(%ebp), %ebx
-.o_forx:
-
-/* [m m m m] -> %mm1 */
- movzbl (%esi), %eax
- testb $0xff, %al
- jz .o_clip
- cmpb $0xff, %al
- jz .o_full
- movd %eax, %mm1
- punpcklwd %mm1, %mm1
- punpckldq %mm1, %mm1
-
-/* Fg -> mm2 */
- movq %mm0, %mm2
- pmullw %mm1, %mm2
- paddw %mm6, %mm2
- movq %mm2, %mm3
- psrlw $8, %mm3
- paddw %mm3, %mm2
- psrlw $8, %mm2
-
-/* [255 - FgA] -> mm1 */
- movq %mm2, %mm1
- punpckhwd %mm1, %mm1
- punpckhdq %mm1, %mm1
- pxor %mm5, %mm1
-
-/* Bg -> mm3 */
- movd (%edi), %mm3
- punpcklbw %mm7, %mm3
-
-/* Fg + ((255 - FgA) * Bg) / 255 */
- pmullw %mm1, %mm3
- paddw %mm6, %mm3
- movq %mm3, %mm4
- psrlw $8, %mm4
- paddw %mm4, %mm3
- psrlw $8, %mm3
- paddw %mm2, %mm3
-
- jmp .o_store
-
-.o_full:
- movq %mm0, %mm3
-
-.o_store:
-/* Store pixel */
- packuswb %mm3, %mm3
- movd %mm3, (%edi)
-
-.o_clip:
- addl $4, %edi
- incl %esi
-
- decl %ebx
- jnz .o_forx
-
- movl 20(%ebp), %eax
- addl %eax, 8(%ebp)
- movl 28(%ebp), %eax
- addl %eax, 24(%ebp)
-
- decl %ecx
- jnz .o_fory
- jmp .exit
-
-.Lfe1:
- .size nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP,.Lfe1-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP
- .ident "GCC: (GNU) 3.2"
diff --git a/src/libnr/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S b/src/libnr/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S
deleted file mode 100644
index 37261e572..000000000
--- a/src/libnr/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S
+++ /dev/null
@@ -1,227 +0,0 @@
- .file "nr-compose.c"
-
-# Ensure Inkscape is execshield protected
- .section .note.GNU-stack
- .previous
-
- .text
- .align 2
-.globl nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
- .type nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P,@function
-
-/*
- * This code is in public domain
- *
- * alpha 32(%ebp)
- * srs 28(%ebp)
- * spx 24(%ebp)
- * rs 20(%ebp)
- * h 16(%ebp)
- * w 12(%ebp)
- * px 8(%ebp)
- * r -8(%ebp)
- * g -12(%ebp)
- * b -16(%ebp)
- * a -20(%ebp)
- * s -24(%ebp) -> %esi
- * d -28(%ebp) -> %edi
- * x -32(%ebp) -> %ebx
- * y -36(%ebp)
- * ca -40(%ebp)
- *
- * mm0 A
- * mm1 FgA
- * mm2 FgPre
- * mm3
- * mm4
- * mm5 255
- * mm6 128
- * mm7 0
- *
-*/
-
-nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P:
- pushl %ebp
- movl %esp, %ebp
- pushl %ebx
- subl $36, %esp
- pushl %edi
- pushl %esi
-
-/* Load %mm7 with [0 0 0 0] */
- movl $0, %eax
- movd %eax, %mm7
-
-/* Load %mm6 with [128 128 128 128] */
- movl $0x80808080, %eax
- movd %eax, %mm6
- punpcklbw %mm7, %mm6
-
-/* Load %mm5 with [255 255 255 255] */
- movl $0xffffffff, %eax
- movd %eax, %mm5
- punpcklbw %mm7, %mm5
-
-/* Load %mm0 with [a a a a] */
-/* Check full opacity */
- movzbl 32(%ebp), %eax
- cmpb $0xff, %al
- jz .opaque
- movd %eax, %mm0
- punpcklwd %mm0, %mm0
- punpckldq %mm0, %mm0
-
-/* for (y = ...) */
- movl 16(%ebp), %ecx
-.fory:
-
-/* d = px */
-/* s = spx */
- movl 8(%ebp), %edi
- movl 24(%ebp), %esi
-
-/* for (x = ...) */
- movl 12(%ebp), %ebx
-.forx:
-
-/* Fg -> %mm1 */
-/* fixme: Do we have to bother about alignment here? (Lauris) */
- movl (%esi), %eax
- testl $0xff000000, %eax
- jz .clip
- movd %eax, %mm1
- punpcklbw %mm7, %mm1
-
-/* [Fg * a] -> mm1 */
- pmullw %mm0, %mm1
- paddw %mm6, %mm1
- movq %mm1, %mm2
- psrlw $8, %mm2
- paddw %mm2, %mm1
- psrlw $8, %mm1
-
-/* [255 - FgA] -> mm2 */
- movq %mm1, %mm2
- punpckhwd %mm2, %mm2
- punpckhdq %mm2, %mm2
- pxor %mm5, %mm2
-
-/* Bg -> mm3 */
- movd (%edi), %mm3
- punpcklbw %mm7, %mm3
-
-/* Fg + ((255 - FgA) * Bg) / 255 */
- pmullw %mm2, %mm3
- paddw %mm6, %mm3
- movq %mm3, %mm4
- psrlw $8, %mm4
- paddw %mm4, %mm3
- psrlw $8, %mm3
- paddw %mm1, %mm3
-
-/* Store pixel */
- packuswb %mm3, %mm3
- movd %mm3, %eax
- movb %al, 0(%edi)
- shrl $8, %eax
- movb %al, 1(%edi)
- shrl $8, %eax
- movb %al, 2(%edi)
-
-.clip:
- addl $3, %edi
- addl $4, %esi
-
- decl %ebx
- jnz .forx
-
- movl 20(%ebp), %eax
- addl %eax, 8(%ebp)
- movl 28(%ebp), %eax
- addl %eax, 24(%ebp)
-
- decl %ecx
- jnz .fory
-
-.exit:
- emms
- popl %esi
- popl %edi
- addl $36, %esp
- popl %ebx
- popl %ebp
- ret
-
-.opaque:
-/* for (y = ...) */
- movl 16(%ebp), %ecx
-.o_fory:
-
-/* d = px */
-/* s = spx */
- movl 8(%ebp), %edi
- movl 24(%ebp), %esi
-
-/* for (x = ...) */
- movl 12(%ebp), %ebx
-.o_forx:
-
-/* Fg -> %mm1 */
-/* fixme: Do we have to bother about alignment here? (Lauris) */
- movl (%esi), %eax
- testl $0xff000000, %eax
- jz .o_clip
- cmpl $0xff000000, %eax
- jnb .o_store
- movd %eax, %mm1
- punpcklbw %mm7, %mm1
-
-/* [255 - FgA] -> mm2 */
- movq %mm1, %mm2
- punpckhwd %mm2, %mm2
- punpckhdq %mm2, %mm2
- pxor %mm5, %mm2
-
-/* Bg -> mm3 */
- movd (%edi), %mm3
- punpcklbw %mm7, %mm3
-
-/* Fg + ((255 - FgA) * Bg) / 255 */
- pmullw %mm2, %mm3
- paddw %mm6, %mm3
- movq %mm3, %mm4
- psrlw $8, %mm4
- paddw %mm4, %mm3
- psrlw $8, %mm3
- paddw %mm1, %mm3
-
-/* Store pixel */
- packuswb %mm3, %mm3
- movd %mm3, %eax
-.o_store:
- movb %al, 0(%edi)
- shrl $8, %eax
- movb %al, 1(%edi)
- shrl $8, %eax
- movb %al, 2(%edi)
-
-.o_clip:
- addl $3, %edi
- addl $4, %esi
-
- decl %ebx
- jnz .o_forx
-
- movl 20(%ebp), %eax
- addl %eax, 8(%ebp)
- movl 28(%ebp), %eax
- addl %eax, 24(%ebp)
-
- decl %ecx
- jnz .o_fory
-
- jmp .exit
-
-.Lfe1:
- .size nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P,.Lfe1-nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
- .ident "GCC: (GNU) 3.2"