summary refs log tree commit diff stats
path: root/src/libnr
diff options
context:
space:
mode:
author Ted Gould <ted@gould.cx> 2009-12-21 16:37:12 +0000
committer Ted Gould <ted@gould.cx> 2009-12-21 16:37:12 +0000
commit 752a8f90d3442cdaa4689ba6de4b911ca4fda514 (patch)
tree 5e0739ec9bd2ac9cbdd2a2343859f89e02dae181 /src/libnr
parent Merging in from trunk (diff)
parent Updating the READMEs to better handle OSX. (diff)
download inkscape-752a8f90d3442cdaa4689ba6de4b911ca4fda514.tar.gz
inkscape-752a8f90d3442cdaa4689ba6de4b911ca4fda514.zip
Updating to current trunk
(bzr r8254.1.38)
Diffstat (limited to 'src/libnr')
-rw-r--r-- src/libnr/Makefile_insert 1
-rw-r--r-- src/libnr/nr-compose-transform.cpp 59
-rw-r--r-- src/libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM.S 414
3 files changed, 21 insertions, 453 deletions
diff --git a/src/libnr/Makefile_insert b/src/libnr/Makefile_insert
index 5cd2717be..4b19028f9 100644
--- a/src/libnr/Makefile_insert
+++ b/src/libnr/Makefile_insert
@@ -5,7 +5,6 @@ libnr_mmx_sources = \
libnr/have_mmx.S \
libnr/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S \
libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S \
- libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM.S \
libnr/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S
endif
diff --git a/src/libnr/nr-compose-transform.cpp b/src/libnr/nr-compose-transform.cpp
index afc8fd987..6e03faf2f 100644
--- a/src/libnr/nr-compose-transform.cpp
+++ b/src/libnr/nr-compose-transform.cpp
@@ -16,30 +16,25 @@
#include "nr-pixops.h"
#include "nr-matrix.h"
-
-#ifdef WITH_MMX
+/*#ifdef WITH_MMX
#ifdef __cplusplus
extern "C" {
-#endif /* __cplusplus */
-/* fixme: */
-int nr_have_mmx (void);
-void nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h, int rs,
- const unsigned char *spx, int sw, int sh, int srs,
- const long *FFd2s, unsigned int alpha);
-void nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h, int rs,
- const unsigned char *spx, int sw, int sh, int srs,
- const long *FFd2s, const long *FF_S, unsigned int alpha, int dbits);
+#endif // __cplusplus
+/ * fixme: * /
+/ *int nr_have_mmx (void);
#define NR_PIXOPS_MMX (1 && nr_have_mmx ())
#ifdef __cplusplus
}
#endif //__cplusplus
#endif
+*/
/* fixme: Implement missing (Lauris) */
/* fixme: PREMUL colors before calculating average (Lauris) */
/* Fixed point precision */
#define FBITS 12
+#define FBITS_HP 18 // In some places we need a higher precision
void nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, int rs,
const unsigned char *spx, int sw, int sh, int srs,
@@ -168,10 +163,10 @@ void nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P_TRANSFORM (unsigned char *px, int w, in
static void
nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h, int rs,
const unsigned char *spx, int sw, int sh, int srs,
- const long *FFd2s, unsigned int alpha)
+ const long long *FFd2s, unsigned int alpha)
{
- unsigned char *d0;
- int FFsx0, FFsy0;
+ unsigned char *d0;
+ long long FFsx0, FFsy0;
int x, y;
d0 = px;
@@ -180,15 +175,15 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h
for (y = 0; y < h; y++) {
unsigned char *d;
- long FFsx, FFsy;
+ long long FFsx, FFsy;
d = d0;
FFsx = FFsx0;
FFsy = FFsy0;
for (x = 0; x < w; x++) {
long sx, sy;
- sx = FFsx >> FBITS;
+ sx = long(FFsx >> FBITS_HP);
if ((sx >= 0) && (sx < sw)) {
- sy = FFsy >> FBITS;
+ sy = long(FFsy >> FBITS_HP);
if ((sy >= 0) && (sy < sh)) {
const unsigned char *s;
unsigned int a;
@@ -224,11 +219,11 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h
static void
nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h, int rs,
const unsigned char *spx, int sw, int sh, int srs,
- const long *FFd2s, const long *FF_S, unsigned int alpha, int dbits)
+ const long long *FFd2s, const long *FF_S, unsigned int alpha, int dbits)
{
int size;
unsigned char *d0;
- int FFsx0, FFsy0;
+ long long FFsx0, FFsy0;
int x, y;
size = (1 << dbits);
@@ -242,7 +237,7 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h
for (y = 0; y < h; y++) {
unsigned char *d;
- long FFsx, FFsy;
+ long long FFsx, FFsy;
d = d0;
FFsx = FFsx0;
FFsy = FFsy0;
@@ -252,9 +247,9 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h
r = g = b = a = 0;
for (i = 0; i < size; i++) {
long sx, sy;
- sx = (FFsx + FF_S[2 * i]) >> FBITS;
+ sx = (long (FFsx >> (FBITS_HP - FBITS)) + FF_S[2 * i]) >> FBITS;
if ((sx >= 0) && (sx < sw)) {
- sy = (FFsy + FF_S[2 * i + 1]) >> FBITS;
+ sy = (long (FFsy >> (FBITS_HP - FBITS)) + FF_S[2 * i + 1]) >> FBITS;
if ((sy >= 0) && (sy < sh)) {
const unsigned char *s;
unsigned int ca;
@@ -302,6 +297,7 @@ void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, in
{
int dbits;
long FFd2s[6];
+ long long FFd2s_HP[6]; // with higher precision
int i;
if (alpha == 0) return;
@@ -310,17 +306,11 @@ void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, in
for (i = 0; i < 6; i++) {
FFd2s[i] = (long) (d2s[i] * (1 << FBITS) + 0.5);
+ FFd2s_HP[i] = (long long) (d2s[i] * (1 << FBITS_HP) + 0.5);;
}
if (dbits == 0) {
-#ifdef WITH_MMX
- if (NR_PIXOPS_MMX) {
- /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
- nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s, alpha);
- return;
- }
-#endif
- nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s, alpha);
+ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, alpha);
} else {
int xsize, ysize;
long FFs_x_x_S, FFs_x_y_S, FFs_y_x_S, FFs_y_y_S;
@@ -344,14 +334,7 @@ void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, in
}
}
-#ifdef WITH_MMX
- if (NR_PIXOPS_MMX) {
- /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
- nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s, FF_S, alpha, dbits);
- return;
- }
-#endif
- nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s, FF_S, alpha, dbits);
+ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, FF_S, alpha, dbits);
}
}
diff --git a/src/libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM.S b/src/libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM.S
deleted file mode 100644
index e30056af2..000000000
--- a/src/libnr/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM.S
+++ /dev/null
@@ -1,414 +0,0 @@
- .file "nr-compose-transform.c"
-
-# Ensure Inkscape is execshield protected
- .section .note.GNU-stack
- .previous
-
- .text
- .align 2
-.globl nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0
- .type nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0,@function
-
-/*
- * This code is in public domain
- *
- */
-
-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0:
- pushl %ebp
- movl %esp, %ebp
- pushl %ebx
- subl $48, %esp
- pushl %edi
- pushl %esi
-
-/* Load %mm7 with [0 0 0 0] */
- movl $0, %eax
- movd %eax, %mm7
-
-/* Load %mm6 with [128 128 128 128] */
- movl $0x80808080, %eax
- movd %eax, %mm6
- punpcklbw %mm7, %mm6
-
-/* Load %mm5 with [255 255 255 255] */
- movl $0xffffffff, %eax
- movd %eax, %mm5
- punpcklbw %mm7, %mm5
-
-/* Load %mm0 with [a a a a] */
- movzbl 44(%ebp), %eax
- movd %eax, %mm0
- punpcklwd %mm0, %mm0
- punpckldq %mm0, %mm0
-
- movl 8(%ebp), %eax
- movl %eax, -8(%ebp)
- movl 40(%ebp), %eax
- addl $16, %eax
- movl (%eax), %eax
- movl %eax, -12(%ebp)
- movl 40(%ebp), %eax
- addl $20, %eax
- movl (%eax), %eax
- movl %eax, -16(%ebp)
- movl $0, -24(%ebp)
-.L29:
- movl -24(%ebp), %eax
- cmpl 16(%ebp), %eax
- jl .L32
- jmp .L28
-.L32:
- movl -8(%ebp), %edi
-
- movl -12(%ebp), %eax
- movl %eax, %esi
- movl -16(%ebp), %eax
- movl %eax, -36(%ebp)
-
- movl 12(%ebp), %ebx
-.for_x_0:
-
- movl %esi, %ecx
- cmpl $0, %ecx
- js .clip_0
- sarl $12, %ecx
- cmpl 28(%ebp), %ecx
- jge .clip_0
- shll $2, %ecx
-
- movl -36(%ebp), %eax
- cmpl $0, %eax
- js .clip_0
- sarl $12, %eax
- cmpl 32(%ebp), %eax
- jge .clip_0
- imull 36(%ebp), %eax
-
- addl %ecx, %eax
- addl 24(%ebp), %eax
-
-/* Fg -> %mm1 */
- movl (%eax), %eax
- testl $0xff000000, %eax
- jz .clip_0
- movd %eax, %mm1
- punpcklbw %mm7, %mm1
-
-/* [a a a 255] -> %mm3 */
- shrl $24, %eax
- movl $0x10101, %edx
- mull %edx
- orl $0xff000000, %eax
- movd %eax, %mm3
- punpcklbw %mm7, %mm3
-
-/* [Fg * a] -> mm1 */
- pmullw %mm3, %mm1
- paddw %mm6, %mm1
- movq %mm1, %mm4
- psrlw $8, %mm4
- paddw %mm4, %mm1
- psrlw $8, %mm1
-
-/* Multiply by alpha */
- pmullw %mm0, %mm1
- paddw %mm6, %mm1
- movq %mm1, %mm4
- psrlw $8, %mm4
- paddw %mm4, %mm1
- psrlw $8, %mm1
-
-/* [255 - FgA] -> mm2 */
- movq %mm1, %mm2
- punpckhwd %mm2, %mm2
- punpckhdq %mm2, %mm2
- pxor %mm5, %mm2
-
-/* Bg -> mm3 */
- movd (%edi), %mm3
- punpcklbw %mm7, %mm3
-
-/* Fg + ((255 - FgA) * Bg) / 255 */
-
- pmullw %mm2, %mm3
- paddw %mm6, %mm3
- movq %mm3, %mm4
- psrlw $8, %mm4
- paddw %mm4, %mm3
- psrlw $8, %mm3
- paddw %mm1, %mm3
-
-/* Store pixel */
- packuswb %mm3, %mm3
- movd %mm3, (%edi)
-
-.clip_0:
-.L37:
- movl 40(%ebp), %ecx
- movl (%ecx), %edx
- addl %edx, %esi
- movl 4(%ecx), %edx
- addl %edx, -36(%ebp)
-
- addl $4, %edi
-
- decl %ebx
- jnz .for_x_0
-
-.L34:
- movl 8(%ecx), %edx
- addl %edx, -12(%ebp)
- movl 12(%ecx), %edx
- addl %edx, -16(%ebp)
-
- movl 20(%ebp), %edx
- leal -8(%ebp), %eax
- addl %edx, (%eax)
- leal -24(%ebp), %eax
- incl (%eax)
- jmp .L29
-.L28:
- emms
- popl %esi
- popl %edi
- addl $48, %esp
- popl %ebx
- popl %ebp
- ret
-.Lfe2:
- .size nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0,.Lfe2-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0
-
-/*
- *
- * dbits 52(%ebp)
- * alpha 48(%ebp)
- * FF_S 44(%ebp)
- *
- * d -32(%ebp) -> %edi
- * i -60(%ebp) -> %esi
- * sx -64(%ebp) -> %ebx
- * sy -68(%ebp)
- * s -72(%ebp)
- *
- * %mm0 a a a a
- * %mm1 FgA
- * %mm2 SumFgA
- * %mm3 a a a 255
- * %mm4
-*/
-
- .align 2
-.globl nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n
- .type nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n,@function
-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n:
- pushl %ebp
- movl %esp, %ebp
- pushl %ebx
- subl $72, %esp
- pushl %edi
- pushl %esi
-
-/* Load %mm7 with [0 0 0 0] */
- movl $0, %eax
- movd %eax, %mm7
-
-/* Load %mm6 with [128 128 128 128] */
- movl $0x80808080, %eax
- movd %eax, %mm6
- punpcklbw %mm7, %mm6
-
-/* Load %mm5 with [255 255 255 255] */
- movl $0xffffffff, %eax
- movd %eax, %mm5
- punpcklbw %mm7, %mm5
-
-/* Load %mm0 with [a a a a] */
- movzbl 48(%ebp), %eax
- movd %eax, %mm0
- punpcklwd %mm0, %mm0
- punpckldq %mm0, %mm0
-
- movl $1, %eax
- movzbl 52(%ebp), %ecx
- sall %cl, %eax
- movl %eax, -8(%ebp)
- movl 8(%ebp), %eax
- movl %eax, -12(%ebp)
- movl 40(%ebp), %eax
- addl $16, %eax
- movl (%eax), %eax
- movl %eax, -16(%ebp)
- movl 40(%ebp), %eax
- addl $20, %eax
- movl (%eax), %eax
- movl %eax, -20(%ebp)
- movl $0, -28(%ebp)
-.L44:
- movl -28(%ebp), %eax
- cmpl 16(%ebp), %eax
- jl .L47
- jmp .exit_n
-.L47:
- movl -12(%ebp), %eax
- movl %eax, -32(%ebp)
- movl -16(%ebp), %eax
- movl %eax, -36(%ebp)
- movl -20(%ebp), %eax
- movl %eax, -40(%ebp)
- movl $0, -24(%ebp)
-.L48:
- movl -24(%ebp), %eax
- cmpl 12(%ebp), %eax
- jl .L51
- jmp .L49
-.L51:
-
-/* Zero accumulator */
- movq %mm7, %mm2
-
-/* Set i to dptr (size - 1) */
- movl -8(%ebp), %esi
- sub $1, %esi
- shll $3, %esi
-
- movl 44(%ebp), %edi
- movl -36(%ebp), %ecx
-
-.for_i_n:
- movl (%edi,%esi), %ebx
- addl %ecx, %ebx
-/* Test negative before shift */
- cmpl $0, %ebx
- js .next_i_n
- sarl $12, %ebx
- cmpl 28(%ebp), %ebx
- jge .next_i_n
-/* We multiply sx by 4 here */
- shll $2, %ebx
-
- movl 4(%edi,%esi), %eax
- addl -40(%ebp), %eax
-/* Test negative before shift */
- cmpl $0, %eax
- js .next_i_n
- sarl $12, %eax
- cmpl 32(%ebp), %eax
- jge .next_i_n
-/* We multiply sy by srs here */
- imull 36(%ebp), %eax
-
- addl %ebx, %eax
- addl 24(%ebp), %eax
-
-/* Fg -> %mm1 */
- movl (%eax), %eax
- testl $0xff000000, %eax
- jz .next_i_n
- movd %eax, %mm1
- punpcklbw %mm7, %mm1
-
-/* [a a a 255] -> %mm3 */
- shrl $24, %eax
- movl $0x10101, %edx
- mull %edx
- orl $0xff000000, %eax
- movd %eax, %mm3
- punpcklbw %mm7, %mm3
-
-/* [Fg * a] -> mm1 */
- pmullw %mm3, %mm1
- paddw %mm6, %mm1
- movq %mm1, %mm4
- psrlw $8, %mm4
- paddw %mm4, %mm1
- psrlw $8, %mm1
-
-/* Add to accumulator */
- paddw %mm1, %mm2
-
-.next_i_n:
- subl $8, %esi
- jnb .for_i_n
-
-/* Divide components by sample size */
- movd 52(%ebp), %mm3
- psrlw %mm3, %mm2
-
-/* Multiply by alpha */
- pmullw %mm0, %mm2
- paddw %mm6, %mm2
- movq %mm2, %mm4
- psrlw $8, %mm4
- paddw %mm4, %mm2
- psrlw $8, %mm2
-
-/* [255 - FgA] -> mm1 */
- movq %mm2, %mm1
- punpckhwd %mm1, %mm1
- punpckhdq %mm1, %mm1
- pxor %mm5, %mm1
-
- movl -32(%ebp), %edi
-/* Bg -> mm3 */
- movd (%edi), %mm3
- punpcklbw %mm7, %mm3
-
-/* Fg + ((255 - FgA) * Bg) / 255 */
-
- pmullw %mm1, %mm3
- paddw %mm6, %mm3
- movq %mm3, %mm4
- psrlw $8, %mm4
- paddw %mm4, %mm3
- psrlw $8, %mm3
- paddw %mm2, %mm3
-
-/* Store pixel */
- packuswb %mm3, %mm3
- movd %mm3, (%edi)
-
-.L58:
- movl 40(%ebp), %eax
- movl (%eax), %edx
- leal -36(%ebp), %eax
- addl %edx, (%eax)
- movl 40(%ebp), %eax
- addl $4, %eax
- movl (%eax), %edx
- leal -40(%ebp), %eax
- addl %edx, (%eax)
- leal -32(%ebp), %eax
- addl $4, (%eax)
- leal -24(%ebp), %eax
- incl (%eax)
- jmp .L48
-.L49:
- movl 40(%ebp), %eax
- addl $8, %eax
- movl (%eax), %edx
- leal -16(%ebp), %eax
- addl %edx, (%eax)
- movl 40(%ebp), %eax
- addl $12, %eax
- movl (%eax), %edx
- leal -20(%ebp), %eax
- addl %edx, (%eax)
- movl 20(%ebp), %edx
- leal -12(%ebp), %eax
- addl %edx, (%eax)
- leal -28(%ebp), %eax
- incl (%eax)
- jmp .L44
-
-.exit_n:
- emms
- popl %esi
- popl %edi
- addl $72, %esp
- popl %ebx
- popl %ebp
- ret
-.Lfe3:
- .size nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n,.Lfe3-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n
- .ident "GCC: (GNU) 3.2"