--- /Users/yonas/ffmpeg-r29748-orig/libavcodec/arm/dsputil_arm_s.S	2009-10-03 20:58:30.000000000 -0400
+++ /Users/yonas/ffmpeg-r29748-orig/libavcodec/arm/dsputil_arm_s.S	2009-10-03 19:13:34.000000000 -0400
@@ -22,7 +22,7 @@
 #include "config.h"
 #include "asm.S"
 
-        preserve8
+        //preserve8
 
 #if !HAVE_PLD
 .macro pld reg
@@ -36,67 +36,64 @@
         add             r0,  r0,  r1
         bne             ff_prefetch_arm
         bx              lr
-        .endfunc
+        @.endfunc
 #endif
 
-.macro  ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
-        mov             \Rd0, \Rn0, lsr #(\shift * 8)
-        mov             \Rd1, \Rn1, lsr #(\shift * 8)
-        mov             \Rd2, \Rn2, lsr #(\shift * 8)
-        mov             \Rd3, \Rn3, lsr #(\shift * 8)
-        orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
-        orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
-        orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
-        orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
+.macro  ALIGN_QWORD_D           @ positional args: $0 = shift in bytes, $1-$4 = Rd0-Rd3 (out), $5-$9 = Rn0-Rn4 (in)
+        mov $1, $5, lsr #($0 * 8)              @ Rd0 = Rn0 >> (shift * 8)
+        mov $2, $6, lsr #($0 * 8)              @ Rd1 = Rn1 >> (shift * 8)
+        mov $3, $7, lsr #($0 * 8)              @ Rd2 = Rn2 >> (shift * 8)
+        mov $4, $8, lsr #($0 * 8)              @ Rd3 = Rn3 >> (shift * 8)
+        orr $1, $1, $6, lsl #(32 - $0 * 8)     @ Rd0 |= Rn1 << (32 - shift * 8)
+        orr $2, $2, $7, lsl #(32 - $0 * 8)     @ Rd1 |= Rn2 << (32 - shift * 8)
+        orr $3, $3, $8, lsl #(32 - $0 * 8)     @ Rd2 |= Rn3 << (32 - shift * 8)
+        orr $4, $4, $9, lsl #(32 - $0 * 8)     @ Rd3 |= Rn4 << (32 - shift * 8)
 .endm
-.macro  ALIGN_DWORD shift, R0, R1, R2
-        mov             \R0, \R0, lsr #(\shift * 8)
-        orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
-        mov             \R1, \R1, lsr #(\shift * 8)
-        orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
+.macro  ALIGN_DWORD             @ positional args: $0 = shift in bytes, $1 = R0 and $2 = R1 (in/out), $3 = R2 (in)
+        mov $1, $1, lsr #($0 * 8)              @ R0 = (R0 >> (shift * 8)) ...
+        orr $1, $1, $2, lsl #(32 - $0 * 8)     @ ... | (R1 << (32 - shift * 8)); R1 is still unmodified here
+        mov $2, $2, lsr #($0 * 8)              @ R1 = (R1 >> (shift * 8)) ...
+        orr $2, $2, $3, lsl #(32 - $0 * 8)     @ ... | (R2 << (32 - shift * 8)); R2 is only read
 .endm
-.macro  ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
-        mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
-        mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
-        orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
-        orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
+.macro  ALIGN_DWORD_D           @ positional args: $0 = shift in bytes, $1-$2 = Rdst0-Rdst1 (out), $3-$5 = Rsrc0-Rsrc2 (in)
+        mov $1, $3, lsr #($0 * 8)              @ Rdst0 = Rsrc0 >> (shift * 8)
+        mov $2, $4, lsr #($0 * 8)              @ Rdst1 = Rsrc1 >> (shift * 8)
+        orr $1, $1, $4, lsl #(32 - ($0 * 8))   @ Rdst0 |= Rsrc1 << (32 - shift * 8)
+        orr $2, $2, $5, lsl #(32 - ($0 * 8))   @ Rdst1 |= Rsrc2 << (32 - shift * 8)
 .endm
-
-.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
+.macro  RND_AVG32               @ positional args: $0-$1 = Rd0-Rd1 (out), $2-$3 = Rn0-Rn1 (destroyed), $4-$5 = Rm0-Rm1 (in), $6 = Rmask
         @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
         @ Rmask = 0xFEFEFEFE
         @ Rn = destroy
-        eor             \Rd0, \Rn0, \Rm0
-        eor             \Rd1, \Rn1, \Rm1
-        orr             \Rn0, \Rn0, \Rm0
-        orr             \Rn1, \Rn1, \Rm1
-        and             \Rd0, \Rd0, \Rmask
-        and             \Rd1, \Rd1, \Rmask
-        sub             \Rd0, \Rn0, \Rd0, lsr #1
-        sub             \Rd1, \Rn1, \Rd1, lsr #1
+        eor $0, $2, $4                 @ Rd0 = Rn0 ^ Rm0
+        eor $1, $3, $5                 @ Rd1 = Rn1 ^ Rm1
+        orr $2, $2, $4                 @ Rn0 = Rn0 | Rm0 (overwrites the Rn0 input)
+        orr $3, $3, $5                 @ Rn1 = Rn1 | Rm1 (overwrites the Rn1 input)
+        and $0, $0, $6                 @ clear bit 0 of each byte so the shift below cannot leak across bytes
+        and $1, $1, $6
+        sub $0, $2, $0, lsr #1         @ Rd0 = (Rn0 | Rm0) - (masked xor >> 1)
+        sub $1, $3, $1, lsr #1         @ Rd1 = (Rn1 | Rm1) - (masked xor >> 1)
 .endm
-
-.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
+.macro  NO_RND_AVG32            @ positional args: $0-$1 = Rd0-Rd1 (out), $2-$3 = Rn0-Rn1 (destroyed), $4-$5 = Rm0-Rm1 (in), $6 = Rmask
         @ Rd = (Rn & Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
         @ Rmask = 0xFEFEFEFE
         @ Rn = destroy
-        eor             \Rd0, \Rn0, \Rm0
-        eor             \Rd1, \Rn1, \Rm1
-        and             \Rn0, \Rn0, \Rm0
-        and             \Rn1, \Rn1, \Rm1
-        and             \Rd0, \Rd0, \Rmask
-        and             \Rd1, \Rd1, \Rmask
-        add             \Rd0, \Rn0, \Rd0, lsr #1
-        add             \Rd1, \Rn1, \Rd1, lsr #1
+        eor $0, $2, $4                 @ Rd0 = Rn0 ^ Rm0
+        eor $1, $3, $5                 @ Rd1 = Rn1 ^ Rm1
+        and $2, $2, $4                 @ Rn0 = Rn0 & Rm0 (overwrites the Rn0 input)
+        and $3, $3, $5                 @ Rn1 = Rn1 & Rm1 (overwrites the Rn1 input)
+        and $0, $0, $6                 @ clear bit 0 of each byte so the shift below cannot leak across bytes
+        and $1, $1, $6
+        add $0, $2, $0, lsr #1         @ Rd0 = (Rn0 & Rm0) + (masked xor >> 1); NOTE(review): the code adds, the formula comment above shows a minus
+        add $1, $3, $1, lsr #1         @ Rd1 = (Rn1 & Rm1) + (masked xor >> 1)
 .endm
-
 .macro  JMP_ALIGN tmp, reg
-        ands            \tmp, \reg, #3
-        bic             \reg, \reg, #3
+        ands $0, $1, #3                @ tmp = reg & 3 and set flags; the body reads its args positionally (first = tmp, second = reg)
+        bic  $1, $1, #3                @ round reg down to a multiple of 4; bic without S leaves the flags from ands intact
         beq             1f
-        subs            \tmp, \tmp, #1
+        subs $0, $0, #1                @ tmp reaches zero here when the low two bits were 1
         beq             2f
-        subs            \tmp, \tmp, #1
+        subs $0, $0, #1                @ tmp reaches zero here when the low two bits were 2; otherwise fall through to 4f
         beq             3f
         b    4f
 .endm
@@ -107,51 +104,51 @@
         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
         @ block = word aligned, pixles = unaligned
         pld             [r1]
-        push            {r4-r11, lr}
+        stmfd sp!, {r4-r11, lr} @ R14 is also called LR
         JMP_ALIGN       r5,  r1
 1:
-        ldm             r1,  {r4-r7}
+        ldmia r1, {r4-r7}
         add             r1,  r1,  r2
-        stm             r0,  {r4-r7}
+        stmia r0, {r4-r7}
         pld             [r1]
         subs            r3,  r3,  #1
         add             r0,  r0,  r2
         bne             1b
-        pop             {r4-r11, pc}
+        ldmfd sp!, {r4-r11, pc}
         .align 5
 2:
-        ldm             r1,  {r4-r8}
+        ldmia r1, {r4-r8}
         add             r1,  r1,  r2
-        ALIGN_QWORD_D   1,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
+        ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
         pld             [r1]
         subs            r3,  r3,  #1
-        stm             r0,  {r9-r12}
+        stmia r0, {r9-r12}
         add             r0,  r0,  r2
         bne             2b
-        pop             {r4-r11, pc}
+        ldmfd sp!, {r4-r11, pc}
         .align 5
 3:
-        ldm             r1,  {r4-r8}
+        ldmia r1, {r4-r8}
         add             r1,  r1,  r2
-        ALIGN_QWORD_D   2,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
+        ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
         pld             [r1]
         subs            r3,  r3,  #1
-        stm             r0,  {r9-r12}
+        stmia r0, {r9-r12}
         add             r0,  r0,  r2
         bne             3b
-        pop             {r4-r11, pc}
+        ldmfd sp!, {r4-r11, pc}
         .align 5
 4:
-        ldm             r1,  {r4-r8}
+        ldmia r1, {r4-r8}
         add             r1,  r1,  r2
-        ALIGN_QWORD_D   3,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
+        ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
         pld             [r1]
         subs            r3,  r3,  #1
-        stm             r0,  {r9-r12}
+        stmia r0, {r9-r12}
         add             r0,  r0,  r2
         bne             4b
-        pop             {r4-r11,pc}
-        .endfunc
+        ldmfd sp!, {r4-r11,pc}
+        @.endfunc
 
 @ ----------------------------------------------------------------
         .align 5
@@ -159,51 +156,51 @@
         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
         @ block = word aligned, pixles = unaligned
         pld             [r1]
-        push            {r4-r5,lr}
+        stmfd sp!, {r4-r5,lr} @ R14 is also called LR
         JMP_ALIGN       r5,  r1
 1:
-        ldm             r1,  {r4-r5}
+        ldmia r1, {r4-r5}
         add             r1,  r1,  r2
         subs            r3,  r3,  #1
         pld             [r1]
-        stm             r0,  {r4-r5}
+        stmia r0, {r4-r5}
         add             r0,  r0,  r2
         bne             1b
-        pop             {r4-r5,pc}
+        ldmfd sp!, {r4-r5,pc}
         .align 5
 2:
-        ldm             r1,  {r4-r5, r12}
+        ldmia r1, {r4-r5, r12}
         add             r1,  r1,  r2
-        ALIGN_DWORD     1,   r4,  r5,  r12
+        ALIGN_DWORD 1, r4, r5, r12
         pld             [r1]
         subs            r3,  r3,  #1
-        stm             r0,  {r4-r5}
+        stmia r0, {r4-r5}
         add             r0,  r0,  r2
         bne             2b
-        pop             {r4-r5,pc}
+        ldmfd sp!, {r4-r5,pc}
         .align 5
 3:
-        ldm             r1,  {r4-r5, r12}
+        ldmia r1, {r4-r5, r12}
         add             r1,  r1,  r2
-        ALIGN_DWORD     2,   r4,  r5,  r12
+        ALIGN_DWORD 2, r4, r5, r12
         pld             [r1]
         subs            r3,  r3,  #1
-        stm             r0,  {r4-r5}
+        stmia r0, {r4-r5}
         add             r0,  r0,  r2
         bne             3b
-        pop             {r4-r5,pc}
+        ldmfd sp!, {r4-r5,pc}
         .align 5
 4:
-        ldm             r1,  {r4-r5, r12}
+        ldmia r1, {r4-r5, r12}
         add             r1,  r1,  r2
-        ALIGN_DWORD     3,   r4,  r5,  r12
+        ALIGN_DWORD 3, r4, r5, r12
         pld             [r1]
         subs            r3,  r3,  #1
-        stm             r0,  {r4-r5}
+        stmia r0, {r4-r5}
         add             r0,  r0,  r2
         bne             4b
-        pop             {r4-r5,pc}
-        .endfunc
+        ldmfd sp!, {r4-r5,pc}
+        @.endfunc
 
 @ ----------------------------------------------------------------
         .align 5
@@ -211,118 +208,128 @@
         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
         @ block = word aligned, pixles = unaligned
         pld             [r1]
-        push            {r4-r10,lr}
-        ldr             r12, =0xfefefefe
+        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
+        @ldr r12, =0xfefefefe
+        adr r12, 5f
+	ldr r12, [r12, #0]
         JMP_ALIGN       r5,  r1
 1:
-        ldm             r1,  {r4-r5, r10}
+        ldmia r1, {r4-r5, r10}
         add             r1,  r1,  r2
-        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
+        ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
         pld             [r1]
         RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
         subs            r3,  r3,  #1
-        stm             r0,  {r8-r9}
+        stmia r0, {r8-r9}
         add             r0,  r0,  r2
         bne             1b
-        pop             {r4-r10,pc}
+        ldmfd sp!, {r4-r10,pc}
         .align 5
 2:
-        ldm             r1,  {r4-r5, r10}
+        ldmia r1, {r4-r5, r10}
         add             r1,  r1,  r2
-        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
-        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
+        ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
+        ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
         pld             [r1]
         RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
         subs            r3,  r3,  #1
-        stm             r0,  {r4-r5}
+        stmia r0, {r4-r5}
         add             r0,  r0,  r2
         bne             2b
-        pop             {r4-r10,pc}
+        ldmfd sp!, {r4-r10,pc}
         .align 5
 3:
-        ldm             r1,  {r4-r5, r10}
+        ldmia r1, {r4-r5, r10}
         add             r1,  r1,  r2
-        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
-        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
+        ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
+        ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
         pld             [r1]
         RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
         subs            r3,  r3,  #1
-        stm             r0,  {r4-r5}
+        stmia r0, {r4-r5}
         add             r0,  r0,  r2
         bne             3b
-        pop             {r4-r10,pc}
+        ldmfd sp!, {r4-r10,pc}
         .align 5
 4:
-        ldm             r1,  {r4-r5, r10}
+        ldmia r1, {r4-r5, r10}
         add             r1,  r1,  r2
-        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
+        ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
         pld             [r1]
         RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12
         subs            r3,  r3,  #1
-        stm             r0,  {r8-r9}
+        stmia r0, {r8-r9}
         add             r0,  r0,  r2
         bne             4b
-        pop             {r4-r10,pc}
-        .endfunc
+        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
+        .align 5
+5:
+        .long 0xFEFEFEFE
+        @.endfunc
 
         .align 5
 function put_no_rnd_pixels8_x2_arm, export=1
         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
         @ block = word aligned, pixles = unaligned
         pld             [r1]
-        push            {r4-r10,lr}
-        ldr             r12, =0xfefefefe
+        stmfd sp!, {r4-r10,lr} @ save r4-r10 and the return address (lr is r14)
+        @ldr r12, =0xfefefefe   (literal-pool load, replaced by the adr/ldr pair below)
+        adr r12, 5f             @ r12 = address of the mask word stored at local label 5
+	ldr r12, [r12, #0]      @ r12 = 0xFEFEFEFE, passed as Rmask to NO_RND_AVG32
         JMP_ALIGN       r5,  r1
 1:
-        ldm             r1,  {r4-r5, r10}
+        ldmia r1, {r4-r5, r10}  @ pixels & 3 == 0: load three consecutive words from the aligned r1
         add             r1,  r1,  r2
-        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
+        ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
         pld             [r1]
         NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
         subs            r3,  r3,  #1
-        stm             r0,  {r8-r9}
+        stmia r0, {r8-r9}       @ store the two averaged words (8 bytes) to block
         add             r0,  r0,  r2
         bne             1b
-        pop             {r4-r10,pc}
+        ldmfd sp!, {r4-r10,pc}  @ restore r4-r10 and return by loading the saved lr into pc
         .align 5
 2:
-        ldm             r1,  {r4-r5, r10}
+        ldmia r1, {r4-r5, r10}  @ pixels & 3 == 1: operands are the 1-byte and 2-byte shifted views of these words
         add             r1,  r1,  r2
-        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
-        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
+        ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
+        ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
         pld             [r1]
         NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
         subs            r3,  r3,  #1
-        stm             r0,  {r4-r5}
+        stmia r0, {r4-r5}       @ store the two averaged words (8 bytes) to block
         add             r0,  r0,  r2
         bne             2b
-        pop             {r4-r10,pc}
+        ldmfd sp!, {r4-r10,pc}  @ restore r4-r10 and return
         .align 5
 3:
-        ldm             r1,  {r4-r5, r10}
+        ldmia r1, {r4-r5, r10}  @ pixels & 3 == 2: operands are the 2-byte and 3-byte shifted views of these words
         add             r1,  r1,  r2
-        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
-        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
+        ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
+        ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
         pld             [r1]
         NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
         subs            r3,  r3,  #1
-        stm             r0,  {r4-r5}
+        stmia r0, {r4-r5}       @ store the two averaged words (8 bytes) to block
         add             r0,  r0,  r2
         bne             3b
-        pop             {r4-r10,pc}
+        ldmfd sp!, {r4-r10,pc}  @ restore r4-r10 and return
         .align 5
 4:
-        ldm             r1,  {r4-r5, r10}
+        ldmia r1, {r4-r5, r10}  @ pixels & 3 == 3: operands are the 3-byte shifted view (r6:r7) and the whole words r5:r10
         add             r1,  r1,  r2
-        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
+        ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
         pld             [r1]
         NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12
         subs            r3,  r3,  #1
-        stm             r0,  {r8-r9}
+        stmia r0, {r8-r9}       @ store the two averaged words (8 bytes) to block
         add             r0,  r0,  r2
         bne             4b
-        pop             {r4-r10,pc}
-        .endfunc
+        ldmfd sp!, {r4-r10,pc} @ restore r4-r10 and return; the data word below is never reached by execution
+        .align 5
+5:
+        .long 0xFEFEFEFE        @ Rmask constant read through r12 at function entry
+        @.endfunc
 
 
 @ ----------------------------------------------------------------
@@ -331,200 +338,212 @@
         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
         @ block = word aligned, pixles = unaligned
         pld             [r1]
-        push            {r4-r11,lr}
+        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
         mov             r3,  r3,  lsr #1
-        ldr             r12, =0xfefefefe
+        @ldr r12, =0xfefefefe
+        adr r12, 5f
+	ldr r12, [r12, #0]
         JMP_ALIGN       r5,  r1
 1:
-        ldm             r1,  {r4-r5}
+        ldmia r1, {r4-r5}
         add             r1,  r1,  r2
-6:      ldm             r1,  {r6-r7}
+6:      ldmia r1, {r6-r7}
         add             r1,  r1,  r2
         pld             [r1]
         RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
-        ldm             r1,  {r4-r5}
+        ldmia r1, {r4-r5}
         add             r1,  r1,  r2
-        stm             r0,  {r8-r9}
+        stmia r0, {r8-r9}
         add             r0,  r0,  r2
         pld             [r1]
         RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
         subs            r3,  r3,  #1
-        stm             r0,  {r8-r9}
+        stmia r0, {r8-r9}
         add             r0,  r0,  r2
         bne             6b
-        pop             {r4-r11,pc}
+        ldmfd sp!, {r4-r11,pc}
         .align 5
 2:
-        ldm             r1,  {r4-r6}
+        ldmia r1, {r4-r6}
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     1,   r4,  r5,  r6
-6:      ldm             r1,  {r7-r9}
+        ALIGN_DWORD 1, r4, r5, r6
+6:      ldmia r1, {r7-r9}
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     1,   r7,  r8,  r9
+        ALIGN_DWORD 1, r7, r8, r9
         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
-        stm             r0,  {r10-r11}
+        stmia r0, {r10-r11}
         add             r0,  r0,  r2
-        ldm             r1,  {r4-r6}
+        ldmia r1, {r4-r6}
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     1,   r4,  r5,  r6
+        ALIGN_DWORD 1, r4, r5, r6
         subs            r3,  r3,  #1
         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
-        stm             r0,  {r10-r11}
+        stmia r0, {r10-r11}
         add             r0,  r0,  r2
         bne             6b
-        pop             {r4-r11,pc}
+        ldmfd sp!, {r4-r11,pc}
         .align 5
 3:
-        ldm             r1,  {r4-r6}
+        ldmia r1, {r4-r6}
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     2,   r4,  r5,  r6
-6:      ldm             r1,  {r7-r9}
+        ALIGN_DWORD 2, r4, r5, r6
+6:      ldmia r1, {r7-r9}
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     2,   r7,  r8,  r9
+        ALIGN_DWORD 2, r7, r8, r9
         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
-        stm             r0,  {r10-r11}
+        stmia r0, {r10-r11}
         add             r0,  r0,  r2
-        ldm             r1,  {r4-r6}
+        ldmia r1, {r4-r6}
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     2,   r4,  r5,  r6
+        ALIGN_DWORD 2, r4, r5, r6
         subs            r3,  r3,  #1
         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
-        stm             r0,  {r10-r11}
+        stmia r0, {r10-r11}
         add             r0,  r0,  r2
         bne             6b
-        pop             {r4-r11,pc}
+        ldmfd sp!, {r4-r11,pc}
         .align 5
 4:
-        ldm             r1,  {r4-r6}
+        ldmia r1, {r4-r6}
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     3,   r4,  r5,  r6
-6:      ldm             r1,  {r7-r9}
+        ALIGN_DWORD 3, r4, r5, r6
+6:      ldmia r1, {r7-r9}
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     3,   r7,  r8,  r9
+        ALIGN_DWORD 3, r7, r8, r9
         RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
-        stm             r0,  {r10-r11}
+        stmia r0, {r10-r11}
         add             r0,  r0,  r2
-        ldm             r1,  {r4-r6}
+        ldmia r1, {r4-r6}
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     3,   r4,  r5,  r6
+        ALIGN_DWORD 3, r4, r5, r6
         subs            r3,  r3,  #1
         RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
-        stm             r0,  {r10-r11}
+        stmia r0, {r10-r11}
         add             r0,  r0,  r2
         bne             6b
-        pop             {r4-r11,pc}
-        .endfunc
+        ldmfd sp!, {r4-r11,pc}
+        .align 5
+5:
+        .long 0xfefefefe
+
+        @.endfunc
 
         .align 5
 function put_no_rnd_pixels8_y2_arm, export=1
         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
         @ block = word aligned, pixles = unaligned
         pld             [r1]
-        push            {r4-r11,lr}
+        stmfd sp!, {r4-r11,lr} @ save r4-r11 and the return address (lr is r14)
         mov             r3,  r3,  lsr #1
-        ldr             r12, =0xfefefefe
+        @ldr r12, =0xfefefefe   (literal-pool load, replaced by the adr/ldr pair below)
+        adr r12, 5f             @ r12 = address of the mask word stored at local label 5
+	ldr r12, [r12, #0]      @ r12 = 0xFEFEFEFE, passed as Rmask to NO_RND_AVG32
         JMP_ALIGN       r5,  r1
 1:
-        ldm             r1,  {r4-r5}
+        ldmia r1, {r4-r5}       @ pixels & 3 == 0: first source row, already word aligned
         add             r1,  r1,  r2
-6:      ldm             r1,  {r6-r7}
+6:      ldmia r1, {r6-r7}       @ loop head: next source row; each pass stores two output rows
         add             r1,  r1,  r2
         pld             [r1]
         NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
-        ldm             r1,  {r4-r5}
+        ldmia r1, {r4-r5}       @ following source row, reusing r4:r5 that the macro just destroyed
         add             r1,  r1,  r2
-        stm             r0,  {r8-r9}
+        stmia r0, {r8-r9}       @ store first averaged row of this pass
         add             r0,  r0,  r2
         pld             [r1]
         NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
         subs            r3,  r3,  #1
-        stm             r0,  {r8-r9}
+        stmia r0, {r8-r9}       @ store second averaged row of this pass
         add             r0,  r0,  r2
         bne             6b
-        pop             {r4-r11,pc}
+        ldmfd sp!, {r4-r11,pc}  @ restore r4-r11 and return by loading the saved lr into pc
         .align 5
 2:
-        ldm             r1,  {r4-r6}
+        ldmia r1, {r4-r6}       @ pixels & 3 == 1: first source row as three words, realigned in place below
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     1,   r4,  r5,  r6
-6:      ldm             r1,  {r7-r9}
+        ALIGN_DWORD 1, r4, r5, r6
+6:      ldmia r1, {r7-r9}       @ loop head: next source row; each pass stores two output rows
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     1,   r7,  r8,  r9
+        ALIGN_DWORD 1, r7, r8, r9
         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
-        stm             r0,  {r10-r11}
+        stmia r0, {r10-r11}     @ store first averaged row of this pass
         add             r0,  r0,  r2
-        ldm             r1,  {r4-r6}
+        ldmia r1, {r4-r6}       @ following source row, reusing r4-r6
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     1,   r4,  r5,  r6
+        ALIGN_DWORD 1, r4, r5, r6
         subs            r3,  r3,  #1
         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
-        stm             r0,  {r10-r11}
+        stmia r0, {r10-r11}     @ store second averaged row of this pass
         add             r0,  r0,  r2
         bne             6b
-        pop             {r4-r11,pc}
+        ldmfd sp!, {r4-r11,pc}  @ restore r4-r11 and return
         .align 5
 3:
-        ldm             r1,  {r4-r6}
+        ldmia r1, {r4-r6}       @ pixels & 3 == 2: first source row as three words, realigned in place below
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     2,   r4,  r5,  r6
-6:      ldm             r1,  {r7-r9}
+        ALIGN_DWORD 2, r4, r5, r6
+6:      ldmia r1, {r7-r9}       @ loop head: next source row; each pass stores two output rows
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     2,   r7,  r8,  r9
+        ALIGN_DWORD 2, r7, r8, r9
         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
-        stm             r0,  {r10-r11}
+        stmia r0, {r10-r11}     @ store first averaged row of this pass
         add             r0,  r0,  r2
-        ldm             r1,  {r4-r6}
+        ldmia r1, {r4-r6}       @ following source row, reusing r4-r6
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     2,   r4,  r5,  r6
+        ALIGN_DWORD 2, r4, r5, r6
         subs            r3,  r3,  #1
         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
-        stm             r0,  {r10-r11}
+        stmia r0, {r10-r11}     @ store second averaged row of this pass
         add             r0,  r0,  r2
         bne             6b
-        pop             {r4-r11,pc}
+        ldmfd sp!, {r4-r11,pc}  @ restore r4-r11 and return
         .align 5
 4:
-        ldm             r1,  {r4-r6}
+        ldmia r1, {r4-r6}       @ pixels & 3 == 3: first source row as three words, realigned in place below
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     3,   r4,  r5,  r6
-6:      ldm             r1,  {r7-r9}
+        ALIGN_DWORD 3, r4, r5, r6
+6:      ldmia r1, {r7-r9}       @ loop head: next source row; each pass stores two output rows
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     3,   r7,  r8,  r9
+        ALIGN_DWORD 3, r7, r8, r9
         NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
-        stm             r0,  {r10-r11}
+        stmia r0, {r10-r11}     @ store first averaged row of this pass
         add             r0,  r0,  r2
-        ldm             r1,  {r4-r6}
+        ldmia r1, {r4-r6}       @ following source row, reusing r4-r6
         add             r1,  r1,  r2
         pld             [r1]
-        ALIGN_DWORD     3,   r4,  r5,  r6
+        ALIGN_DWORD 3, r4, r5, r6
         subs            r3,  r3,  #1
         NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
-        stm             r0,  {r10-r11}
+        stmia r0, {r10-r11}     @ store second averaged row of this pass
         add             r0,  r0,  r2
         bne             6b
-        pop             {r4-r11,pc}
-        .endfunc
+        ldmfd sp!, {r4-r11,pc}  @ restore r4-r11 and return; the data word below is never reached by execution
+        .align 5
+5:
+        .long 0xfefefefe        @ Rmask constant read through r12 at function entry
+        @.endfunc
 
-        .ltorg
+        @.ltorg
 
 @ ----------------------------------------------------------------
+
 .macro  RND_XY2_IT align, rnd
         @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
         @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
@@ -528,65 +547,68 @@
 .macro  RND_XY2_IT align, rnd
         @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
         @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
-.if \align == 0
-        ldm             r1,  {r6-r8}
-.elseif \align == 3
-        ldm             r1,  {r5-r7}
+.if $0 == 0
+        ldmia r1, {r6-r8}       @ align 0: three source words land in r6-r8
+.elseif $0 == 3
+        ldmia r1, {r5-r7}       @ align 3: three source words land in r5-r7
 .else
-        ldm             r1,  {r8-r10}
+        ldmia r1, {r8-r10}      @ align 1 or 2: three source words land in r8-r10
 .endif
         add             r1,  r1,  r2
         pld             [r1]
-.if \align == 0
-        ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
-.elseif \align == 1
-        ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
-        ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
-.elseif \align == 2
-        ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
-        ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
-.elseif \align == 3
-        ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
+.if $0 == 0
+        ALIGN_DWORD_D 1, r4, r5, r6, r7, r8
+.elseif $0 == 1
+        ALIGN_DWORD_D 1, r4, r5, r8, r9, r10
+        ALIGN_DWORD_D 2, r6, r7, r8, r9, r10
+.elseif $0 == 2
+        ALIGN_DWORD_D 2, r4, r5, r8, r9, r10
+        ALIGN_DWORD_D 3, r6, r7, r8, r9, r10
+.elseif $0 == 3
+        ALIGN_DWORD_D 3, r4, r5, r5, r6, r7
 .endif
-        ldr             r14, =0x03030303
+        @ldr r14, =0x03030303   (literal-pool load, replaced by the table load below)
+	ldr r14, [r12, #0]      @ r14 = table word 0 (0x03030303); r12 must hold the table address set up by the calling function (adr r12, 5f)
         tst             r3,  #1
         and             r8,  r4,  r14
         and             r9,  r5,  r14
         and             r10, r6,  r14
         and             r11, r7,  r14
-        andeq           r14, r14, r14, \rnd #1
+        andeq r14, r14, r14, $1 #1     @ when r3 is even: r14 &= r14 shifted by 1 in the rnd direction (0x02020202 for lsl, 0x01010101 for lsr)
         add             r8,  r8,  r10
         add             r9,  r9,  r11
-        ldr             r12, =0xfcfcfcfc >> 2
         addeq           r8,  r8,  r14
         addeq           r9,  r9,  r14
-        and             r4,  r12, r4,  lsr #2
-        and             r5,  r12, r5,  lsr #2
-        and             r6,  r12, r6,  lsr #2
-        and             r7,  r12, r7,  lsr #2
+        @ldr r12, =0xfcfcfcfc >> 2   (r12 is now the table pointer, so the mask goes through r14 instead)
+	ldr r14, [r12, #4]     @ r14 = table word 1 (0xFCFCFCFC >> 2); loaded only after the addeq lines have consumed the old r14
+        and r4, r14, r4, lsr #2        @ r4 = (r4 >> 2) & mask
+        and r5, r14, r5, lsr #2        @ r5 = (r5 >> 2) & mask
+        and r6, r14, r6, lsr #2        @ r6 = (r6 >> 2) & mask
+        and r7, r14, r7, lsr #2        @ r7 = (r7 >> 2) & mask
         add             r10, r4,  r6
         add             r11, r5,  r7
         subs            r3,  r3,  #1
 .endm
 
 .macro RND_XY2_EXPAND align, rnd
-        RND_XY2_IT      \align, \rnd
-6:      push            {r8-r11}
-        RND_XY2_IT      \align, \rnd
-        pop             {r4-r7}
+        RND_XY2_IT $0, $1
+6:      stmfd sp!, {r8-r11}     @ loop head: push the r8-r11 sums left by the previous RND_XY2_IT
+        RND_XY2_IT $0, $1
+        ldmfd sp!, {r4-r7}      @ pop those saved sums back into r4-r7 (r8-r11 now hold the new row)
         add             r4,  r4,  r8
         add             r5,  r5,  r9
-        ldr             r14, =0x0f0f0f0f
+        @ldr r14, =0x0f0f0f0f   (literal-pool load, replaced by the table load below)
+	ldr r14, [r12, #8]      @ r14 = table word 2 (0x0F0F0F0F); r12 must hold the table address set up by the calling function (adr r12, 5f)
         add             r6,  r6,  r10
         add             r7,  r7,  r11
         and             r4,  r14, r4,  lsr #2
         and             r5,  r14, r5,  lsr #2
         add             r4,  r4,  r6
         add             r5,  r5,  r7
-        stm             r0,  {r4-r5}
+        stmia r0, {r4-r5}       @ store one output row (8 bytes) to block
         add             r0,  r0,  r2
         bge             6b
-        pop             {r4-r11,pc}
+        ldmfd sp!, {r4-r11,pc}  @ restore r4-r11 and return from the function that expanded this macro
 .endm
 
         .align 5
@@ -594,7 +616,8 @@
         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
         @ block = word aligned, pixles = unaligned
         pld             [r1]
-        push            {r4-r11,lr} @ R14 is also called LR
+        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+	adr r12, 5f
         JMP_ALIGN       r5,  r1
 1:
         RND_XY2_EXPAND  0, lsl
@@ -610,14 +633,21 @@
         .align 5
 4:
         RND_XY2_EXPAND  3, lsl
-        .endfunc
+
+        .align 5
+5:
+        .long 0x03030303
+        .long 0xFCFCFCFC >> 2
+        .long 0x0F0F0F0F
 
         .align 5
 function put_no_rnd_pixels8_xy2_arm, export=1
         @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
         @ block = word aligned, pixles = unaligned
         pld             [r1]
-        push            {r4-r11,lr}
+        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+	adr r12, 5f
+
         JMP_ALIGN       r5,  r1
 1:
         RND_XY2_EXPAND  0, lsr
@@ -633,12 +663,19 @@
         .align 5
 4:
         RND_XY2_EXPAND  3, lsr
-        .endfunc
+
+        .align 5
+5:
+	.long 0x03030303
+        .long 0xFCFCFCFC >> 2
+        .long 0x0F0F0F0F
+        @.endfunc
 
         .align 5
 @ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
 function ff_add_pixels_clamped_ARM, export=1
-        push            {r4-r10}
+        @push            {r4-r10}
+	stmfd           sp!, {r4-r10}
         mov             r10, #8
 1:
         ldr             r4,  [r1]               /* load dest */
@@ -721,6 +758,8 @@
         add             r1,  r1,  r2
         bne             1b
 
-        pop             {r4-r10}
+        @pop             {r4-r10}
+	ldmfd           sp!, {r4-r10}
         bx              lr
-        .endfunc
+        @.endfunc
+
diff -wBNru /Users/yonas/ffmpeg-r29748-orig/libavcodec/arm/simple_idct_armv6.S /Users/yonas/ffmpeg-r29748-iphone/libavcodec/arm/simple_idct_armv6.S
--- /Users/yonas/ffmpeg-r29748-orig/libavcodec/arm/simple_idct_armv6.S	2009-01-30 19:37:45.000000000 -0500
+++ /Users/yonas/ffmpeg-r29748-iphone/libavcodec/arm/simple_idct_armv6.S	2009-01-30 20:30:18.000000000 -0500
@@ -59,9 +59,9 @@
 
   Output in registers r4--r11
 */
-        .macro idct_row shift
+        .macro idct_row
         ldr    lr, w46               /* lr  = W4 | (W6 << 16) */
-        mov    r1, #(1<<(\shift-1))
+        mov    r1, #(1<<($0-1))
         smlad  r4, r2, ip, r1
         smlsd  r7, r2, ip, r1
         ldr    ip, w13               /* ip  = W1 | (W3 << 16) */
@@ -100,10 +100,10 @@
 
   Output in registers r4--r11
 */
-        .macro idct_row4 shift
+        .macro idct_row4
         ldr    lr, w46               /* lr =  W4 | (W6 << 16) */
         ldr    r10,w57               /* r10 = W5 | (W7 << 16) */
-        mov    r1, #(1<<(\shift-1))
+        mov    r1, #(1<<($0-1))
         smlad  r4, r2, ip, r1
         smlsd  r7, r2, ip, r1
         ldr    ip, w13               /* ip =  W1 | (W3 << 16) */
@@ -138,28 +138,29 @@
   shift = right-shift amount
   Input/output in registers r4--r11
 */
-        .macro idct_finish_shift shift
+        .macro idct_finish_shift     /* positional arg 0 = complete shift operand; the call site in this patch passes it with the leading # */
         add    r3, r4, r8            /* r3 = A0 + B0 */
         sub    r2, r4, r8            /* r2 = A0 - B0 */
-        mov    r4, r3, asr #\shift
-        mov    r8, r2, asr #\shift
+        mov    r4, r3, asr $0        /* r4 = r3 >> shift (arithmetic) */
+        mov    r8, r2, asr $0        /* r8 = r2 >> shift (arithmetic) */
 
         sub    r3, r5, r9            /* r3 = A1 + B1 */
         add    r2, r5, r9            /* r2 = A1 - B1 */
-        mov    r5, r3, asr #\shift
-        mov    r9, r2, asr #\shift
+        mov    r5, r3, asr $0        /* r5 = r3 >> shift (arithmetic) */
+        mov    r9, r2, asr $0        /* r9 = r2 >> shift (arithmetic) */
 
         add    r3, r6, r10           /* r3 = A2 + B2 */
         sub    r2, r6, r10           /* r2 = A2 - B2 */
-        mov    r6, r3, asr #\shift
-        mov    r10,r2, asr #\shift
+        mov    r6, r3, asr $0        /* r6 = r3 >> shift (arithmetic) */
+        mov    r10,r2, asr $0        /* r10 = r2 >> shift (arithmetic) */
 
         add    r3, r7, r11           /* r3 = A3 + B3 */
         sub    r2, r7, r11           /* r2 = A3 - B3 */
-        mov    r7, r3, asr #\shift
-        mov    r11,r2, asr #\shift
+        mov    r7, r3, asr $0        /* r7 = r3 >> shift (arithmetic) */
+        mov    r11,r2, asr $0        /* r11 = r2 >> shift (arithmetic) */
         .endm
 
+
 /*
   Compute final part of IDCT single row, saturating results at 8 bits.
   shift = right-shift amount
@@ -165,26 +166,26 @@
   shift = right-shift amount
   Input/output in registers r4--r11
 */
-        .macro idct_finish_shift_sat shift
+        .macro idct_finish_shift_sat 
         add    r3, r4, r8            /* r3 = A0 + B0 */
         sub    ip, r4, r8            /* ip = A0 - B0 */
-        usat   r4, #8, r3, asr #\shift
-        usat   r8, #8, ip, asr #\shift
+        usat   r4, #8, r3, asr $0
+        usat   r8, #8, ip, asr $0
 
         sub    r3, r5, r9            /* r3 = A1 + B1 */
         add    ip, r5, r9            /* ip = A1 - B1 */
-        usat   r5, #8, r3, asr #\shift
-        usat   r9, #8, ip, asr #\shift
+        usat   r5, #8, r3, asr $0
+        usat   r9, #8, ip, asr $0
 
         add    r3, r6, r10           /* r3 = A2 + B2 */
         sub    ip, r6, r10           /* ip = A2 - B2 */
-        usat   r6, #8, r3, asr #\shift
-        usat   r10,#8, ip, asr #\shift
+        usat   r6, #8, r3, asr $0
+        usat   r10,#8, ip, asr $0
 
         add    r3, r7, r11           /* r3 = A3 + B3 */
         sub    ip, r7, r11           /* ip = A3 - B3 */
-        usat   r7, #8, r3, asr #\shift
-        usat   r11,#8, ip, asr #\shift
+        usat   r7, #8, r3, asr $0
+        usat   r11,#8, ip, asr $0
         .endm
 
 /*
@@ -193,7 +194,7 @@
   r1 = dest
 */
 function idct_row_armv6
-        push   {lr}
+        str    lr, [sp, #-4]!
 
         ldr    lr, [r0, #12]         /* lr = row[7,5] */
         ldr    ip, [r0, #4]          /* ip = row[6,4] */
@@ -203,7 +204,7 @@
         cmpeq  lr, r3
         cmpeq  lr, r2, lsr #16
         beq    1f
-        push   {r1}
+        str    r1, [sp, #-4]!
         ldr    ip, w42               /* ip = W4 | (W2 << 16) */
         cmp    lr, #0
         beq    2f
@@ -213,8 +214,8 @@
 
 2:      idct_row4  ROW_SHIFT
 
-3:      pop    {r1}
-        idct_finish_shift ROW_SHIFT
+3:      ldr    r1, [sp], #4
+        idct_finish_shift #ROW_SHIFT
 
         strh   r4, [r1]
         strh   r5, [r1, #(16*2)]
@@ -225,7 +226,7 @@
         strh   r9, [r1, #(16*5)]
         strh   r8, [r1, #(16*7)]
 
-        pop    {pc}
+        ldr    pc, [sp], #4
 
 1:      mov    r2, r2, lsl #3
         strh   r2, [r1]
@@ -236,8 +237,7 @@
         strh   r2, [r1, #(16*3)]
         strh   r2, [r1, #(16*5)]
         strh   r2, [r1, #(16*7)]
-        pop    {pc}
-        .endfunc
+        ldr    pc, [sp], #4
 
 /*
   Compute IDCT of single column, read as row.
@@ -245,14 +245,14 @@
   r1 = dest
 */
 function idct_col_armv6
-        push   {r1, lr}
+        stmfd  sp!, {r1, lr}
 
         ldr    r2, [r0]              /* r2 = row[2,0] */
         ldr    ip, w42               /* ip = W4 | (W2 << 16) */
         ldr    r3, [r0, #8]          /* r3 = row[3,1] */
         idct_row COL_SHIFT
-        pop    {r1}
-        idct_finish_shift COL_SHIFT
+        ldr    r1, [sp], #4
+        idct_finish_shift #COL_SHIFT
 
         strh   r4, [r1]
         strh   r5, [r1, #(16*1)]
@@ -263,8 +263,7 @@
         strh   r9, [r1, #(16*6)]
         strh   r8, [r1, #(16*7)]
 
-        pop    {pc}
-        .endfunc
+        ldr    pc, [sp], #4
 
 /*
   Compute IDCT of single column, read as row, store saturated 8-bit.
@@ -273,14 +272,14 @@
   r2 = line size
 */
 function idct_col_put_armv6
-        push   {r1, r2, lr}
+        stmfd  sp!, {r1, r2, lr}
 
         ldr    r2, [r0]              /* r2 = row[2,0] */
         ldr    ip, w42               /* ip = W4 | (W2 << 16) */
         ldr    r3, [r0, #8]          /* r3 = row[3,1] */
         idct_row COL_SHIFT
-        pop    {r1, r2}
-        idct_finish_shift_sat COL_SHIFT
+        ldmfd  sp!, {r1, r2}
+        idct_finish_shift_sat #COL_SHIFT
 
         strb   r4, [r1], r2
         strb   r5, [r1], r2
@@ -293,8 +292,7 @@
 
         sub    r1, r1, r2, lsl #3
 
-        pop    {pc}
-        .endfunc
+        ldr    pc, [sp], #4
 
 /*
   Compute IDCT of single column, read as row, add/store saturated 8-bit.
@@ -303,13 +301,13 @@
   r2 = line size
 */
 function idct_col_add_armv6
-        push   {r1, r2, lr}
+        stmfd  sp!, {r1, r2, lr}
 
         ldr    r2, [r0]              /* r2 = row[2,0] */
         ldr    ip, w42               /* ip = W4 | (W2 << 16) */
         ldr    r3, [r0, #8]          /* r3 = row[3,1] */
         idct_row COL_SHIFT
-        pop    {r1, r2}
+        ldmfd  sp!, {r1, r2}
         idct_finish
 
         ldrb   r3, [r1]
@@ -348,86 +346,83 @@
 
         sub    r1, r1, r2, lsl #3
 
-        pop    {pc}
-        .endfunc
+        ldr    pc, [sp], #4
 
 /*
   Compute 8 IDCT row transforms.
   func = IDCT row->col function
   width = width of columns in bytes
 */
-        .macro idct_rows func width
-        bl     \func
+        .macro idct_rows
+        bl     $0
         add    r0, r0, #(16*2)
-        add    r1, r1, #\width
-        bl     \func
+        add    r1, r1, $1
+        bl     $0
         add    r0, r0, #(16*2)
-        add    r1, r1, #\width
-        bl     \func
+        add    r1, r1, $1
+        bl     $0
         add    r0, r0, #(16*2)
-        add    r1, r1, #\width
-        bl     \func
+        add    r1, r1, $1
+        bl     $0
         sub    r0, r0, #(16*5)
-        add    r1, r1, #\width
-        bl     \func
+        add    r1, r1, $1
+        bl     $0
         add    r0, r0, #(16*2)
-        add    r1, r1, #\width
-        bl     \func
+        add    r1, r1, $1
+        bl     $0
         add    r0, r0, #(16*2)
-        add    r1, r1, #\width
-        bl     \func
+        add    r1, r1, $1
+        bl     $0
         add    r0, r0, #(16*2)
-        add    r1, r1, #\width
-        bl     \func
+        add    r1, r1, $1
+        bl     $0
 
         sub    r0, r0, #(16*7)
         .endm
 
 /* void ff_simple_idct_armv6(DCTELEM *data); */
 function ff_simple_idct_armv6, export=1
-        push   {r4-r11, lr}
+        stmfd  sp!, {r4-r11, lr}
         sub    sp, sp, #128
 
         mov    r1, sp
-        idct_rows idct_row_armv6, 2
+        idct_rows _idct_row_armv6, #2
         mov    r1, r0
         mov    r0, sp
-        idct_rows idct_col_armv6, 2
+        idct_rows _idct_col_armv6, #2
 
         add    sp, sp, #128
-        pop    {r4-r11, pc}
-        .endfunc
+        ldmfd  sp!, {r4-r11, pc}
 
 /* ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
 function ff_simple_idct_add_armv6, export=1
-        push   {r0, r1, r4-r11, lr}
+        stmfd  sp!, {r0, r1, r4-r11, lr}
         sub    sp, sp, #128
 
         mov    r0, r2
         mov    r1, sp
-        idct_rows idct_row_armv6, 2
+        idct_rows _idct_row_armv6, #2
         mov    r0, sp
         ldr    r1, [sp, #128]
         ldr    r2, [sp, #(128+4)]
-        idct_rows idct_col_add_armv6, 1
+        idct_rows _idct_col_add_armv6, #1
 
         add    sp, sp, #(128+8)
-        pop    {r4-r11, pc}
-        .endfunc
+        ldmfd  sp!, {r4-r11, pc}
 
 /* ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
 function ff_simple_idct_put_armv6, export=1
-        push   {r0, r1, r4-r11, lr}
+        stmfd  sp!, {r0, r1, r4-r11, lr}
         sub    sp, sp, #128
 
         mov    r0, r2
         mov    r1, sp
-        idct_rows idct_row_armv6, 2
+        idct_rows _idct_row_armv6, #2
         mov    r0, sp
         ldr    r1, [sp, #128]
         ldr    r2, [sp, #(128+4)]
-        idct_rows idct_col_put_armv6, 1
+        idct_rows _idct_col_put_armv6, #1
 
         add    sp, sp, #(128+8)
-        pop    {r4-r11, pc}
-        .endfunc
+        ldmfd  sp!, {r4-r11, pc}
+
diff -wBNru /Users/yonas/ffmpeg-r29748-orig/libavutil/internal.h /Users/yonas/ffmpeg-r29748-iphone/libavutil/internal.h
--- /Users/yonas/ffmpeg-r29748-orig/libavutil/internal.h	2009-08-07 19:25:56.000000000 -0400
+++ /Users/yonas/ffmpeg-r29748-iphone/libavutil/internal.h	2009-08-07 19:39:45.000000000 -0400
@@ -140,14 +140,14 @@
 #elif HAVE_ARMV6 && HAVE_INLINE_ASM
 static inline av_const int FASTDIV(int a, int b)
 {
-    int r, t;
-    __asm__ volatile("cmp     %3, #2               \n\t"
-                     "ldr     %1, [%4, %3, lsl #2] \n\t"
-                     "lsrle   %0, %2, #1           \n\t"
-                     "smmulgt %0, %1, %2           \n\t"
-                     : "=&r"(r), "=&r"(t) : "r"(a), "r"(b), "r"(ff_inverse));
+    int r;
+    __asm__ volatile("cmp   %2, #0        \n\t"
+                     "smmul %0, %1, %2    \n\t"
+                     "rsblt %0, %0, #0    \n\t"
+                     : "=r"(r) : "r"(a), "r"(ff_inverse[b]) : "cc"); /* cmp rewrites the condition flags */
     return r;
 }
+
 #elif ARCH_ARM && HAVE_INLINE_ASM
 static inline av_const int FASTDIV(int a, int b)
 {
diff -wBNru /Users/yonas/ffmpeg-r29748-orig/libavcodec/arm/asm.S /Users/yonas/ffmpeg-r29748-iphone/libavcodec/arm/asm.S
--- /Users/yonas/ffmpeg-r29748-iphone/libavcodec/arm/asm.S	2009-10-03 21:28:49.000000000 -0400
+++ /Users/yonas/ffmpeg-r29748-iphone/libavcodec/arm/asm.S	2009-10-03 21:38:54.000000000 -0400
@@ -35,13 +35,8 @@
         .endm
 
         .macro function name, export=0
-.if \export
-        .global EXTERN_ASM\name
-EXTERN_ASM\name:
-.endif
-ELF     .type   \name, %function
-        .func   \name
-\name:
+	.globl _$0
+	_$0:
         .endm
 
         .macro movrel rd, val
