/* SPARC _add_n -- Add two limb vectors of the same length > 0 and store
 *		   sum in a third limb vector.
 *
 *      Copyright (C) 1995, 1996, 1998,
 *                    2001, 2002 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 */


/*******************
 *  mpi_limb_t
 *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,
 *		      mpi_ptr_t s1_ptr,
 *		      mpi_ptr_t s2_ptr,
 *		      mpi_size_t size)
 */

! INPUT PARAMETERS
#define res_ptr	%o0
#define s1_ptr	%o1
#define s2_ptr	%o2
#define size	%o3

#include "sysdep.h"

	.text
	.align	4
	.global	C_SYMBOL_NAME(_gcry_mpih_add_n)
C_SYMBOL_NAME(_gcry_mpih_add_n):
	xor	s2_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L1			! branch if alignment differs
	nop
! **  V1a  **
L0:	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
	be	L_v1			! if no, branch
	nop
/* Add least significant limb separately to align res_ptr and s2_ptr */
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	size,-1,size
	addcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr
L_v1:	addx	%g0,%g0,%o4		! save cy in register
	cmp	size,2			! if size < 2 ...
	bl	Lend2			! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy

	ld	[s1_ptr+0],%g4
	addcc	size,-10,size
	ld	[s1_ptr+4],%g1
	ldd	[s2_ptr+0],%g2
	blt	Lfin1
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
Loop1:	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+16],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+20],%g1
	ldd	[s2_ptr+16],%g2
	std	%o4,[res_ptr+8]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+24],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+28],%g1
	ldd	[s2_ptr+24],%g2
	std	%o4,[res_ptr+16]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+32],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+36],%g1
	ldd	[s2_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	Loop1
	subcc	%g0,%o4,%g0		! restore cy

Lfin1:	addcc	size,8-2,size
	blt	Lend1
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope1:	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-2,size
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	Loope1
	subcc	%g0,%o4,%g0		! restore cy
Lend1:	addxcc	%g4,%g2,%o4
	addxcc	%g1,%g3,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	size,1,%g0
	be	Lret1
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
	ld	[s1_ptr+8],%g4
	ld	[s2_ptr+8],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr+8]

Lret1:	retl
	addx	%g0,%g0,%o0		! return carry-out from most sign. limb

L1:	xor	s1_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L2
	nop
! **  V1b  **
	mov	s2_ptr,%g1
	mov	s1_ptr,s2_ptr
	b	L0
	mov	%g1,s1_ptr
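
/* Note on the variants in this file: limbs are 32-bit, but the inner
   loops use 64-bit ldd/std accesses, which SPARC requires to be 8-byte
   aligned.  V1a handles the case where s2_ptr and res_ptr share the same
   alignment mod 8 (after peeling at most one limb, ldd from s2_ptr and
   std to res_ptr are both safe); V1b is the same case with s1_ptr and
   s2_ptr swapped; V2 below handles the case where only the two source
   pointers share alignment, so both sources are read with ldd while the
   result is written with single-word st.

   Carry handling: the condition codes do not survive the addcc/cmp
   instructions that update the loop counter, so the carry is first saved
   with "addx %g0,%g0,%o4" (%o4 = 0 + 0 + C, i.e. the carry bit) and later
   regenerated with "subcc %g0,%o4,%g0" (0 - 1 borrows, setting C again
   for the next addxcc).  */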
! **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the
   alignment of s1_ptr and s2_ptr is the same.  */

L2:	cmp	size,1
	be	Ljone
	nop
	andcc	s1_ptr,4,%g0		! s1_ptr unaligned? Side effect: cy=0
	be	L_v2			! if no, branch
	nop
/* Add least significant limb separately to align s1_ptr and s2_ptr */
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	size,-1,size
	addcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr

L_v2:	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	blt	Lfin2
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
Loop2:	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	ldd	[s1_ptr+8],%g2
	ldd	[s2_ptr+8],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+8]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+12]
	ldd	[s1_ptr+16],%g2
	ldd	[s2_ptr+16],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+16]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+20]
	ldd	[s1_ptr+24],%g2
	ldd	[s2_ptr+24],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+24]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	Loop2
	subcc	%g0,%o4,%g0		! restore cy

Lfin2:	addcc	size,8-2,size
	blt	Lend2
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope2:	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-2,size
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	Loope2
	subcc	%g0,%o4,%g0		! restore cy
Lend2:	andcc	size,1,%g0
	be	Lret2
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
Ljone:	ld	[s1_ptr],%g4
	ld	[s2_ptr],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr]

Lret2:	retl
	addx	%g0,%g0,%o0		! return carry-out from most sign. limb
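
/* For reference, a minimal C sketch of the operation this routine
   implements: limb-wise addition with carry propagation, returning the
   carry out of the most significant limb.  This is an illustrative
   portable equivalent (assuming 32-bit mpi_limb_t), not the code
   Libgcrypt actually uses on SPARC.

   mpi_limb_t
   _gcry_mpih_add_n (mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
                     mpi_ptr_t s2_ptr, mpi_size_t size)
   {
     mpi_limb_t cy = 0;
     mpi_size_t i;

     for (i = 0; i < size; i++)
       {
         mpi_limb_t t = s1_ptr[i] + s2_ptr[i];   // may wrap around
         mpi_limb_t c = (t < s1_ptr[i]);         // carry out of a + b
         mpi_limb_t s = t + cy;                  // add incoming carry
         cy = c | (s < t);                       // total carry out
         res_ptr[i] = s;
       }
     return cy;
   }
*/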