mpi/pa7100/mpih-shift.S


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158

/* hppa   rshift, lshift
 *	  optimized for the PA7100, where it runs at 3.25 cycles/limb
 *	Copyright (C) 1992, 1994 Free Software Foundation, Inc.
 *	Copyright (c) 1997 by Werner Koch (dd9jn)
 *
 * This file is part of G10.
 *
 * G10 is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * G10 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 *
 * Note: This code is heavily based on the GNU MP Library.
 *	 Actually it's the same code with only minor changes in the
 *	 way the data is stored; this is to support the abstraction
 *	 of an optional secure memory allocation which may be used
 *	 to avoid revealing sensitive data due to paging etc.
 *	 The GNU MP Library itself is published under the LGPL;
 *	 however I decided to publish this code under the plain GPL.
 */


/*******************
 * mpi_limb_t
 * mpihelp_lshift( mpi_ptr_t wp,	(gr26)
 *		   mpi_ptr_t up,	(gr25)
 *		   mpi_size_t usize,	(gr24)
 *		   unsigned cnt)	(gr23)
 *
 * Shift the usize-limb number at up left by cnt bits and store the
 * usize result limbs at wp.  Limbs are 4 bytes wide.  The bits shifted
 * out of the most significant limb are returned in gr28.
 *
 * The limbs are processed from the most significant one downwards:
 * both pointers are first moved past the end of their arrays and are
 * then walked back with pre-decrementing loads and stores.
 *
 * Register usage:
 *   gr1	 32 - cnt, copied into the shift amount register (SAR)
 *   gr20	 result limb that is waiting to be stored
 *   gr22, gr29  the two most recently loaded source limbs; the code
 *		 alternates between them
 *   gr24	 loop counter derived from usize
 *   gr28	 return value
 *
 * Every branch below is written without the ",n" completer, so the
 * instruction that follows a branch (its delay slot) is executed too.
 *
 * NOTE(review): one limb is loaded unconditionally and 32 - cnt is put
 * into the SAR, so the routine presumably expects usize >= 1 and
 * 1 <= cnt <= 31 -- confirm against the callers.
 */

	.code
	.export 	mpihelp_lshift
mpihelp_lshift
	.proc
	.callinfo	frame=64,no_calls
	.entry

	sh2add		%r24,%r25,%r25		; gr25 = up + 4*usize (one past the top limb)
	sh2add		%r24,%r26,%r26		; gr26 = wp + 4*usize
	ldws,mb 	-4(0,%r25),%r22 	; step back, load most significant limb
	subi		32,%r23,%r1		; gr1 = 32 - cnt
	mtsar		%r1			; vshd now shifts the 64-bit pair right by 32 - cnt
	addib,= 	-1,%r24,L$0004		; only one limb: go straight to the final store
	vshd		%r0,%r22,%r28		; compute carry out limb: gr28 = gr22 >> (32 - cnt)
	ldws,mb 	-4(0,%r25),%r29 	; load the second limb from the top
	addib,<=	-5,%r24,L$rest		; gr24 = usize - 6; skip the unrolled loop if <= 0
	vshd		%r22,%r29,%r20		; gr20 = (gr22 << cnt) | (gr29 >> (32 - cnt))

/* Main loop, unrolled four times.  Each step loads the next lower limb,
 * stores the result computed in the previous step and computes the next
 * result.  On entry gr24 is the number of limbs not yet loaded minus 4.
 */
L$loop	ldws,mb 	-4(0,%r25),%r22
	stws,mb 	%r20,-4(0,%r26)
	vshd		%r29,%r22,%r20
	ldws,mb 	-4(0,%r25),%r29
	stws,mb 	%r20,-4(0,%r26)
	vshd		%r22,%r29,%r20
	ldws,mb 	-4(0,%r25),%r22
	stws,mb 	%r20,-4(0,%r26)
	vshd		%r29,%r22,%r20
	ldws,mb 	-4(0,%r25),%r29
	stws,mb 	%r20,-4(0,%r26)
	addib,> 	-4,%r24,L$loop		; continue while more than four limbs are unloaded
	vshd		%r22,%r29,%r20		; delay slot: result for the limb pair just loaded

/* Remainder.  Adding 4 back turns gr24 into the number of source limbs
 * that are still to be loaded (0 to 4).
 */
L$rest	addib,= 	4,%r24,L$end1		; nothing left to load: newest limb is in gr29
	nop
L$eloop ldws,mb 	-4(0,%r25),%r22
	stws,mb 	%r20,-4(0,%r26)
	addib,<=	-1,%r24,L$end2		; that was the last limb: newest limb is in gr22
	vshd		%r29,%r22,%r20
	ldws,mb 	-4(0,%r25),%r29
	stws,mb 	%r20,-4(0,%r26)
	addib,> 	-1,%r24,L$eloop
	vshd		%r22,%r29,%r20

/* Exit with the least significant source limb in gr29. */
L$end1	stws,mb 	%r20,-4(0,%r26) 	; store the pending result
	vshd		%r29,%r0,%r20		; lowest result limb: gr29 << cnt, zeros shifted in
	bv		0(%r2)			; return to the address in gr2
	stw		%r20,-4(0,%r26) 	; delay slot: store the lowest result limb
/* Exit with the least significant source limb in gr22; L$0004 is also the
 * entry for the single-limb case, where no result is pending.
 */
L$end2	stws,mb 	%r20,-4(0,%r26) 	; store the pending result
L$0004	vshd		%r22,%r0,%r20		; lowest result limb: gr22 << cnt, zeros shifted in
	bv		0(%r2)			; return to the address in gr2
	stw		%r20,-4(0,%r26) 	; delay slot: store the lowest result limb

	.exit
	.procend


/*******************
 * mpi_limb_t
 * mpihelp_rshift( mpi_ptr_t wp,       (gr26)
 *		   mpi_ptr_t up,       (gr25)
 *		   mpi_size_t usize,   (gr24)
 *		   unsigned cnt)       (gr23)
 *
 * Shift the usize-limb number at up right by cnt bits and store the
 * usize result limbs at wp.  Limbs are 4 bytes wide.  The bits shifted
 * out of the least significant limb are returned in the upper bits of
 * gr28.
 *
 * The limbs are processed from the least significant one upwards, using
 * post-incrementing loads and stores.
 *
 * Register usage:
 *   gr20	 result limb that is waiting to be stored
 *   gr22, gr29  the two most recently loaded source limbs; the code
 *		 alternates between them
 *   gr23	 cnt, copied into the shift amount register (SAR)
 *   gr24	 loop counter derived from usize
 *   gr28	 return value
 *
 * Every branch below is written without the ",n" completer, so the
 * instruction that follows a branch (its delay slot) is executed too.
 *
 * NOTE(review): one limb is loaded unconditionally, so the routine
 * presumably expects usize >= 1, and cnt is presumably in the range
 * 1 to 31 -- confirm against the callers.
 */

	.code
	.export 	mpihelp_rshift
mpihelp_rshift
	.proc
	.callinfo	frame=64,no_calls
	.entry

	ldws,ma 	4(0,%r25),%r22		; load least significant limb, then advance gr25
	mtsar		%r23			; vshd now shifts the 64-bit pair right by cnt
	addib,= 	-1,%r24,L$r004		; only one limb: go straight to the final store
	vshd		%r22,%r0,%r28		; compute carry out limb: gr28 = gr22 << (32 - cnt)
	ldws,ma 	4(0,%r25),%r29		; load the second limb
	addib,<=	-5,%r24,L$rrest 	; gr24 = usize - 6; skip the unrolled loop if <= 0
	vshd		%r29,%r22,%r20		; gr20 = (gr22 >> cnt) | (gr29 << (32 - cnt))

/* Main loop, unrolled four times.  Each step loads the next higher limb,
 * stores the result computed in the previous step and computes the next
 * result.  On entry gr24 is the number of limbs not yet loaded minus 4.
 */
L$roop	ldws,ma 	4(0,%r25),%r22
	stws,ma 	%r20,4(0,%r26)
	vshd		%r22,%r29,%r20
	ldws,ma 	4(0,%r25),%r29
	stws,ma 	%r20,4(0,%r26)
	vshd		%r29,%r22,%r20
	ldws,ma 	4(0,%r25),%r22
	stws,ma 	%r20,4(0,%r26)
	vshd		%r22,%r29,%r20
	ldws,ma 	4(0,%r25),%r29
	stws,ma 	%r20,4(0,%r26)
	addib,> 	-4,%r24,L$roop		; continue while more than four limbs are unloaded
	vshd		%r29,%r22,%r20		; delay slot: result for the limb pair just loaded

/* Remainder.  Adding 4 back turns gr24 into the number of source limbs
 * that are still to be loaded (0 to 4).
 */
L$rrest addib,= 	4,%r24,L$rend1		; nothing left to load: newest limb is in gr29
	nop
L$eroop ldws,ma 	4(0,%r25),%r22
	stws,ma 	%r20,4(0,%r26)
	addib,<=	-1,%r24,L$rend2 	; that was the last limb: newest limb is in gr22
	vshd		%r22,%r29,%r20
	ldws,ma 	4(0,%r25),%r29
	stws,ma 	%r20,4(0,%r26)
	addib,> 	-1,%r24,L$eroop
	vshd		%r29,%r22,%r20

/* Exit with the most significant source limb in gr29. */
L$rend1  stws,ma	 %r20,4(0,%r26) 	; store the pending result
	vshd		%r0,%r29,%r20		; highest result limb: gr29 >> cnt, zeros shifted in
	bv		0(%r2)			; return to the address in gr2
	stw		%r20,0(0,%r26)		; delay slot: store the highest result limb
/* Exit with the most significant source limb in gr22; L$r004 is also the
 * entry for the single-limb case, where no result is pending.
 */
L$rend2  stws,ma	 %r20,4(0,%r26) 	; store the pending result
L$r004	vshd		%r0,%r22,%r20		; highest result limb: gr22 >> cnt, zeros shifted in
	bv		0(%r2)			; return to the address in gr2
	stw		%r20,0(0,%r26)		; delay slot: store the highest result limb

	.exit
	.procend