// 32-bit ARM assembly, GNU assembler syntax.
// Runs every nibble of an 8-byte block through a 16-entry S-box, in place.
.global _start                          // make the entry label visible to the linker

.data
// Sample block handed to sub_block by _start. 16 bytes are defined here;
// sub_block's loop only walks the first 8 of them.
input:
    .byte 0x05, 0x01, 0x02, 0x03
    .byte 0x04, 0x05, 0x06, 0x07
    .byte 0x08, 0x09, 0x0A, 0x0B
    .byte 0x0C, 0x0D, 0x0E, 0x0F

// 4-bit substitution table: entry n is the replacement for nibble value n.
// (The 16 values appear to match the S-box of the PRESENT block cipher.)
sbox:
    .byte 0xC, 0x5, 0x6, 0xB
    .byte 0x9, 0x0, 0xA, 0xD
    .byte 0x3, 0xE, 0xF, 0x8
    .byte 0x4, 0x7, 0x1, 0x2
.text
// sub_block -- substitute every nibble of an 8-byte (64-bit) block via an S-box.
//
// C equivalent: void sub_block(uint8_t *block, const uint8_t *sbox)
//
// In:    r0 = address of the 8-byte block; it is rewritten in place
//        r1 = address of the 16-entry S-box, one byte per entry
//             (each entry must be in 0..15 so the two halves do not overlap)
// Out:   the 8 bytes at [r0] hold the substituted block; r0 itself is unchanged
// Clobb: r2, r3, r12, flags (all caller-saved scratch, nothing is pushed)
sub_block:
/*
 for (i = 0; i < 8; i++) {
     uint8_t c    = block[i];
     uint8_t c_lo = c & 15;              // low nibble
     uint8_t c_hi = c >> 4;              // high nibble
     block[i] = sbox[c_lo] | (sbox[c_hi] << 4);
 }
*/
    mov     r3, #0                      // r3 = i, byte index into the block
sub_block_loop:
    cmp     r3, #8
    bge     sub_block_loop_end          // done once all 8 bytes are processed

    ldrb    r2, [r0, r3]                // r2 = c = block[i]
    mov     r12, r2, lsr #4             // r12 = c_hi (ldrb zero-extends, so 0..15)
    and     r2, r2, #15                 // r2  = c_lo

    ldrb    r2, [r1, r2]                // r2  = sbox[c_lo]
    ldrb    r12, [r1, r12]              // r12 = sbox[c_hi]

    // Move the substituted high nibble back into bits 7..4 before merging;
    // without the shift both results would land in the low nibble.
    orr     r2, r2, r12, lsl #4
    strb    r2, [r0, r3]                // block[i] = (sbox[c_hi] << 4) | sbox[c_lo]

    add     r3, r3, #1                  // i++
    b       sub_block_loop
sub_block_loop_end:
    bx      lr                          // return to caller
// Program entry: run sub_block once over `input` using `sbox`, then park the CPU.
_start:
    ldr     r1, =sbox                   // r1 = address of the substitution table
    ldr     r0, =input                  // r0 = address of the block to transform
    bl      sub_block                   // substitute the block in place
    b       forever                     // nothing to return to: go idle

// Idle loop: branches to itself indefinitely.
forever:
    b       forever
|