/*
 * Nibble-wise S-box substitution of one 64-bit block.
 * Target: 32-bit ARM, GNU assembler syntax, freestanding (_start entry point).
 */

        .global _start

        .equ    BLOCK_BYTES, 8          // one block = 64 bits = 8 bytes

        .data

// Block data. Only the first BLOCK_BYTES bytes are substituted (in place)
// by the call made from _start; the second row is left untouched.
input:
        .byte   5, 1, 2, 3, 4, 5, 6, 7
        .byte   8, 9, 10, 11, 12, 13, 14, 15

// 4-bit substitution table: sbox[n] is the replacement for nibble n.
sbox:
        .byte   0xC, 0x5, 0x6, 0xB, 0x9, 0x0, 0xA, 0xD
        .byte   0x3, 0xE, 0xF, 0x8, 0x4, 0x7, 0x1, 0x2

        .text

/*
 * sub_block - substitute every nibble of a 64-bit block using an S-box.
 *
 * In:     r0 = address of the 8-byte block (modified in place)
 *         r1 = address of the 16-entry byte S-box
 * Out:    r0 = unchanged, still the address of the (now substituted) block
 * Clobb:  r2, r3, r12, flags
 *
 * Equivalent C:
 *
 *     for (i = 0; i < 8; i++) {
 *         uint8_t c    = block[i];
 *         uint8_t c_lo = c & 15;               // low nibble
 *         uint8_t c_hi = c >> 4;               // high nibble
 *         block[i] = sbox[c_lo] | (sbox[c_hi] << 4);
 *     }
 */
sub_block:
        mov     r3, #0                  // r3 = i
sub_block_loop:
        ldrb    r2, [r0, r3]            // r2 = c = block[i]
        mov     r12, r2, lsr #4         // r12 = c_hi
        and     r2, r2, #15             // r2  = c_lo
        ldrb    r2, [r1, r2]            // r2  = sbox[c_lo]
        ldrb    r12, [r1, r12]          // r12 = sbox[c_hi]
        // The substituted high nibble must go back into bits 7..4;
        // OR-ing it in unshifted would corrupt the low nibble instead.
        orr     r2, r2, r12, lsl #4     // r2 = (sbox[c_hi] << 4) | sbox[c_lo]
        strb    r2, [r0, r3]            // block[i] = r2
        add     r3, r3, #1              // i++
        cmp     r3, #BLOCK_BYTES
        blt     sub_block_loop          // loop while i < BLOCK_BYTES
        bx      lr                      // return

_start:
        ldr     r0, =input
        ldr     r1, =sbox
        bl      sub_block

// Nothing to return to: park the CPU here once the block is substituted.
forever:
        b       forever