* int blocks, u8 const rk2[], u8 iv[], int first)
*/
- .macro next_tweak, out, in, const, tmp
+ .macro next_tweak, out, in, tmp
sshr \tmp\().2d, \in\().2d, #63
- and \tmp\().16b, \tmp\().16b, \const\().16b
+ and \tmp\().16b, \tmp\().16b, xtsmask.16b
add \out\().2d, \in\().2d, \in\().2d
ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
eor \out\().16b, \out\().16b, \tmp\().16b
.endm
-.Lxts_mul_x:
-CPU_LE( .quad 1, 0x87 )
-CPU_BE( .quad 0x87, 1 )
+ .macro xts_load_mask, tmp
+ movi xtsmask.2s, #0x1
+ movi \tmp\().2s, #0x87
+ uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s
+ .endm
AES_ENTRY(aes_xts_encrypt)
stp x29, x30, [sp, #-16]!
enc_prepare w3, x5, x8
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
enc_switch_key w3, x2, x8
- ldr q7, .Lxts_mul_x
+ xts_load_mask v8
b .LxtsencNx
.Lxtsencnotfirst:
enc_prepare w3, x2, x8
.LxtsencloopNx:
- ldr q7, .Lxts_mul_x
- next_tweak v4, v4, v7, v8
+ xts_reload_mask v8
+ next_tweak v4, v4, v8
.LxtsencNx:
subs w4, w4, #4
bmi .Lxtsenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
- next_tweak v5, v4, v7, v8
+ next_tweak v5, v4, v8
eor v0.16b, v0.16b, v4.16b
- next_tweak v6, v5, v7, v8
+ next_tweak v6, v5, v8
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
- next_tweak v7, v6, v7, v8
+ next_tweak v7, v6, v8
eor v3.16b, v3.16b, v7.16b
bl aes_encrypt_block4x
eor v3.16b, v3.16b, v7.16b
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
beq .Lxtsencout
- next_tweak v4, v4, v7, v8
+ next_tweak v4, v4, v8
b .Lxtsencloop
.Lxtsencout:
st1 {v4.16b}, [x6]
enc_prepare w3, x5, x8
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
dec_prepare w3, x2, x8
- ldr q7, .Lxts_mul_x
+ xts_load_mask v8
b .LxtsdecNx
.Lxtsdecnotfirst:
dec_prepare w3, x2, x8
.LxtsdecloopNx:
- ldr q7, .Lxts_mul_x
- next_tweak v4, v4, v7, v8
+ xts_reload_mask v8
+ next_tweak v4, v4, v8
.LxtsdecNx:
subs w4, w4, #4
bmi .Lxtsdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
- next_tweak v5, v4, v7, v8
+ next_tweak v5, v4, v8
eor v0.16b, v0.16b, v4.16b
- next_tweak v6, v5, v7, v8
+ next_tweak v6, v5, v8
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
- next_tweak v7, v6, v7, v8
+ next_tweak v7, v6, v8
eor v3.16b, v3.16b, v7.16b
bl aes_decrypt_block4x
eor v3.16b, v3.16b, v7.16b
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
beq .Lxtsdecout
- next_tweak v4, v4, v7, v8
+ next_tweak v4, v4, v8
b .Lxtsdecloop
.Lxtsdecout:
st1 {v4.16b}, [x6]