# clobbering all xmm registers
# clobbering r10, r11, r12, r13, r14, r15
.macro GCM_ENC_DEC INITIAL_BLOCKS GHASH_8_ENCRYPT_8_PARALLEL GHASH_LAST_8 GHASH_MUL ENC_DEC REP
+ vmovdqu AadHash(arg2), %xmm8
vmovdqu HashKey(arg2), %xmm13 # xmm13 = HashKey
+ add arg5, InLen(arg2)
mov arg5, %r13 # save the number of bytes of plaintext/ciphertext
and $-16, %r13 # r13 = r13 - (r13 mod 16)
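The AadHash load and InLen accumulation above (and the other arg2-relative offsets used throughout this patch) assume a per-request context block whose pointer travels in arg2. A hedged C sketch of that layout, with field order and sizes inferred purely from the offset names used in the assembly (the real definition belongs in the C glue code):

    #include <linux/types.h>

    /* Assumed layout; fields mirror the assembler offset names. */
    struct gcm_context_data {
            u8  aad_hash[16];              /* AadHash: running GHASH state   */
            u64 aad_length;                /* AadLen: total AAD in bytes     */
            u64 in_length;                 /* InLen: running text length     */
            u64 partial_block_length;      /* PBlockLen: bytes mod 16        */
            u8  partial_block_enc_key[16]; /* PBlockEncKey: saved E(K, Yn)   */
            u8  orig_IV[16];               /* OrigIV: Y0, kept for the tag   */
            u8  current_counter[16];       /* CurCount: running counter      */
            u8  hash_keys[16 * 8];         /* HashKey and its powers         */
    };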
_zero_cipher_left\@:
+ vmovdqu %xmm14, AadHash(arg2)
+ vmovdqu %xmm9, CurCount(arg2)
+
cmp $16, arg5
jl _only_less_than_16\@
# handle the last <16 Byte block separately
+ mov %r13, PBlockLen(arg2)
vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
+ vmovdqu %xmm9, CurCount(arg2)
vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
+
ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Yn)
+ vmovdqu %xmm9, PBlockEncKey(arg2)
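Storing E(K, Yn) in PBlockEncKey is what lets a later call finish a block that this call left incomplete: the unused tail of that keystream block is XORed against the first bytes of the next segment. A rough C sketch of the resume step, using the context layout sketched above and hypothetical names (folding the completed block into GHASH is omitted here):

    /* Consume leftover keystream from a previous partial block;
     * returns how many bytes of src/dst were handled. */
    static unsigned int gcm_consume_partial(struct gcm_context_data *ctx,
                                            u8 *dst, const u8 *src,
                                            unsigned int len)
    {
            unsigned int used = ctx->partial_block_length;
            unsigned int take = 16 - used < len ? 16 - used : len;
            unsigned int i;

            if (!used)
                    return 0;       /* no partial block pending */

            for (i = 0; i < take; i++)
                    dst[i] = src[i] ^ ctx->partial_block_enc_key[used + i];

            ctx->partial_block_length = (used + take) % 16;
            return take;
    }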
sub $16, %r11
add %r13, %r11
vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Yn)
+ vmovdqu %xmm9, PBlockEncKey(arg2)
lea SHIFT_MASK+16(%rip), %r12
sub %r13, %r12 # adjust the shuffle mask pointer to be
               # able to shift 16-r13 bytes (r13 is the
               # number of bytes in plaintext mod 16)
vpxor %xmm2, %xmm14, %xmm14
#GHASH computation for the last <16 Byte block
\GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+ vmovdqu %xmm14, AadHash(arg2)
sub %r13, %r11
add $16, %r11
.else
vpxor %xmm9, %xmm14, %xmm14
#GHASH computation for the last <16 Byte block
\GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+ vmovdqu %xmm14, AadHash(arg2)
sub %r13, %r11
add $16, %r11
vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext
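In both branches the final partial block is masked down to its valid bytes, XORed into %xmm14, multiplied through GHASH_MUL, and written back to AadHash so a subsequent call can keep extending the hash. The same step in a hedged C sketch (ghash_mul() stands in for the GHASH_MUL macro and is not a symbol from this patch):

    #include <linux/string.h>

    /* Fold a final partial block into the running GHASH state. */
    static void ghash_update_partial(struct gcm_context_data *ctx,
                                     const u8 *block, unsigned int len)
    {
            u8 padded[16] = { 0 };
            unsigned int i;

            memcpy(padded, block, len);     /* zero-pad to 16 bytes */
            for (i = 0; i < 16; i++)
                    ctx->aad_hash[i] ^= padded[i];
            ghash_mul(ctx->aad_hash);       /* multiply by H in GF(2^128) */
    }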
# Output: Authentication Tag (AUTH_TAG)
# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
.macro GCM_COMPLETE GHASH_MUL REP
- mov arg8, %r12 # r12 = aadLen (number of bytes)
+ vmovdqu AadHash(arg2), %xmm14
+ vmovdqu HashKey(arg2), %xmm13
+
+ mov AadLen(arg2), %r12 # r12 = aadLen (number of bytes)
shl $3, %r12 # convert into number of bits
vmovd %r12d, %xmm15 # len(A) in xmm15
- shl $3, arg5 # len(C) in bits (*128)
- vmovq arg5, %xmm1
+ mov InLen(arg2), %r12
+ shl $3, %r12 # len(C) in bits (*128)
+ vmovq %r12, %xmm1
vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000
vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C)
\GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation
vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap
- mov arg6, %rax # rax = *Y0
- vmovdqu (%rax), %xmm9 # xmm9 = Y0
+ vmovdqu OrigIV(arg2), %xmm9
ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Y0)
.endm
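What GCM_COMPLETE implements is the standard GCM tail: one more GHASH round over the 128-bit block len(A) || len(C) (both byte counts shifted left by 3, i.e. expressed in bits), a byte swap back to wire order, then Tag = GHASH_out XOR E(K, Y0). A hedged C rendering, where ghash_mul() and aes_encrypt_block() are stand-ins for the GHASH_MUL and ENCRYPT_SINGLE_BLOCK macros and the SHUF_MASK byte shuffles are glossed over:

    #include <asm/byteorder.h>
    #include <crypto/b128ops.h>

    static void gcm_final_tag(struct gcm_context_data *ctx,
                              const void *round_keys, u8 *tag)
    {
            be128 len_block;
            u8 ek_y0[16];
            unsigned int i;

            /* len(A) || len(C), both in bits, as one 16-byte block */
            len_block.a = cpu_to_be64(ctx->aad_length * 8);
            len_block.b = cpu_to_be64(ctx->in_length * 8);

            for (i = 0; i < 16; i++)
                    ctx->aad_hash[i] ^= ((const u8 *)&len_block)[i];
            ghash_mul(ctx->aad_hash);                   /* final GHASH */

            aes_encrypt_block(round_keys, ctx->orig_IV, ek_y0); /* E(K, Y0) */
            for (i = 0; i < 16; i++)
                    tag[i] = ctx->aad_hash[i] ^ ek_y0[i];
    }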
.macro INIT GHASH_MUL PRECOMPUTE
+ mov arg6, %r11
+ mov %r11, AadLen(arg2) # ctx_data.aad_length = aad_length
+ xor %r11d, %r11d
+ mov %r11, InLen(arg2) # ctx_data.in_length = 0
+
+ mov %r11, PBlockLen(arg2) # ctx_data.partial_block_length = 0
+ mov %r11, PBlockEncKey(arg2) # ctx_data.partial_block_enc_key = 0
+ mov arg4, %rax
+ movdqu (%rax), %xmm0
+ movdqu %xmm0, OrigIV(arg2) # ctx_data.orig_IV = iv
+
+ vpshufb SHUF_MASK(%rip), %xmm0, %xmm0
+ movdqu %xmm0, CurCount(arg2) # ctx_data.current_counter = iv
+
vmovdqu (arg3), %xmm6 # xmm6 = HashKey
vpshufb SHUF_MASK(%rip), %xmm6, %xmm6
xor %r11d, %r11d
# start AES for num_initial_blocks blocks
- mov arg6, %rax # rax = *Y0
- vmovdqu (%rax), \CTR # CTR = Y0
- vpshufb SHUF_MASK(%rip), \CTR, \CTR
-
+ vmovdqu CurCount(arg2), \CTR
i = (9-\num_initial_blocks)
setreg
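Because INIT parks all of this state in the context block and GCM_ENC_DEC both restores and re-saves it, the glue code gets an init/update/finalize interface: each scatterlist segment can be handed to the update step on its own, with InLen accumulating across calls. A sketch of the resulting driver loop, where every name is a hypothetical wrapper around the macros rather than a symbol exported by this patch:

    struct seg { const u8 *src; u8 *dst; unsigned long len; struct seg *next; };

    static void gcm_encrypt_segments(void *aes_ctx,
                                     struct gcm_context_data *data,
                                     const u8 *iv, const u8 *aad,
                                     unsigned long aad_len, struct seg *segs,
                                     u8 *auth_tag, unsigned long tag_len)
    {
            struct seg *s;

            gcm_init(aes_ctx, data, iv, aad, aad_len);      /* INIT         */
            for (s = segs; s; s = s->next)                  /* one update   */
                    gcm_enc_update(aes_ctx, data, s->dst,   /* per input    */
                                   s->src, s->len);         /* segment      */
            gcm_finalize(aes_ctx, data, auth_tag, tag_len); /* GCM_COMPLETE */
    }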
.macro INITIAL_BLOCKS_AVX2 REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
i = (8-\num_initial_blocks)
setreg
- vmovdqu AadHash(arg2), reg_i
+ vmovdqu AadHash(arg2), reg_i
# initialize the data pointer offset as zero
xor %r11d, %r11d
# start AES for num_initial_blocks blocks
- mov arg6, %rax # rax = *Y0
- vmovdqu (%rax), \CTR # CTR = Y0
- vpshufb SHUF_MASK(%rip), \CTR, \CTR
-
+ vmovdqu CurCount(arg2), \CTR
i = (9-\num_initial_blocks)
setreg