@@ -914,8 +914,9 @@ aes0to15:
914
914
MOVQ $masks<>(SB), AX
915
915
PAND (AX)(CX*8 ), X1
916
916
final1:
917
- AESENC X0, X1 // scramble input, xor in seed
918
- AESENC X1, X1 // scramble combo 2 times
917
+ PXOR X0, X1 // xor data with seed
918
+ AESENC X1, X1 // scramble combo 3 times
919
+ AESENC X1, X1
919
920
AESENC X1, X1
920
921
MOVQ X1, (DX)
921
922
RET
@@ -949,9 +950,13 @@ aes17to32:
949
950
MOVOU (AX), X2
950
951
MOVOU -16 (AX)(CX*1 ), X3
951
952
953
+ // xor with seed
954
+ PXOR X0, X2
955
+ PXOR X1, X3
956
+
952
957
// scramble 3 times
953
- AESENC X0 , X2
954
- AESENC X1 , X3
958
+ AESENC X2 , X2
959
+ AESENC X3 , X3
955
960
AESENC X2, X2
956
961
AESENC X3, X3
957
962
AESENC X2, X2
@@ -977,11 +982,16 @@ aes33to64:
977
982
MOVOU 16 (AX), X5
978
983
MOVOU -32 (AX)(CX*1 ), X6
979
984
MOVOU -16 (AX)(CX*1 ), X7
985
+
986
+ PXOR X0, X4
987
+ PXOR X1, X5
988
+ PXOR X2, X6
989
+ PXOR X3, X7
980
990
981
- AESENC X0 , X4
982
- AESENC X1 , X5
983
- AESENC X2 , X6
984
- AESENC X3 , X7
991
+ AESENC X4 , X4
992
+ AESENC X5 , X5
993
+ AESENC X6 , X6
994
+ AESENC X7 , X7
985
995
986
996
AESENC X4, X4
987
997
AESENC X5, X5
@@ -1032,17 +1042,17 @@ aes65to128:
1032
1042
MOVOU -32 (AX)(CX*1 ), X14
1033
1043
MOVOU -16 (AX)(CX*1 ), X15
1034
1044
1035
- // scramble data, xor in seed
1036
- AESENC X0, X8
1037
- AESENC X1, X9
1038
- AESENC X2, X10
1039
- AESENC X3, X11
1040
- AESENC X4, X12
1041
- AESENC X5, X13
1042
- AESENC X6, X14
1043
- AESENC X7, X15
1045
+ // xor with seed
1046
+ PXOR X0, X8
1047
+ PXOR X1, X9
1048
+ PXOR X2, X10
1049
+ PXOR X3, X11
1050
+ PXOR X4, X12
1051
+ PXOR X5, X13
1052
+ PXOR X6, X14
1053
+ PXOR X7, X15
1044
1054
1045
- // scramble twice
1055
+ // scramble 3 times
1046
1056
AESENC X8, X8
1047
1057
AESENC X9, X9
1048
1058
AESENC X10, X10
@@ -1051,7 +1061,16 @@ aes65to128:
1051
1061
AESENC X13, X13
1052
1062
AESENC X14, X14
1053
1063
AESENC X15, X15
1054
-
1064
+
1065
+ AESENC X8, X8
1066
+ AESENC X9, X9
1067
+ AESENC X10, X10
1068
+ AESENC X11, X11
1069
+ AESENC X12, X12
1070
+ AESENC X13, X13
1071
+ AESENC X14, X14
1072
+ AESENC X15, X15
1073
+
1055
1074
AESENC X8, X8
1056
1075
AESENC X9, X9
1057
1076
AESENC X10, X10
@@ -1105,21 +1124,31 @@ aes129plus:
1105
1124
MOVOU -32 (AX)(CX*1 ), X14
1106
1125
MOVOU -16 (AX)(CX*1 ), X15
1107
1126
1108
- // scramble input once, xor in seed
1109
- AESENC X0, X8
1110
- AESENC X1, X9
1111
- AESENC X2, X10
1112
- AESENC X3, X11
1113
- AESENC X4, X12
1114
- AESENC X5, X13
1115
- AESENC X6, X14
1116
- AESENC X7, X15
1127
+ // xor in seed
1128
+ PXOR X0, X8
1129
+ PXOR X1, X9
1130
+ PXOR X2, X10
1131
+ PXOR X3, X11
1132
+ PXOR X4, X12
1133
+ PXOR X5, X13
1134
+ PXOR X6, X14
1135
+ PXOR X7, X15
1117
1136
1118
1137
// compute number of remaining 128-byte blocks
1119
1138
DECQ CX
1120
1139
SHRQ $7 , CX
1121
1140
1122
1141
aesloop:
1142
+ // scramble state
1143
+ AESENC X8, X8
1144
+ AESENC X9, X9
1145
+ AESENC X10, X10
1146
+ AESENC X11, X11
1147
+ AESENC X12, X12
1148
+ AESENC X13, X13
1149
+ AESENC X14, X14
1150
+ AESENC X15, X15
1151
+
1123
1152
// scramble state, xor in a block
1124
1153
MOVOU (AX), X0
1125
1154
MOVOU 16 (AX), X1
@@ -1138,7 +1167,11 @@ aesloop:
1138
1167
AESENC X6, X14
1139
1168
AESENC X7, X15
1140
1169
1141
- // scramble state
1170
+ ADDQ $128 , AX
1171
+ DECQ CX
1172
+ JNE aesloop
1173
+
1174
+ // 3 more scrambles to finish
1142
1175
AESENC X8, X8
1143
1176
AESENC X9, X9
1144
1177
AESENC X10, X10
@@ -1147,12 +1180,6 @@ aesloop:
1147
1180
AESENC X13, X13
1148
1181
AESENC X14, X14
1149
1182
AESENC X15, X15
1150
-
1151
- ADDQ $128 , AX
1152
- DECQ CX
1153
- JNE aesloop
1154
-
1155
- // 2 more scrambles to finish
1156
1183
AESENC X8, X8
1157
1184
AESENC X9, X9
1158
1185
AESENC X10, X10
0 commit comments