File tree Expand file tree Collapse file tree 12 files changed +150
-48
lines changed
Expand file tree Collapse file tree 12 files changed +150
-48
lines changed Original file line number Diff line number Diff line change @@ -3075,11 +3075,19 @@ __bn_sqrx8x_internal:
30753075
30763076.align 32
30773077.Lsqrx8x_break:
3078- subq 16 +8 (%rsp ),%r8
3078+ xorq %rbp ,%rbp
3079+ subq 16 +8 (%rsp ),%rbx
3080+ adcxq %rbp ,%r8
30793081movq 24 +8 (%rsp ),%rcx
3082+ adcxq %rbp ,%r9
30803083movq 0 (%rsi ),%rdx
3081- xorl %ebp , %ebp
3084+ adcq $0 , %r10
30823085movq %r8 ,0 (%rdi )
3086+ adcq $0 ,%r11
3087+ adcq $0 ,%r12
3088+ adcq $0 ,%r13
3089+ adcq $0 ,%r14
3090+ adcq $0 ,%r15
30833091cmpq %rcx ,%rdi
30843092je .Lsqrx8x_outer_loop
30853093
Original file line number Diff line number Diff line change @@ -1036,19 +1036,18 @@ __ecp_nistz256_sqr_montx:
10361036 adoxq %rbp ,%r13
10371037.byte 0x67 ,0x67
10381038mulxq %rdx ,%rcx ,%rax
1039- movq %r8 ,%rdx
1039+ movq .Lpoly+ 24 ( %rip ) ,%rdx
10401040 adoxq %rcx ,%r14
10411041 shlxq %rsi ,%r8 ,%rcx
10421042 adoxq %rax ,%r15
10431043 shrxq %rsi ,%r8 ,%rax
1044- movq .Lpoly+ 24 ( %rip ) ,%rbp
1044+ movq %rdx ,%rbp
10451045
10461046
10471047addq %rcx ,%r9
10481048adcq %rax ,%r10
10491049
1050- mulxq %rbp ,%rcx ,%r8
1051- movq %r9 ,%rdx
1050+ mulxq %r8 ,%rcx ,%r8
10521051adcq %rcx ,%r11
10531052 shlxq %rsi ,%r9 ,%rcx
10541053adcq $0 ,%r8
@@ -1058,8 +1057,7 @@ __ecp_nistz256_sqr_montx:
10581057addq %rcx ,%r10
10591058adcq %rax ,%r11
10601059
1061- mulxq %rbp ,%rcx ,%r9
1062- movq %r10 ,%rdx
1060+ mulxq %r9 ,%rcx ,%r9
10631061adcq %rcx ,%r8
10641062 shlxq %rsi ,%r10 ,%rcx
10651063adcq $0 ,%r9
@@ -1069,8 +1067,7 @@ __ecp_nistz256_sqr_montx:
10691067addq %rcx ,%r11
10701068adcq %rax ,%r8
10711069
1072- mulxq %rbp ,%rcx ,%r10
1073- movq %r11 ,%rdx
1070+ mulxq %r10 ,%rcx ,%r10
10741071adcq %rcx ,%r9
10751072 shlxq %rsi ,%r11 ,%rcx
10761073adcq $0 ,%r10
@@ -1080,12 +1077,12 @@ __ecp_nistz256_sqr_montx:
10801077addq %rcx ,%r8
10811078adcq %rax ,%r9
10821079
1083- mulxq %rbp ,%rcx ,%r11
1080+ mulxq %r11 ,%rcx ,%r11
10841081adcq %rcx ,%r10
10851082adcq $0 ,%r11
10861083
10871084xorq %rdx ,%rdx
1088- adcq %r8 ,%r12
1085+ addq %r8 ,%r12
10891086movq .Lpoly+8 (%rip ),%rsi
10901087adcq %r9 ,%r13
10911088movq %r12 ,%r8
@@ -1094,8 +1091,7 @@ __ecp_nistz256_sqr_montx:
10941091movq %r13 ,%r9
10951092adcq $0 ,%rdx
10961093
1097- xorl %eax ,%eax
1098- sbbq $-1 ,%r12
1094+ subq $-1 ,%r12
10991095movq %r14 ,%r10
11001096sbbq %rsi ,%r13
11011097sbbq $0 ,%r14
Original file line number Diff line number Diff line change @@ -116,8 +116,19 @@ OPENSSL_ia32_cpuid:
116116orl $0x40000000 ,%edx
117117andb $15 ,%ah
118118cmpb $15 ,%ah
119- jne .Lnotintel
119+ jne .LnotP4
120120orl $0x00100000 ,%edx
121+ .LnotP4:
122+ cmpb $6 ,%ah
123+ jne .Lnotintel
124+ andl $0x0fff0ff0 ,%eax
125+ cmpl $0x00050670 ,%eax
126+ je .Lknights
127+ cmpl $0x00080650 ,%eax
128+ jne .Lnotintel
129+ .Lknights:
130+ andl $0xfbffffff ,%ecx
131+
121132.Lnotintel:
122133btl $28 ,%edx
123134jnc .Lgeneric
@@ -142,6 +153,10 @@ OPENSSL_ia32_cpuid:
142153movl $7 ,%eax
143154xorl %ecx ,%ecx
144155cpuid
156+ btl $26 ,%r9d
157+ jc .Lnotknights
158+ andl $0xfff7ffff ,%ebx
159+ .Lnotknights:
145160movl %ebx ,8 (%rdi )
146161.Lno_extended_info:
147162
Original file line number Diff line number Diff line change @@ -3075,11 +3075,19 @@ L$sqrx8x_loop:
30753075
30763076.p2align 5
30773077L$sqrx8x_break:
3078- subq 16 +8 (%rsp ),%r8
3078+ xorq %rbp ,%rbp
3079+ subq 16 +8 (%rsp ),%rbx
3080+ adcxq %rbp ,%r8
30793081movq 24 +8 (%rsp ),%rcx
3082+ adcxq %rbp ,%r9
30803083movq 0 (%rsi ),%rdx
3081- xorl %ebp , %ebp
3084+ adcq $0 , %r10
30823085movq %r8 ,0 (%rdi )
3086+ adcq $0 ,%r11
3087+ adcq $0 ,%r12
3088+ adcq $0 ,%r13
3089+ adcq $0 ,%r14
3090+ adcq $0 ,%r15
30833091cmpq %rcx ,%rdi
30843092je L$sqrx8x_outer_loop
30853093
Original file line number Diff line number Diff line change @@ -1036,19 +1036,18 @@ __ecp_nistz256_sqr_montx:
10361036 adoxq %rbp ,%r13
10371037.byte 0x67 ,0x67
10381038mulxq %rdx ,%rcx ,%rax
1039- movq %r8 ,%rdx
1039+ movq L$poly+ 24 ( %rip ) ,%rdx
10401040 adoxq %rcx ,%r14
10411041 shlxq %rsi ,%r8 ,%rcx
10421042 adoxq %rax ,%r15
10431043 shrxq %rsi ,%r8 ,%rax
1044- movq L$poly+ 24 ( %rip ) ,%rbp
1044+ movq %rdx ,%rbp
10451045
10461046
10471047addq %rcx ,%r9
10481048adcq %rax ,%r10
10491049
1050- mulxq %rbp ,%rcx ,%r8
1051- movq %r9 ,%rdx
1050+ mulxq %r8 ,%rcx ,%r8
10521051adcq %rcx ,%r11
10531052 shlxq %rsi ,%r9 ,%rcx
10541053adcq $0 ,%r8
@@ -1058,8 +1057,7 @@ __ecp_nistz256_sqr_montx:
10581057addq %rcx ,%r10
10591058adcq %rax ,%r11
10601059
1061- mulxq %rbp ,%rcx ,%r9
1062- movq %r10 ,%rdx
1060+ mulxq %r9 ,%rcx ,%r9
10631061adcq %rcx ,%r8
10641062 shlxq %rsi ,%r10 ,%rcx
10651063adcq $0 ,%r9
@@ -1069,8 +1067,7 @@ __ecp_nistz256_sqr_montx:
10691067addq %rcx ,%r11
10701068adcq %rax ,%r8
10711069
1072- mulxq %rbp ,%rcx ,%r10
1073- movq %r11 ,%rdx
1070+ mulxq %r10 ,%rcx ,%r10
10741071adcq %rcx ,%r9
10751072 shlxq %rsi ,%r11 ,%rcx
10761073adcq $0 ,%r10
@@ -1080,12 +1077,12 @@ __ecp_nistz256_sqr_montx:
10801077addq %rcx ,%r8
10811078adcq %rax ,%r9
10821079
1083- mulxq %rbp ,%rcx ,%r11
1080+ mulxq %r11 ,%rcx ,%r11
10841081adcq %rcx ,%r10
10851082adcq $0 ,%r11
10861083
10871084xorq %rdx ,%rdx
1088- adcq %r8 ,%r12
1085+ addq %r8 ,%r12
10891086movq L$poly+8 (%rip ),%rsi
10901087adcq %r9 ,%r13
10911088movq %r12 ,%r8
@@ -1094,8 +1091,7 @@ __ecp_nistz256_sqr_montx:
10941091movq %r13 ,%r9
10951092adcq $0 ,%rdx
10961093
1097- xorl %eax ,%eax
1098- sbbq $-1 ,%r12
1094+ subq $-1 ,%r12
10991095movq %r14 ,%r10
11001096sbbq %rsi ,%r13
11011097sbbq $0 ,%r14
Original file line number Diff line number Diff line change @@ -117,8 +117,19 @@ L$nocacheinfo:
117117orl $0x40000000 ,%edx
118118andb $15 ,%ah
119119cmpb $15 ,%ah
120- jne L$notintel
120+ jne L$notP4
121121orl $0x00100000 ,%edx
122+ L$notP4:
123+ cmpb $6 ,%ah
124+ jne L$notintel
125+ andl $0x0fff0ff0 ,%eax
126+ cmpl $0x00050670 ,%eax
127+ je L$knights
128+ cmpl $0x00080650 ,%eax
129+ jne L$notintel
130+ L$knights:
131+ andl $0xfbffffff ,%ecx
132+
122133L$notintel:
123134btl $28 ,%edx
124135jnc L$generic
@@ -143,6 +154,10 @@ L$generic:
143154movl $7 ,%eax
144155xorl %ecx ,%ecx
145156cpuid
157+ btl $26 ,%r9d
158+ jc L$notknights
159+ andl $0xfff7ffff ,%ebx
160+ L$notknights:
146161movl %ebx ,8 (%rdi )
147162L$no_extended_info:
148163
Original file line number Diff line number Diff line change @@ -3166,11 +3166,19 @@ DB 067h
31663166
31673167ALIGN 32
31683168$ L $ sqrx8x_break::
3169- sub r8 , QWORD PTR [ (( 16 + 8 )) + rsp ]
3169+ xor rbp , rbp
3170+ sub rbx , QWORD PTR [ (( 16 + 8 )) + rsp ]
3171+ adcx r8 , rbp
31703172mov rcx , QWORD PTR [ (( 24 + 8 )) + rsp ]
3173+ adcx r9 , rbp
31713174mov rdx , QWORD PTR [ rsi ]
3172- xor ebp , ebp
3175+ adc r10 , 0
31733176mov QWORD PTR [ rdi ], r8
3177+ adc r11 , 0
3178+ adc r12 , 0
3179+ adc r13 , 0
3180+ adc r14 , 0
3181+ adc r15 , 0
31743182cmp rdi , rcx
31753183je $ L $ sqrx8x_outer_loop
31763184
Original file line number Diff line number Diff line change @@ -1137,19 +1137,18 @@ DB 067h
11371137adox r13 , rbp
11381138DB 067h , 067h
11391139mulx rax , rcx , rdx
1140- mov rdx , r8
1140+ mov rdx , QWORD PTR [ (( $ L $ poly + 24 )) ]
11411141adox r14 , rcx
11421142shlx rcx , r8 , rsi
11431143adox r15 , rax
11441144shrx rax , r8 , rsi
1145- mov rbp , QWORD PTR [ (( $ L $ poly + 24 )) ]
1145+ mov rbp , rdx
11461146
11471147
11481148add r9 , rcx
11491149adc r10 , rax
11501150
1151- mulx r8 , rcx , rbp
1152- mov rdx , r9
1151+ mulx r8 , rcx , r8
11531152adc r11 , rcx
11541153shlx rcx , r9 , rsi
11551154adc r8 , 0
@@ -1159,8 +1158,7 @@ DB 067h,067h
11591158add r10 , rcx
11601159adc r11 , rax
11611160
1162- mulx r9 , rcx , rbp
1163- mov rdx , r10
1161+ mulx r9 , rcx , r9
11641162adc r8 , rcx
11651163shlx rcx , r10 , rsi
11661164adc r9 , 0
@@ -1170,8 +1168,7 @@ DB 067h,067h
11701168add r11 , rcx
11711169adc r8 , rax
11721170
1173- mulx r10 , rcx , rbp
1174- mov rdx , r11
1171+ mulx r10 , rcx , r10
11751172adc r9 , rcx
11761173shlx rcx , r11 , rsi
11771174adc r10 , 0
@@ -1181,12 +1178,12 @@ DB 067h,067h
11811178add r8 , rcx
11821179adc r9 , rax
11831180
1184- mulx r11 , rcx , rbp
1181+ mulx r11 , rcx , r11
11851182adc r10 , rcx
11861183adc r11 , 0
11871184
11881185xor rdx , rdx
1189- adc r12 , r8
1186+ add r12 , r8
11901187mov rsi , QWORD PTR [ (( $ L $ poly + 8 )) ]
11911188adc r13 , r9
11921189mov r8 , r12
@@ -1195,8 +1192,7 @@ DB 067h,067h
11951192mov r9 , r13
11961193adc rdx , 0
11971194
1198- xor eax , eax
1199- sbb r12 ,- 1
1195+ sub r12 ,- 1
12001196mov r10 , r14
12011197sbb r13 , rsi
12021198sbb r14 , 0
Original file line number Diff line number Diff line change @@ -127,8 +127,19 @@ $L$nocacheinfo::
127127or edx , 040000000h
128128and ah , 15
129129cmp ah , 15
130- jne $ L $ notintel
130+ jne $ L $ notP4
131131or edx , 000100000h
132+ $ L $ notP4::
133+ cmp ah , 6
134+ jne $ L $ notintel
135+ and eax , 00fff0ff0h
136+ cmp eax , 000050670h
137+ je $ L $ knights
138+ cmp eax , 000080650h
139+ jne $ L $ notintel
140+ $ L $ knights::
141+ and ecx , 0fbffffffh
142+
132143$ L $ notintel::
133144bt edx , 28
134145jnc $ L $ generic
@@ -153,6 +164,10 @@ $L$generic::
153164mov eax , 7
154165xor ecx , ecx
155166cpuid
167+ bt r9d , 26
168+ jc $ L $ notknights
169+ and ebx , 0fff7ffffh
170+ $ L $ notknights::
156171mov DWORD PTR [ 8 + rdi ], ebx
157172$ L $ no_extended_info::
158173
Original file line number Diff line number Diff line change @@ -116,8 +116,19 @@ OPENSSL_ia32_cpuid:
116116orl $0x40000000 ,%edx
117117andb $15 ,%ah
118118cmpb $15 ,%ah
119- jne .Lnotintel
119+ jne .LnotP4
120120orl $0x00100000 ,%edx
121+ .LnotP4:
122+ cmpb $6 ,%ah
123+ jne .Lnotintel
124+ andl $0x0fff0ff0 ,%eax
125+ cmpl $0x00050670 ,%eax
126+ je .Lknights
127+ cmpl $0x00080650 ,%eax
128+ jne .Lnotintel
129+ .Lknights:
130+ andl $0xfbffffff ,%ecx
131+
121132.Lnotintel:
122133btl $28 ,%edx
123134jnc .Lgeneric
@@ -142,6 +153,10 @@ OPENSSL_ia32_cpuid:
142153movl $7 ,%eax
143154xorl %ecx ,%ecx
144155cpuid
156+ btl $26 ,%r9d
157+ jc .Lnotknights
158+ andl $0xfff7ffff ,%ebx
159+ .Lnotknights:
145160movl %ebx ,8 (%rdi )
146161.Lno_extended_info:
147162
You can’t perform that action at this time.
0 commit comments