Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
154 | magnus | 1 | Origin: upstream, https://git.lysator.liu.se/nettle/nettle/commit/fa269b6ad06dd13c901dbd84a12e52b918a09cd7 |
2 | From: Niels Möller <nisse@lysator.liu.se> |
||
3 | Subject: CVE-2015-8804: Carry folding bug in x86_64 ecc_384_modp. |
||
4 | Bug: https://bugs.debian.org/813679 |
||
5 | |||
6 | --- a/x86_64/ecc-384-modp.asm |
||
7 | +++ b/x86_64/ecc-384-modp.asm |
||
8 | @@ -20,7 +20,7 @@ C MA 02111-1301, USA. |
||
9 | .file "ecc-384-modp.asm" |
||
10 | |||
11 | define(<RP>, <%rsi>) |
||
12 | -define(<D4>, <%rax>) |
||
13 | +define(<D5>, <%rax>) |
||
14 | define(<T0>, <%rbx>) |
||
15 | define(<T1>, <%rcx>) |
||
16 | define(<T2>, <%rdx>) |
||
17 | @@ -35,8 +35,8 @@ define(<H4>, <%r13>) |
||
18 | define(<H5>, <%r14>) |
||
19 | define(<C2>, <%r15>) |
||
20 | define(<C0>, H5) C Overlap |
||
21 | -define(<D0>, RP) C Overlap |
||
22 | -define(<TMP>, H4) C Overlap |
||
23 | +define(<TMP>, RP) C Overlap |
||
24 | + |
||
25 | |||
26 | PROLOGUE(nettle_ecc_384_modp) |
||
27 | W64_ENTRY(2, 0) |
||
28 | @@ -48,34 +48,38 @@ PROLOGUE(nettle_ecc_384_modp) |
||
29 | push %r14 |
||
30 | push %r15 |
||
31 | |||
32 | - C First get top 2 limbs, which need folding twice |
||
33 | + C First get top 2 limbs, which need folding twice. |
||
34 | + C B^10 = B^6 + B^4 + 2^32 (B-1)B^4. |
||
35 | + C We handle the terms as follows: |
||
36 | C |
||
37 | - C H5 H4 |
||
38 | - C -H5 |
||
39 | - C ------ |
||
40 | - C H0 D4 |
||
41 | + C B^6: Folded immediately. |
||
42 | C |
||
43 | - C Then shift right, (H1,H0,D4) <-- (H0,D4) << 32 |
||
44 | - C and add |
||
45 | + C B^4: Delayed, added in during the next folding. |
||
46 | C |
||
47 | - C H5 H4 |
||
48 | - C H1 H0 |
||
49 | - C ---------- |
||
50 | - C C2 H1 H0 |
||
51 | - |
||
52 | - mov 80(RP), D4 |
||
53 | - mov 88(RP), H0 |
||
54 | - mov D4, H4 |
||
55 | - mov H0, H5 |
||
56 | - sub H0, D4 |
||
57 | - sbb $0, H0 |
||
58 | - |
||
59 | - mov D4, T2 |
||
60 | - mov H0, H1 |
||
61 | - shl $32, H0 |
||
62 | - shr $32, T2 |
||
63 | + C 2^32(B-1) B^4: Low half limb delayed until the next |
||
64 | + C folding. Top 1.5 limbs subtracted and shifted now, resulting |
||
65 | + C in 2.5 limbs. The low limb saved in D5, high 1.5 limbs added |
||
66 | + C in. |
||
67 | + |
||
68 | + mov 80(RP), H4 |
||
69 | + mov 88(RP), H5 |
||
70 | + C Shift right 32 bits, into H1, H0 |
||
71 | + mov H4, H0 |
||
72 | + mov H5, H1 |
||
73 | + mov H5, D5 |
||
74 | shr $32, H1 |
||
75 | - or T2, H0 |
||
76 | + shl $32, D5 |
||
77 | + shr $32, H0 |
||
78 | + or D5, H0 |
||
79 | + |
||
80 | + C H1 H0 |
||
81 | + C - H1 H0 |
||
82 | + C -------- |
||
83 | + C H1 H0 D5 |
||
84 | + mov H0, D5 |
||
85 | + neg D5 |
||
86 | + sbb H1, H0 |
||
87 | + sbb $0, H1 |
||
88 | |||
89 | xor C2, C2 |
||
90 | add H4, H0 |
||
91 | @@ -114,118 +118,95 @@ PROLOGUE(nettle_ecc_384_modp) |
||
92 | adc H3, T5 |
||
93 | adc $0, C0 |
||
94 | |||
95 | - C H3 H2 H1 H0 0 |
||
96 | - C - H4 H3 H2 H1 H0 |
||
97 | - C --------------- |
||
98 | - C H3 H2 H1 H0 D0 |
||
99 | - |
||
100 | - mov XREG(D4), XREG(D4) |
||
101 | - mov H0, D0 |
||
102 | - neg D0 |
||
103 | - sbb H1, H0 |
||
104 | - sbb H2, H1 |
||
105 | - sbb H3, H2 |
||
106 | - sbb H4, H3 |
||
107 | - sbb $0, D4 |
||
108 | - |
||
109 | - C Shift right. High bits are sign, to be added to C0. |
||
110 | - mov D4, TMP |
||
111 | - sar $32, TMP |
||
112 | - shl $32, D4 |
||
113 | - add TMP, C0 |
||
114 | - |
||
115 | + C Shift left, including low half of H4 |
||
116 | mov H3, TMP |
||
117 | + shl $32, H4 |
||
118 | shr $32, TMP |
||
119 | - shl $32, H3 |
||
120 | - or TMP, D4 |
||
121 | + or TMP, H4 |
||
122 | |||
123 | mov H2, TMP |
||
124 | + shl $32, H3 |
||
125 | shr $32, TMP |
||
126 | - shl $32, H2 |
||
127 | or TMP, H3 |
||
128 | |||
129 | mov H1, TMP |
||
130 | + shl $32, H2 |
||
131 | shr $32, TMP |
||
132 | - shl $32, H1 |
||
133 | or TMP, H2 |
||
134 | |||
135 | mov H0, TMP |
||
136 | + shl $32, H1 |
||
137 | shr $32, TMP |
||
138 | - shl $32, H0 |
||
139 | or TMP, H1 |
||
140 | |||
141 | - mov D0, TMP |
||
142 | - shr $32, TMP |
||
143 | - shl $32, D0 |
||
144 | - or TMP, H0 |
||
145 | + shl $32, H0 |
||
146 | + |
||
147 | + C H4 H3 H2 H1 H0 0 |
||
148 | + C - H4 H3 H2 H1 H0 |
||
149 | + C --------------- |
||
150 | + C H4 H3 H2 H1 H0 TMP |
||
151 | |||
152 | - add D0, T0 |
||
153 | + mov H0, TMP |
||
154 | + neg TMP |
||
155 | + sbb H1, H0 |
||
156 | + sbb H2, H1 |
||
157 | + sbb H3, H2 |
||
158 | + sbb H4, H3 |
||
159 | + sbb $0, H4 |
||
160 | + |
||
161 | + add TMP, T0 |
||
162 | adc H0, T1 |
||
163 | adc H1, T2 |
||
164 | adc H2, T3 |
||
165 | adc H3, T4 |
||
166 | - adc D4, T5 |
||
167 | + adc H4, T5 |
||
168 | adc $0, C0 |
||
169 | |||
170 | C Remains to add in C2 and C0 |
||
171 | - C C0 C0<<32 (-2^32+1)C0 |
||
172 | - C C2 C2<<32 (-2^32+1)C2 |
||
173 | - C where C2 is always positive, while C0 may be -1. |
||
174 | + C Set H1, H0 = (2^96 - 2^32 + 1) C0 |
||
175 | mov C0, H0 |
||
176 | mov C0, H1 |
||
177 | - mov C0, H2 |
||
178 | - sar $63, C0 C Get sign |
||
179 | shl $32, H1 |
||
180 | - sub H1, H0 C Gives borrow iff C0 > 0 |
||
181 | + sub H1, H0 |
||
182 | sbb $0, H1 |
||
183 | - add C0, H2 |
||
184 | |||
185 | + C Set H3, H2 = (2^96 - 2^32 + 1) C2 |
||
186 | + mov C2, H2 |
||
187 | + mov C2, H3 |
||
188 | + shl $32, H3 |
||
189 | + sub H3, H2 |
||
190 | + sbb $0, H3 |
||
191 | + add C0, H2 C No carry. Could use lea trick |
||
192 | + |
||
193 | + xor C0, C0 |
||
194 | add H0, T0 |
||
195 | adc H1, T1 |
||
196 | - adc $0, H2 |
||
197 | - adc $0, C0 |
||
198 | - |
||
199 | - C Set (H1 H0) <-- C2 << 96 - C2 << 32 + 1 |
||
200 | - mov C2, H0 |
||
201 | - mov C2, H1 |
||
202 | - shl $32, H1 |
||
203 | - sub H1, H0 |
||
204 | - sbb $0, H1 |
||
205 | - |
||
206 | - add H2, H0 |
||
207 | - adc C0, H1 |
||
208 | - adc C2, C0 |
||
209 | - mov C0, H2 |
||
210 | - sar $63, C0 |
||
211 | - add H0, T2 |
||
212 | - adc H1, T3 |
||
213 | - adc H2, T4 |
||
214 | - adc C0, T5 |
||
215 | - sbb C0, C0 |
||
216 | + adc H2, T2 |
||
217 | + adc H3, T3 |
||
218 | + adc C2, T4 |
||
219 | + adc D5, T5 C Value delayed from initial folding |
||
220 | + adc $0, C0 C Use sbb and switch sign? |
||
221 | |||
222 | C Final unlikely carry |
||
223 | mov C0, H0 |
||
224 | mov C0, H1 |
||
225 | - mov C0, H2 |
||
226 | - sar $63, C0 |
||
227 | shl $32, H1 |
||
228 | sub H1, H0 |
||
229 | sbb $0, H1 |
||
230 | - add C0, H2 |
||
231 | |||
232 | pop RP |
||
233 | |||
234 | - sub H0, T0 |
||
235 | + add H0, T0 |
||
236 | mov T0, (RP) |
||
237 | - sbb H1, T1 |
||
238 | + adc H1, T1 |
||
239 | mov T1, 8(RP) |
||
240 | - sbb H2, T2 |
||
241 | + adc C0, T2 |
||
242 | mov T2, 16(RP) |
||
243 | - sbb C0, T3 |
||
244 | + adc $0, T3 |
||
245 | mov T3, 24(RP) |
||
246 | - sbb C0, T4 |
||
247 | + adc $0, T4 |
||
248 | mov T4, 32(RP) |
||
249 | - sbb C0, T5 |
||
250 | + adc $0, T5 |
||
251 | mov T5, 40(RP) |
||
252 | |||
253 | pop %r15 |