00001
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065 #ifndef MBEDTLS_BN_MUL_H
00066 #define MBEDTLS_BN_MUL_H
00067
00068 #if !defined(MBEDTLS_CONFIG_FILE)
00069 #include "config.h"
00070 #else
00071 #include MBEDTLS_CONFIG_FILE
00072 #endif
00073
00074 #include "bignum.h"
00075
00076 #if defined(MBEDTLS_HAVE_ASM)
00077
/*
 * Make the spelling `asm` usable by mapping it to `__asm`, unless `asm` is
 * already defined as a macro.
 * NOTE(review): presumably needed for strict ISO C modes in which the plain
 * `asm` keyword is not recognised -- confirm.
 */
00078 #ifndef asm
00079 #define asm __asm
00080 #endif
00081
00082
00083 #if defined(__GNUC__) && \
00084 ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
/*
 * Define MULADDC_CANNOT_USE_EBX for GCC major versions below 5 when
 * building position-independent code (__PIC__).
 * NOTE(review): presumably because those compilers reserve EBX for PIC
 * addressing and reject it as an asm clobber -- confirm.
 */
00098 #if defined(__GNUC__) && __GNUC__ < 5 && defined(__PIC__)
00099 #define MULADDC_CANNOT_USE_EBX
00100 #endif
00101
00102
00103
00104
00105
00106
00107
/*
 * 32-bit x86 (GCC syntax). Only enabled for optimised builds and when
 * MULADDC_CANNOT_USE_EBX is not defined.
 *
 * The three macros are fragments of ONE asm statement: MULADDC_INIT opens
 * it, one or more MULADDC_CORE / MULADDC_HUIT fragments form the body, and
 * MULADDC_STOP supplies the operand lists and closes it.
 *
 * Operand numbering (from MULADDC_STOP): %0=t %1=c %2=d %3=s are outputs,
 * %4=t %5=s %6=d %7=c %8=b are inputs; all are memory operands, so the
 * macros expect variables named t, c, d, s and b at the expansion site.
 */
00108 #if defined(__i386__) && defined(__OPTIMIZE__) && !defined(MULADDC_CANNOT_USE_EBX)
00109
/*
 * Save EBX into t, then load ESI=s, EDI=d, ECX=c (running carry), EBX=b.
 */
00110 #define MULADDC_INIT \
00111 asm( \
00112 "movl %%ebx, %0 \n\t" \
00113 "movl %5, %%esi \n\t" \
00114 "movl %6, %%edi \n\t" \
00115 "movl %7, %%ecx \n\t" \
00116 "movl %8, %%ebx \n\t"
00117
/*
 * One word step: load the next source word via lodsl, multiply by EBX
 * (EDX:EAX), add the carry in ECX and the destination word at (EDI)
 * (propagating carries into EDX), keep EDX as the new carry in ECX and
 * store the low word via stosl.
 * NOTE(review): lodsl/stosl advance ESI/EDI only if the direction flag is
 * clear -- assumed per the usual ABI, confirm.
 */
00118 #define MULADDC_CORE \
00119 "lodsl \n\t" \
00120 "mull %%ebx \n\t" \
00121 "addl %%ecx, %%eax \n\t" \
00122 "adcl $0, %%edx \n\t" \
00123 "addl (%%edi), %%eax \n\t" \
00124 "adcl $0, %%edx \n\t" \
00125 "movl %%edx, %%ecx \n\t" \
00126 "stosl \n\t"
00127
/* The MMX/pmuludq variant is only provided when MBEDTLS_HAVE_SSE2 is set. */
00128 #if defined(MBEDTLS_HAVE_SSE2)
00129
/*
 * Eight-word step using the MMX registers: mm0 holds b, mm1 carries the
 * running sum/carry (seeded from ECX). Source offsets 0..28 from ESI are
 * multiplied with pmuludq, added to destination offsets 0..28 from EDI,
 * and stored back; ESI and EDI are then advanced by 32 bytes and the
 * final carry is moved back into ECX.
 */
00130 #define MULADDC_HUIT \
00131 "movd %%ecx, %%mm1 \n\t" \
00132 "movd %%ebx, %%mm0 \n\t" \
00133 "movd (%%edi), %%mm3 \n\t" \
00134 "paddq %%mm3, %%mm1 \n\t" \
00135 "movd (%%esi), %%mm2 \n\t" \
00136 "pmuludq %%mm0, %%mm2 \n\t" \
00137 "movd 4(%%esi), %%mm4 \n\t" \
00138 "pmuludq %%mm0, %%mm4 \n\t" \
00139 "movd 8(%%esi), %%mm6 \n\t" \
00140 "pmuludq %%mm0, %%mm6 \n\t" \
00141 "movd 12(%%esi), %%mm7 \n\t" \
00142 "pmuludq %%mm0, %%mm7 \n\t" \
00143 "paddq %%mm2, %%mm1 \n\t" \
00144 "movd 4(%%edi), %%mm3 \n\t" \
00145 "paddq %%mm4, %%mm3 \n\t" \
00146 "movd 8(%%edi), %%mm5 \n\t" \
00147 "paddq %%mm6, %%mm5 \n\t" \
00148 "movd 12(%%edi), %%mm4 \n\t" \
00149 "paddq %%mm4, %%mm7 \n\t" \
00150 "movd %%mm1, (%%edi) \n\t" \
00151 "movd 16(%%esi), %%mm2 \n\t" \
00152 "pmuludq %%mm0, %%mm2 \n\t" \
00153 "psrlq $32, %%mm1 \n\t" \
00154 "movd 20(%%esi), %%mm4 \n\t" \
00155 "pmuludq %%mm0, %%mm4 \n\t" \
00156 "paddq %%mm3, %%mm1 \n\t" \
00157 "movd 24(%%esi), %%mm6 \n\t" \
00158 "pmuludq %%mm0, %%mm6 \n\t" \
00159 "movd %%mm1, 4(%%edi) \n\t" \
00160 "psrlq $32, %%mm1 \n\t" \
00161 "movd 28(%%esi), %%mm3 \n\t" \
00162 "pmuludq %%mm0, %%mm3 \n\t" \
00163 "paddq %%mm5, %%mm1 \n\t" \
00164 "movd 16(%%edi), %%mm5 \n\t" \
00165 "paddq %%mm5, %%mm2 \n\t" \
00166 "movd %%mm1, 8(%%edi) \n\t" \
00167 "psrlq $32, %%mm1 \n\t" \
00168 "paddq %%mm7, %%mm1 \n\t" \
00169 "movd 20(%%edi), %%mm5 \n\t" \
00170 "paddq %%mm5, %%mm4 \n\t" \
00171 "movd %%mm1, 12(%%edi) \n\t" \
00172 "psrlq $32, %%mm1 \n\t" \
00173 "paddq %%mm2, %%mm1 \n\t" \
00174 "movd 24(%%edi), %%mm5 \n\t" \
00175 "paddq %%mm5, %%mm6 \n\t" \
00176 "movd %%mm1, 16(%%edi) \n\t" \
00177 "psrlq $32, %%mm1 \n\t" \
00178 "paddq %%mm4, %%mm1 \n\t" \
00179 "movd 28(%%edi), %%mm5 \n\t" \
00180 "paddq %%mm5, %%mm3 \n\t" \
00181 "movd %%mm1, 20(%%edi) \n\t" \
00182 "psrlq $32, %%mm1 \n\t" \
00183 "paddq %%mm6, %%mm1 \n\t" \
00184 "movd %%mm1, 24(%%edi) \n\t" \
00185 "psrlq $32, %%mm1 \n\t" \
00186 "paddq %%mm3, %%mm1 \n\t" \
00187 "movd %%mm1, 28(%%edi) \n\t" \
00188 "addl $32, %%edi \n\t" \
00189 "addl $32, %%esi \n\t" \
00190 "psrlq $32, %%mm1 \n\t" \
00191 "movd %%mm1, %%ecx \n\t"
00192
/*
 * SSE2 build: issue emms after the MMX code, restore EBX from t, write
 * ECX/EDI/ESI back to c/d/s, then close the asm statement with its
 * operand and clobber lists.
 */
00193 #define MULADDC_STOP \
00194 "emms \n\t" \
00195 "movl %4, %%ebx \n\t" \
00196 "movl %%ecx, %1 \n\t" \
00197 "movl %%edi, %2 \n\t" \
00198 "movl %%esi, %3 \n\t" \
00199 : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \
00200 : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \
00201 : "eax", "ebx", "ecx", "edx", "esi", "edi" \
00202 );
00203
00204 #else
00205
/*
 * Non-SSE2 build: same epilogue as the SSE2 variant but without emms.
 */
00206 #define MULADDC_STOP \
00207 "movl %4, %%ebx \n\t" \
00208 "movl %%ecx, %1 \n\t" \
00209 "movl %%edi, %2 \n\t" \
00210 "movl %%esi, %3 \n\t" \
00211 : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \
00212 : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \
00213 : "eax", "ebx", "ecx", "edx", "esi", "edi" \
00214 );
00215 #endif
00216 #endif
00217
/*
 * x86-64 (GCC syntax). The three macros are fragments of one asm statement.
 * Registers are bound through constraints in MULADDC_STOP: c lives in RCX,
 * d in RDI, s in RSI (all read-write) and b in RBX (input).
 */
00218 #if defined(__amd64__) || defined (__x86_64__)
00219
/* Open the asm statement and zero R8, which CORE uses as a zero source. */
00220 #define MULADDC_INIT \
00221 asm( \
00222 "xorq %%r8, %%r8\n"
00223
/*
 * One word step: RAX = *s; RDX:RAX = RAX * RBX; s += 8; add the carry in
 * RCX to RAX; reset RCX to zero from R8; fold the carry into RDX; add RAX
 * into *d; RCX = RDX + carry; d += 8.
 */
00224 #define MULADDC_CORE \
00225 "movq (%%rsi), %%rax\n" \
00226 "mulq %%rbx\n" \
00227 "addq $8, %%rsi\n" \
00228 "addq %%rcx, %%rax\n" \
00229 "movq %%r8, %%rcx\n" \
00230 "adcq $0, %%rdx\n" \
00231 "nop \n" \
00232 "addq %%rax, (%%rdi)\n" \
00233 "adcq %%rdx, %%rcx\n" \
00234 "addq $8, %%rdi\n"
00235
/*
 * Close the asm statement: c/d/s are in-out register operands, b is an
 * input, and RAX, RDX, R8 are declared clobbered.
 * NOTE(review): the asm reads memory through s and writes memory through d,
 * but neither a memory operand nor a "memory" clobber is declared here --
 * confirm whether the compiler can reorder or cache accesses around it.
 */
00236 #define MULADDC_STOP \
00237 : "+c" (c), "+D" (d), "+S" (s) \
00238 : "b" (b) \
00239 : "rax", "rdx", "r8" \
00240 );
00241
00242 #endif
00243
/*
 * Motorola 68020 / CPU32. The macros are fragments of one asm statement.
 * Operand numbering (from MULADDC_STOP): %0=c %1=d %2=s are outputs,
 * %3=s %4=d %5=c %6=b are inputs, all memory operands.
 */
00244 #if defined(__mc68020__) || defined(__mcpu32__)
00245
/* Load a2=s, a3=d, d3=c (carry), d2=b and clear d0 for use as a zero. */
00246 #define MULADDC_INIT \
00247 asm( \
00248 "movl %3, %%a2 \n\t" \
00249 "movl %4, %%a3 \n\t" \
00250 "movl %5, %%d3 \n\t" \
00251 "movl %6, %%d2 \n\t" \
00252 "moveq #0, %%d0 \n\t"
00253
/*
 * One word step: d1 = *a2++; d4:d1 = d1 * d2; add the carry d3 into d1 and
 * propagate into d4; clear d3; add d1 into *a3++; d3 = d4 + extend bit,
 * which becomes the next carry.
 */
00254 #define MULADDC_CORE \
00255 "movel %%a2@+, %%d1 \n\t" \
00256 "mulul %%d2, %%d4:%%d1 \n\t" \
00257 "addl %%d3, %%d1 \n\t" \
00258 "addxl %%d0, %%d4 \n\t" \
00259 "moveq #0, %%d3 \n\t" \
00260 "addl %%d1, %%a3@+ \n\t" \
00261 "addxl %%d4, %%d3 \n\t"
00262
/* Store d3/a3/a2 back to c/d/s and close the asm statement. */
00263 #define MULADDC_STOP \
00264 "movl %%d3, %0 \n\t" \
00265 "movl %%a3, %1 \n\t" \
00266 "movl %%a2, %2 \n\t" \
00267 : "=m" (c), "=m" (d), "=m" (s) \
00268 : "m" (s), "m" (d), "m" (c), "m" (b) \
00269 : "d0", "d1", "d2", "d3", "d4", "a2", "a3" \
00270 );
00271
/*
 * Eight-word unrolled step. The high product word alternates between d4
 * and d3 so that each group's high word feeds the next group's addxl; a
 * final addxl folds the last extend bit into d3.
 * NOTE(review): the first group uses addxl (not addl as in MULADDC_CORE),
 * so its result depends on the extend flag on entry -- confirm intended.
 */
00272 #define MULADDC_HUIT \
00273 "movel %%a2@+, %%d1 \n\t" \
00274 "mulul %%d2, %%d4:%%d1 \n\t" \
00275 "addxl %%d3, %%d1 \n\t" \
00276 "addxl %%d0, %%d4 \n\t" \
00277 "addl %%d1, %%a3@+ \n\t" \
00278 "movel %%a2@+, %%d1 \n\t" \
00279 "mulul %%d2, %%d3:%%d1 \n\t" \
00280 "addxl %%d4, %%d1 \n\t" \
00281 "addxl %%d0, %%d3 \n\t" \
00282 "addl %%d1, %%a3@+ \n\t" \
00283 "movel %%a2@+, %%d1 \n\t" \
00284 "mulul %%d2, %%d4:%%d1 \n\t" \
00285 "addxl %%d3, %%d1 \n\t" \
00286 "addxl %%d0, %%d4 \n\t" \
00287 "addl %%d1, %%a3@+ \n\t" \
00288 "movel %%a2@+, %%d1 \n\t" \
00289 "mulul %%d2, %%d3:%%d1 \n\t" \
00290 "addxl %%d4, %%d1 \n\t" \
00291 "addxl %%d0, %%d3 \n\t" \
00292 "addl %%d1, %%a3@+ \n\t" \
00293 "movel %%a2@+, %%d1 \n\t" \
00294 "mulul %%d2, %%d4:%%d1 \n\t" \
00295 "addxl %%d3, %%d1 \n\t" \
00296 "addxl %%d0, %%d4 \n\t" \
00297 "addl %%d1, %%a3@+ \n\t" \
00298 "movel %%a2@+, %%d1 \n\t" \
00299 "mulul %%d2, %%d3:%%d1 \n\t" \
00300 "addxl %%d4, %%d1 \n\t" \
00301 "addxl %%d0, %%d3 \n\t" \
00302 "addl %%d1, %%a3@+ \n\t" \
00303 "movel %%a2@+, %%d1 \n\t" \
00304 "mulul %%d2, %%d4:%%d1 \n\t" \
00305 "addxl %%d3, %%d1 \n\t" \
00306 "addxl %%d0, %%d4 \n\t" \
00307 "addl %%d1, %%a3@+ \n\t" \
00308 "movel %%a2@+, %%d1 \n\t" \
00309 "mulul %%d2, %%d3:%%d1 \n\t" \
00310 "addxl %%d4, %%d1 \n\t" \
00311 "addxl %%d0, %%d3 \n\t" \
00312 "addl %%d1, %%a3@+ \n\t" \
00313 "addxl %%d0, %%d3 \n\t"
00314
00315 #endif
00316
/*
 * PowerPC, 64-bit first, then 32-bit. Each width has two textually
 * different but otherwise parallel variants: Apple/Mach-O (bare register
 * names such as r3) and everything else (%%r3).
 *
 * In every variant the macros are fragments of one asm statement with
 * operands %0=c %1=d %2=s (outputs) and %3=s %4=d %5=c %6=b (inputs), all
 * memory operands. Register roles: r3=s pointer, r4=d pointer, r5=carry,
 * r6=b, r7..r9 scratch.
 */
00317 #if defined(__powerpc64__) || defined(__ppc64__)
00318
00319 #if defined(__MACH__) && defined(__APPLE__)
00320
/*
 * 64-bit, Apple syntax. Loads the operands and biases both pointers by -8
 * so the update-form accesses at offset 8 in CORE hit the current word.
 * NOTE(review): `addic r5, r5, 0` leaves r5 unchanged; presumably it is
 * there to reset the carry bit before the first adde -- confirm.
 */
00321 #define MULADDC_INIT \
00322 asm( \
00323 "ld r3, %3 \n\t" \
00324 "ld r4, %4 \n\t" \
00325 "ld r5, %5 \n\t" \
00326 "ld r6, %6 \n\t" \
00327 "addi r3, r3, -8 \n\t" \
00328 "addi r4, r4, -8 \n\t" \
00329 "addic r5, r5, 0 \n\t"
00330
/*
 * One word step: load the next source word (advancing r3), form the low
 * (mulld) and high (mulhdu) halves of word * b, add the carry r5 to the
 * low half, load the destination word, move the high half plus carry into
 * r5, add the destination word and store the result (advancing r4).
 */
00331 #define MULADDC_CORE \
00332 "ldu r7, 8(r3) \n\t" \
00333 "mulld r8, r7, r6 \n\t" \
00334 "mulhdu r9, r7, r6 \n\t" \
00335 "adde r8, r8, r5 \n\t" \
00336 "ld r7, 8(r4) \n\t" \
00337 "addze r5, r9 \n\t" \
00338 "addc r8, r8, r7 \n\t" \
00339 "stdu r8, 8(r4) \n\t"
00340
/*
 * Fold the pending carry bit into r5, undo the -8 pointer bias, store
 * r5/r4/r3 back to c/d/s and close the asm statement.
 */
00341 #define MULADDC_STOP \
00342 "addze r5, r5 \n\t" \
00343 "addi r4, r4, 8 \n\t" \
00344 "addi r3, r3, 8 \n\t" \
00345 "std r5, %0 \n\t" \
00346 "std r4, %1 \n\t" \
00347 "std r3, %2 \n\t" \
00348 : "=m" (c), "=m" (d), "=m" (s) \
00349 : "m" (s), "m" (d), "m" (c), "m" (b) \
00350 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
00351 );
00352
00353
00354 #else
00355
/* 64-bit, non-Apple syntax: same instruction sequence with %%rN names. */
00356 #define MULADDC_INIT \
00357 asm( \
00358 "ld %%r3, %3 \n\t" \
00359 "ld %%r4, %4 \n\t" \
00360 "ld %%r5, %5 \n\t" \
00361 "ld %%r6, %6 \n\t" \
00362 "addi %%r3, %%r3, -8 \n\t" \
00363 "addi %%r4, %%r4, -8 \n\t" \
00364 "addic %%r5, %%r5, 0 \n\t"
00365
/* 64-bit, non-Apple syntax: one word multiply-accumulate step. */
00366 #define MULADDC_CORE \
00367 "ldu %%r7, 8(%%r3) \n\t" \
00368 "mulld %%r8, %%r7, %%r6 \n\t" \
00369 "mulhdu %%r9, %%r7, %%r6 \n\t" \
00370 "adde %%r8, %%r8, %%r5 \n\t" \
00371 "ld %%r7, 8(%%r4) \n\t" \
00372 "addze %%r5, %%r9 \n\t" \
00373 "addc %%r8, %%r8, %%r7 \n\t" \
00374 "stdu %%r8, 8(%%r4) \n\t"
00375
/* 64-bit, non-Apple syntax: write back c/d/s and close the asm statement. */
00376 #define MULADDC_STOP \
00377 "addze %%r5, %%r5 \n\t" \
00378 "addi %%r4, %%r4, 8 \n\t" \
00379 "addi %%r3, %%r3, 8 \n\t" \
00380 "std %%r5, %0 \n\t" \
00381 "std %%r4, %1 \n\t" \
00382 "std %%r3, %2 \n\t" \
00383 : "=m" (c), "=m" (d), "=m" (s) \
00384 : "m" (s), "m" (d), "m" (c), "m" (b) \
00385 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
00386 );
00387
00388 #endif
00389
00390 #elif defined(__powerpc__) || defined(__ppc__)
00391
00392 #if defined(__MACH__) && defined(__APPLE__)
00393
/*
 * 32-bit, Apple syntax: word-sized loads/stores (lwz/stw) and a pointer
 * bias of -4 instead of -8; otherwise the same structure as the 64-bit
 * variants.
 */
00394 #define MULADDC_INIT \
00395 asm( \
00396 "lwz r3, %3 \n\t" \
00397 "lwz r4, %4 \n\t" \
00398 "lwz r5, %5 \n\t" \
00399 "lwz r6, %6 \n\t" \
00400 "addi r3, r3, -4 \n\t" \
00401 "addi r4, r4, -4 \n\t" \
00402 "addic r5, r5, 0 \n\t"
00403
/* 32-bit, Apple syntax: one word step using mullw / mulhwu. */
00404 #define MULADDC_CORE \
00405 "lwzu r7, 4(r3) \n\t" \
00406 "mullw r8, r7, r6 \n\t" \
00407 "mulhwu r9, r7, r6 \n\t" \
00408 "adde r8, r8, r5 \n\t" \
00409 "lwz r7, 4(r4) \n\t" \
00410 "addze r5, r9 \n\t" \
00411 "addc r8, r8, r7 \n\t" \
00412 "stwu r8, 4(r4) \n\t"
00413
/* 32-bit, Apple syntax: write back c/d/s and close the asm statement. */
00414 #define MULADDC_STOP \
00415 "addze r5, r5 \n\t" \
00416 "addi r4, r4, 4 \n\t" \
00417 "addi r3, r3, 4 \n\t" \
00418 "stw r5, %0 \n\t" \
00419 "stw r4, %1 \n\t" \
00420 "stw r3, %2 \n\t" \
00421 : "=m" (c), "=m" (d), "=m" (s) \
00422 : "m" (s), "m" (d), "m" (c), "m" (b) \
00423 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
00424 );
00425
00426 #else
00427
/* 32-bit, non-Apple syntax: same sequence with %%rN register names. */
00428 #define MULADDC_INIT \
00429 asm( \
00430 "lwz %%r3, %3 \n\t" \
00431 "lwz %%r4, %4 \n\t" \
00432 "lwz %%r5, %5 \n\t" \
00433 "lwz %%r6, %6 \n\t" \
00434 "addi %%r3, %%r3, -4 \n\t" \
00435 "addi %%r4, %%r4, -4 \n\t" \
00436 "addic %%r5, %%r5, 0 \n\t"
00437
/* 32-bit, non-Apple syntax: one word multiply-accumulate step. */
00438 #define MULADDC_CORE \
00439 "lwzu %%r7, 4(%%r3) \n\t" \
00440 "mullw %%r8, %%r7, %%r6 \n\t" \
00441 "mulhwu %%r9, %%r7, %%r6 \n\t" \
00442 "adde %%r8, %%r8, %%r5 \n\t" \
00443 "lwz %%r7, 4(%%r4) \n\t" \
00444 "addze %%r5, %%r9 \n\t" \
00445 "addc %%r8, %%r8, %%r7 \n\t" \
00446 "stwu %%r8, 4(%%r4) \n\t"
00447
/* 32-bit, non-Apple syntax: write back c/d/s and close the asm statement. */
00448 #define MULADDC_STOP \
00449 "addze %%r5, %%r5 \n\t" \
00450 "addi %%r4, %%r4, 4 \n\t" \
00451 "addi %%r3, %%r3, 4 \n\t" \
00452 "stw %%r5, %0 \n\t" \
00453 "stw %%r4, %1 \n\t" \
00454 "stw %%r3, %2 \n\t" \
00455 : "=m" (c), "=m" (d), "=m" (s) \
00456 : "m" (s), "m" (d), "m" (c), "m" (b) \
00457 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
00458 );
00459
00460 #endif
00461
00462 #endif
00463
00464
00465
00466
00467
/*
 * SPARC. DISABLED: the leading `0 &&` makes this condition always false, so
 * none of the macros in this section are ever defined.
 *
 * Operands (from MULADDC_STOP): %0=c %1=d %2=s outputs, %3=s %4=d %5=c
 * %6=b inputs, all memory operands. Register roles: o0=s pointer,
 * o1=d pointer, o2=carry, o3=b, o4/o5/g1 scratch.
 */
00468 #if 0 && defined(__sparc__)
00469 #if defined(__sparc64__)
00470
/*
 * 64-bit SPARC: the two pointers are loaded with ldx (64-bit), while c and
 * b are loaded with ld.
 */
00471 #define MULADDC_INIT \
00472 asm( \
00473 "ldx %3, %%o0 \n\t" \
00474 "ldx %4, %%o1 \n\t" \
00475 "ld %5, %%o2 \n\t" \
00476 "ld %6, %%o3 \n\t"
00477
/*
 * One word step: load the source word (advance o0 by 4) and the destination
 * word, multiply with umul, add the carry, read the high product word from
 * %y into g1 and add the carry bit, add the destination word, store the
 * result, compute the new carry in o2 and advance o1 by 4.
 */
00478 #define MULADDC_CORE \
00479 "ld [%%o0], %%o4 \n\t" \
00480 "inc 4, %%o0 \n\t" \
00481 "ld [%%o1], %%o5 \n\t" \
00482 "umul %%o3, %%o4, %%o4 \n\t" \
00483 "addcc %%o4, %%o2, %%o4 \n\t" \
00484 "rd %%y, %%g1 \n\t" \
00485 "addx %%g1, 0, %%g1 \n\t" \
00486 "addcc %%o4, %%o5, %%o4 \n\t" \
00487 "st %%o4, [%%o1] \n\t" \
00488 "addx %%g1, 0, %%o2 \n\t" \
00489 "inc 4, %%o1 \n\t"
00490
/* Store the carry with st and the two pointers with stx; close the asm. */
00491 #define MULADDC_STOP \
00492 "st %%o2, %0 \n\t" \
00493 "stx %%o1, %1 \n\t" \
00494 "stx %%o0, %2 \n\t" \
00495 : "=m" (c), "=m" (d), "=m" (s) \
00496 : "m" (s), "m" (d), "m" (c), "m" (b) \
00497 : "g1", "o0", "o1", "o2", "o3", "o4", \
00498 "o5" \
00499 );
00500
00501 #else
00502
/* 32-bit SPARC: all four operands are loaded with ld. */
00503 #define MULADDC_INIT \
00504 asm( \
00505 "ld %3, %%o0 \n\t" \
00506 "ld %4, %%o1 \n\t" \
00507 "ld %5, %%o2 \n\t" \
00508 "ld %6, %%o3 \n\t"
00509
/* 32-bit SPARC: same word step as the 64-bit variant. */
00510 #define MULADDC_CORE \
00511 "ld [%%o0], %%o4 \n\t" \
00512 "inc 4, %%o0 \n\t" \
00513 "ld [%%o1], %%o5 \n\t" \
00514 "umul %%o3, %%o4, %%o4 \n\t" \
00515 "addcc %%o4, %%o2, %%o4 \n\t" \
00516 "rd %%y, %%g1 \n\t" \
00517 "addx %%g1, 0, %%g1 \n\t" \
00518 "addcc %%o4, %%o5, %%o4 \n\t" \
00519 "st %%o4, [%%o1] \n\t" \
00520 "addx %%g1, 0, %%o2 \n\t" \
00521 "inc 4, %%o1 \n\t"
00522
/* 32-bit SPARC: store carry and both pointers with st; close the asm. */
00523 #define MULADDC_STOP \
00524 "st %%o2, %0 \n\t" \
00525 "st %%o1, %1 \n\t" \
00526 "st %%o0, %2 \n\t" \
00527 : "=m" (c), "=m" (d), "=m" (s) \
00528 : "m" (s), "m" (d), "m" (c), "m" (b) \
00529 : "g1", "o0", "o1", "o2", "o3", "o4", \
00530 "o5" \
00531 );
00532
00533 #endif
00534 #endif
00535
/*
 * MicroBlaze. The macros are fragments of one asm statement with operands
 * %0=c %1=d %2=s (outputs) and %3=s %4=d %5=c %6=b (inputs), all memory
 * operands. Register roles: r3=s pointer, r4=d pointer, r5=carry,
 * r6/r7=high/low 16 bits of b, r8..r13 scratch.
 */
00536 #if defined(__microblaze__) || defined(microblaze)
00537
/*
 * Load the operands and split b into 16-bit halves: r7 = b & 0xffff,
 * r6 = b >> 16.
 */
00538 #define MULADDC_INIT \
00539 asm( \
00540 "lwi r3, %3 \n\t" \
00541 "lwi r4, %4 \n\t" \
00542 "lwi r5, %5 \n\t" \
00543 "lwi r6, %6 \n\t" \
00544 "andi r7, r6, 0xffff \n\t" \
00545 "bsrli r6, r6, 16 \n\t"
00546
/*
 * One word step built from 16x16 multiplies: read the source word as two
 * half-words (advancing r3 by 4 in total), form the four partial products,
 * combine them into a low word (r12) and high word (r13) with carry
 * propagation, add the destination word and the incoming carry r5, leave
 * the new carry in r5, store the low word and advance r4 by 4.
 * NOTE(review): which half-word is the low/high part of the source word
 * depends on byte order -- confirm against the target's endianness.
 */
00547 #define MULADDC_CORE \
00548 "lhui r8, r3, 0 \n\t" \
00549 "addi r3, r3, 2 \n\t" \
00550 "lhui r9, r3, 0 \n\t" \
00551 "addi r3, r3, 2 \n\t" \
00552 "mul r10, r9, r6 \n\t" \
00553 "mul r11, r8, r7 \n\t" \
00554 "mul r12, r9, r7 \n\t" \
00555 "mul r13, r8, r6 \n\t" \
00556 "bsrli r8, r10, 16 \n\t" \
00557 "bsrli r9, r11, 16 \n\t" \
00558 "add r13, r13, r8 \n\t" \
00559 "add r13, r13, r9 \n\t" \
00560 "bslli r10, r10, 16 \n\t" \
00561 "bslli r11, r11, 16 \n\t" \
00562 "add r12, r12, r10 \n\t" \
00563 "addc r13, r13, r0 \n\t" \
00564 "add r12, r12, r11 \n\t" \
00565 "addc r13, r13, r0 \n\t" \
00566 "lwi r10, r4, 0 \n\t" \
00567 "add r12, r12, r10 \n\t" \
00568 "addc r13, r13, r0 \n\t" \
00569 "add r12, r12, r5 \n\t" \
00570 "addc r5, r13, r0 \n\t" \
00571 "swi r12, r4, 0 \n\t" \
00572 "addi r4, r4, 4 \n\t"
00573
/* Store r5/r4/r3 back to c/d/s and close the asm statement. */
00574 #define MULADDC_STOP \
00575 "swi r5, %0 \n\t" \
00576 "swi r4, %1 \n\t" \
00577 "swi r3, %2 \n\t" \
00578 : "=m" (c), "=m" (d), "=m" (s) \
00579 : "m" (s), "m" (d), "m" (c), "m" (b) \
00580 : "r3", "r4", "r5", "r6", "r7", "r8", \
00581 "r9", "r10", "r11", "r12", "r13" \
00582 );
00583
00584 #endif
00585
/*
 * TriCore. The macros are fragments of one asm statement with operands
 * %0=c %1=d %2=s (outputs) and %3=s %4=d %5=c %6=b (inputs), all memory
 * operands. Register roles: a2=s pointer, a3=d pointer, d4=carry, d1=b.
 * NOTE(review): e2 and e4 presumably name the 64-bit register pairs d3:d2
 * and d5:d4 -- confirm against the TriCore assembler documentation.
 */
00586 #if defined(__tricore__)
00587
/* Load the operands and clear d5. */
00588 #define MULADDC_INIT \
00589 asm( \
00590 "ld.a %%a2, %3 \n\t" \
00591 "ld.a %%a3, %4 \n\t" \
00592 "ld.w %%d4, %5 \n\t" \
00593 "ld.w %%d1, %6 \n\t" \
00594 "xor %%d5, %%d5 \n\t"
00595
/*
 * One word step: load the next source word (post-incrementing a2),
 * multiply-accumulate it with d1 onto e4 into e2, add the destination word
 * to d2 with carry into d3, copy d3 to d4 as the next carry and store d2
 * to the destination (post-incrementing a3).
 */
00596 #define MULADDC_CORE \
00597 "ld.w %%d0, [%%a2+] \n\t" \
00598 "madd.u %%e2, %%e4, %%d0, %%d1 \n\t" \
00599 "ld.w %%d0, [%%a3] \n\t" \
00600 "addx %%d2, %%d2, %%d0 \n\t" \
00601 "addc %%d3, %%d3, 0 \n\t" \
00602 "mov %%d4, %%d3 \n\t" \
00603 "st.w [%%a3+], %%d2 \n\t"
00604
/*
 * Store d4/a3/a2 back to c/d/s and close the asm statement.
 * NOTE(review): d5 is written by MULADDC_INIT but is not named in the
 * clobber list below -- confirm whether another entry covers it.
 */
00605 #define MULADDC_STOP \
00606 "st.w %0, %%d4 \n\t" \
00607 "st.a %1, %%a3 \n\t" \
00608 "st.a %2, %%a2 \n\t" \
00609 : "=m" (c), "=m" (d), "=m" (s) \
00610 : "m" (s), "m" (d), "m" (c), "m" (b) \
00611 : "d0", "d1", "e2", "d4", "a2", "a3" \
00612 );
00613
00614 #endif
00615
00616
00617
00618
00619
00620
00621
00622
00623
00624
00625
00626
00627
/*
 * Define MULADDC_CANNOT_USE_R7 for GCC-compatible compilers when
 * optimisation is off (__OPTIMIZE__ undefined).
 * NOTE(review): presumably because r7 may serve as the frame pointer in
 * unoptimised builds and then cannot be listed as an asm clobber --
 * confirm.
 */
00628 #if defined(__GNUC__) && !defined(__OPTIMIZE__)
00629 #define MULADDC_CANNOT_USE_R7
00630 #endif
00631
/*
 * ARM, enabled only when MULADDC_CANNOT_USE_R7 is not defined (both
 * variants below clobber r7).
 *
 * The macros are fragments of one asm statement with operands %0=c %1=d
 * %2=s (outputs) and %3=s %4=d %5=c %6=b (inputs), all memory operands.
 * Register roles: r0=s pointer, r1=d pointer, r2=carry, r3=b.
 */
00632 #if defined(__arm__) && !defined(MULADDC_CANNOT_USE_R7)
00633
00634 #if defined(__thumb__) && !defined(__thumb2__)
00635
/*
 * Thumb-1 variant. Besides loading the operands, split b into 16-bit
 * halves kept in high registers: r9 = b >> 16, r8 = b & 0xffff (via the
 * lsl/lsr pair).
 */
00636 #define MULADDC_INIT \
00637 asm( \
00638 "ldr r0, %3 \n\t" \
00639 "ldr r1, %4 \n\t" \
00640 "ldr r2, %5 \n\t" \
00641 "ldr r3, %6 \n\t" \
00642 "lsr r7, r3, #16 \n\t" \
00643 "mov r9, r7 \n\t" \
00644 "lsl r7, r3, #16 \n\t" \
00645 "lsr r7, r7, #16 \n\t" \
00646 "mov r8, r7 \n\t"
00647
/*
 * Thumb-1 word step built from 16x16 multiplies: load the source word
 * (advancing r0), split it into halves (r7 high, r6 low), form the four
 * partial products against r8/r9, accumulate them together with the
 * incoming carry r2 into a low word r4 and high word r5, add the
 * destination word, leave the new carry in r2 and store r4 (advancing r1).
 */
00648 #define MULADDC_CORE \
00649 "ldmia r0!, {r6} \n\t" \
00650 "lsr r7, r6, #16 \n\t" \
00651 "lsl r6, r6, #16 \n\t" \
00652 "lsr r6, r6, #16 \n\t" \
00653 "mov r4, r8 \n\t" \
00654 "mul r4, r6 \n\t" \
00655 "mov r3, r9 \n\t" \
00656 "mul r6, r3 \n\t" \
00657 "mov r5, r9 \n\t" \
00658 "mul r5, r7 \n\t" \
00659 "mov r3, r8 \n\t" \
00660 "mul r7, r3 \n\t" \
00661 "lsr r3, r6, #16 \n\t" \
00662 "add r5, r5, r3 \n\t" \
00663 "lsr r3, r7, #16 \n\t" \
00664 "add r5, r5, r3 \n\t" \
00665 "add r4, r4, r2 \n\t" \
00666 "mov r2, #0 \n\t" \
00667 "adc r5, r2 \n\t" \
00668 "lsl r3, r6, #16 \n\t" \
00669 "add r4, r4, r3 \n\t" \
00670 "adc r5, r2 \n\t" \
00671 "lsl r3, r7, #16 \n\t" \
00672 "add r4, r4, r3 \n\t" \
00673 "adc r5, r2 \n\t" \
00674 "ldr r3, [r1] \n\t" \
00675 "add r4, r4, r3 \n\t" \
00676 "adc r2, r5 \n\t" \
00677 "stmia r1!, {r4} \n\t"
00678
/* Thumb-1: store r2/r1/r0 back to c/d/s and close the asm statement. */
00679 #define MULADDC_STOP \
00680 "str r2, %0 \n\t" \
00681 "str r1, %1 \n\t" \
00682 "str r0, %2 \n\t" \
00683 : "=m" (c), "=m" (d), "=m" (s) \
00684 : "m" (s), "m" (d), "m" (c), "m" (b) \
00685 : "r0", "r1", "r2", "r3", "r4", "r5", \
00686 "r6", "r7", "r8", "r9", "cc" \
00687 );
00688
00689 #else
00690
/* ARM / Thumb-2 variant: just load the four operands. */
00691 #define MULADDC_INIT \
00692 asm( \
00693 "ldr r0, %3 \n\t" \
00694 "ldr r1, %4 \n\t" \
00695 "ldr r2, %5 \n\t" \
00696 "ldr r3, %6 \n\t"
00697
/*
 * ARM / Thumb-2 word step: r4 = *r0 (post-increment by 4); r5 = 0;
 * r6 = *r1; umlal accumulates r3 * r4 onto r5:r2 (r2 holds the incoming
 * carry); r7 = r6 + r2 with flags; r2 = r5 + carry flag becomes the next
 * carry; store r7 to *r1 (post-increment by 4).
 */
00698 #define MULADDC_CORE \
00699 "ldr r4, [r0], #4 \n\t" \
00700 "mov r5, #0 \n\t" \
00701 "ldr r6, [r1] \n\t" \
00702 "umlal r2, r5, r3, r4 \n\t" \
00703 "adds r7, r6, r2 \n\t" \
00704 "adc r2, r5, #0 \n\t" \
00705 "str r7, [r1], #4 \n\t"
00706
/* ARM / Thumb-2: store r2/r1/r0 back to c/d/s and close the asm. */
00707 #define MULADDC_STOP \
00708 "str r2, %0 \n\t" \
00709 "str r1, %1 \n\t" \
00710 "str r0, %2 \n\t" \
00711 : "=m" (c), "=m" (d), "=m" (s) \
00712 : "m" (s), "m" (d), "m" (c), "m" (b) \
00713 : "r0", "r1", "r2", "r3", "r4", "r5", \
00714 "r6", "r7", "cc" \
00715 );
00716
00717 #endif
00718
00719 #endif
00720
/*
 * Alpha. The macros are fragments of one asm statement with operands
 * %0=c %1=d %2=s (outputs) and %3=s %4=d %5=c %6=b (inputs), all memory
 * operands. Register roles: $1=s pointer, $2=d pointer, $3=carry, $4=b,
 * $5..$7 scratch.
 */
00721 #if defined(__alpha__)
00722
/* Load the four operands into $1..$4. */
00723 #define MULADDC_INIT \
00724 asm( \
00725 "ldq $1, %3 \n\t" \
00726 "ldq $2, %4 \n\t" \
00727 "ldq $3, %5 \n\t" \
00728 "ldq $4, %6 \n\t"
00729
/*
 * One word step: load the source word and advance $1 by 8; form the low
 * (mulq) and high (umulh) product halves; add the carry to the low half
 * and detect overflow with cmpult; add the destination word and detect
 * overflow the same way; store the result and advance $2 by 8; the new
 * carry in $3 is the high half plus both overflow bits.
 */
00730 #define MULADDC_CORE \
00731 "ldq $6, 0($1) \n\t" \
00732 "addq $1, 8, $1 \n\t" \
00733 "mulq $6, $4, $7 \n\t" \
00734 "umulh $6, $4, $6 \n\t" \
00735 "addq $7, $3, $7 \n\t" \
00736 "cmpult $7, $3, $3 \n\t" \
00737 "ldq $5, 0($2) \n\t" \
00738 "addq $7, $5, $7 \n\t" \
00739 "cmpult $7, $5, $5 \n\t" \
00740 "stq $7, 0($2) \n\t" \
00741 "addq $2, 8, $2 \n\t" \
00742 "addq $6, $3, $3 \n\t" \
00743 "addq $5, $3, $3 \n\t"
00744
/* Store $3/$2/$1 back to c/d/s and close the asm statement. */
00745 #define MULADDC_STOP \
00746 "stq $3, %0 \n\t" \
00747 "stq $2, %1 \n\t" \
00748 "stq $1, %2 \n\t" \
00749 : "=m" (c), "=m" (d), "=m" (s) \
00750 : "m" (s), "m" (d), "m" (c), "m" (b) \
00751 : "$1", "$2", "$3", "$4", "$5", "$6", "$7" \
00752 );
00753 #endif
00754
/*
 * MIPS (excluded when __mips64 is defined). The macros are fragments of
 * one asm statement with operands %0=c %1=d %2=s (outputs) and %3=s %4=d
 * %5=c %6=b (inputs), all memory operands. Register roles: $10=s pointer,
 * $11=d pointer, $12=carry, $13=b, $9/$14/$15 scratch.
 */
00755 #if defined(__mips__) && !defined(__mips64)
00756
/* Load the four operands into $10..$13. */
00757 #define MULADDC_INIT \
00758 asm( \
00759 "lw $10, %3 \n\t" \
00760 "lw $11, %4 \n\t" \
00761 "lw $12, %5 \n\t" \
00762 "lw $13, %6 \n\t"
00763
/*
 * One word step: load the source word, multiply it by $13 with multu and
 * advance $10 by 4; fetch the low (mflo) and high (mfhi) product words;
 * add the carry to the low word and detect overflow with sltu; add the
 * destination word and detect overflow the same way; the new carry in $12
 * is the high word plus both overflow bits; store the result and advance
 * $11 by 4.
 */
00764 #define MULADDC_CORE \
00765 "lw $14, 0($10) \n\t" \
00766 "multu $13, $14 \n\t" \
00767 "addi $10, $10, 4 \n\t" \
00768 "mflo $14 \n\t" \
00769 "mfhi $9 \n\t" \
00770 "addu $14, $12, $14 \n\t" \
00771 "lw $15, 0($11) \n\t" \
00772 "sltu $12, $14, $12 \n\t" \
00773 "addu $15, $14, $15 \n\t" \
00774 "sltu $14, $15, $14 \n\t" \
00775 "addu $12, $12, $9 \n\t" \
00776 "sw $15, 0($11) \n\t" \
00777 "addu $12, $12, $14 \n\t" \
00778 "addi $11, $11, 4 \n\t"
00779
/*
 * Store $12/$11/$10 back to c/d/s and close the asm statement; the
 * clobber list also names the lo and hi multiply result registers.
 */
00780 #define MULADDC_STOP \
00781 "sw $12, %0 \n\t" \
00782 "sw $11, %1 \n\t" \
00783 "sw $10, %2 \n\t" \
00784 : "=m" (c), "=m" (d), "=m" (s) \
00785 : "m" (s), "m" (d), "m" (c), "m" (b) \
00786 : "$9", "$10", "$11", "$12", "$13", "$14", "$15", "lo", "hi" \
00787 );
00788
00789 #endif
00790 #endif
00791
/*
 * 32-bit x86 for MSVC (_M_IX86) and Watcom, using `__asm` statements that
 * refer to the variables s, d, c and b by name at the expansion site.
 * Register roles: ESI=s pointer, EDI=d pointer, ECX=carry, EBX=b.
 */
00792 #if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
00793
/* Load ESI=s, EDI=d, ECX=c, EBX=b. */
00794 #define MULADDC_INIT \
00795 __asm mov esi, s \
00796 __asm mov edi, d \
00797 __asm mov ecx, c \
00798 __asm mov ebx, b
00799
/*
 * One word step: load the next source word via lodsd, multiply by EBX
 * (EDX:EAX), add the carry in ECX and the destination word at [EDI]
 * (propagating carries into EDX), keep EDX as the new carry in ECX and
 * store the low word via stosd.
 */
00800 #define MULADDC_CORE \
00801 __asm lodsd \
00802 __asm mul ebx \
00803 __asm add eax, ecx \
00804 __asm adc edx, 0 \
00805 __asm add eax, [edi] \
00806 __asm adc edx, 0 \
00807 __asm mov ecx, edx \
00808 __asm stosd
00809
/* The opcode-emitting eight-word variant needs MBEDTLS_HAVE_SSE2. */
00810 #if defined(MBEDTLS_HAVE_SSE2)
00811
/* Shorthand for emitting one raw opcode byte into the instruction stream. */
00812 #define EMIT __asm _emit
00813
/*
 * Multi-word step written as raw machine-code bytes, one instruction per
 * source line.
 * NOTE(review): the bytes look like MMX/SSE2 encodings (0x0F 0x6E movd
 * load, 0x0F 0x7E movd store, 0x0F 0xF4 pmuludq, 0x0F 0xD4 paddq,
 * 0x0F 0x73 0xD1 0x20 psrlq mm1,32, 0x83 0xC7/0xC6 0x20 add edi/esi,32),
 * i.e. an eight-word multiply-accumulate -- confirm by disassembling
 * before changing any byte.
 */
00814 #define MULADDC_HUIT \
00815 EMIT 0x0F EMIT 0x6E EMIT 0xC9 \
00816 EMIT 0x0F EMIT 0x6E EMIT 0xC3 \
00817 EMIT 0x0F EMIT 0x6E EMIT 0x1F \
00818 EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
00819 EMIT 0x0F EMIT 0x6E EMIT 0x16 \
00820 EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
00821 EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x04 \
00822 EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
00823 EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x08 \
00824 EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
00825 EMIT 0x0F EMIT 0x6E EMIT 0x7E EMIT 0x0C \
00826 EMIT 0x0F EMIT 0xF4 EMIT 0xF8 \
00827 EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
00828 EMIT 0x0F EMIT 0x6E EMIT 0x5F EMIT 0x04 \
00829 EMIT 0x0F EMIT 0xD4 EMIT 0xDC \
00830 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x08 \
00831 EMIT 0x0F EMIT 0xD4 EMIT 0xEE \
00832 EMIT 0x0F EMIT 0x6E EMIT 0x67 EMIT 0x0C \
00833 EMIT 0x0F EMIT 0xD4 EMIT 0xFC \
00834 EMIT 0x0F EMIT 0x7E EMIT 0x0F \
00835 EMIT 0x0F EMIT 0x6E EMIT 0x56 EMIT 0x10 \
00836 EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
00837 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00838 EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x14 \
00839 EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
00840 EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
00841 EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x18 \
00842 EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
00843 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x04 \
00844 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00845 EMIT 0x0F EMIT 0x6E EMIT 0x5E EMIT 0x1C \
00846 EMIT 0x0F EMIT 0xF4 EMIT 0xD8 \
00847 EMIT 0x0F EMIT 0xD4 EMIT 0xCD \
00848 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x10 \
00849 EMIT 0x0F EMIT 0xD4 EMIT 0xD5 \
00850 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x08 \
00851 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00852 EMIT 0x0F EMIT 0xD4 EMIT 0xCF \
00853 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x14 \
00854 EMIT 0x0F EMIT 0xD4 EMIT 0xE5 \
00855 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x0C \
00856 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00857 EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
00858 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x18 \
00859 EMIT 0x0F EMIT 0xD4 EMIT 0xF5 \
00860 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x10 \
00861 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00862 EMIT 0x0F EMIT 0xD4 EMIT 0xCC \
00863 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x1C \
00864 EMIT 0x0F EMIT 0xD4 EMIT 0xDD \
00865 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x14 \
00866 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00867 EMIT 0x0F EMIT 0xD4 EMIT 0xCE \
00868 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x18 \
00869 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00870 EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
00871 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x1C \
00872 EMIT 0x83 EMIT 0xC7 EMIT 0x20 \
00873 EMIT 0x83 EMIT 0xC6 EMIT 0x20 \
00874 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00875 EMIT 0x0F EMIT 0x7E EMIT 0xC9
00876
/*
 * SSE2 build: emit the two bytes 0x0F 0x77 (NOTE(review): presumably emms
 * -- confirm), then write ECX/EDI/ESI back to c/d/s.
 * The last line of this macro ends with a continuation backslash, so the
 * line that follows it is part of the macro and must stay empty.
 */
00877 #define MULADDC_STOP \
00878 EMIT 0x0F EMIT 0x77 \
00879 __asm mov c, ecx \
00880 __asm mov d, edi \
00881 __asm mov s, esi \
00882
00883 #else
00884
/*
 * Non-SSE2 build: write ECX/EDI/ESI back to c/d/s.
 * The last line of this macro ends with a continuation backslash, so the
 * line that follows it is part of the macro and must stay empty.
 */
00885 #define MULADDC_STOP \
00886 __asm mov c, ecx \
00887 __asm mov d, edi \
00888 __asm mov s, esi \
00889
00890 #endif
00891 #endif
00892
00893 #endif
00894
/*
 * Portable C fallback, used only when no section above defined
 * MULADDC_CORE.
 *
 * The macros expand to plain C at the call site: MULADDC_INIT opens a
 * block and declares temporaries, each MULADDC_CORE performs one word step
 * (*d += *s * b + c, with the overflow left in c, then s++ and d++), and
 * MULADDC_STOP closes the block. They expect variables s, d, c and b at
 * the expansion site; biL and biH are not defined in this section and
 * must be provided by the including file.
 * NOTE(review): biL / biH are presumably the bit width of a limb and half
 * of it -- confirm against their definition.
 */
00895 #if !defined(MULADDC_CORE)
00896 #if defined(MBEDTLS_HAVE_UDBL)
00897
/* Double-width variant: r holds the full product of one word and b. */
00898 #define MULADDC_INIT \
00899 { \
00900 mbedtls_t_udbl r; \
00901 mbedtls_mpi_uint r0, r1;
00902
/*
 * Compute the product in the double-width type, split it into low (r0)
 * and high (r1 = r >> biL) words, then add c and *d to r0, bumping r1
 * whenever an addition wraps (detected by `r0 < addend`). The high word
 * becomes the new carry and the low word is stored through d.
 */
00903 #define MULADDC_CORE \
00904 r = *(s++) * (mbedtls_t_udbl) b; \
00905 r0 = (mbedtls_mpi_uint) r; \
00906 r1 = (mbedtls_mpi_uint)( r >> biL ); \
00907 r0 += c; r1 += (r0 < c); \
00908 r0 += *d; r1 += (r0 < *d); \
00909 c = r1; *(d++) = r0;
00910
/* Close the block opened by MULADDC_INIT. */
00911 #define MULADDC_STOP \
00912 }
00913
00914 #else
/*
 * Single-width variant: split b into a low half b0 (shift left then right
 * by biH) and a high half b1 (shift right by biH) once, up front.
 */
00915 #define MULADDC_INIT \
00916 { \
00917 mbedtls_mpi_uint s0, s1, b0, b1; \
00918 mbedtls_mpi_uint r0, r1, rx, ry; \
00919 b0 = ( b << biH ) >> biH; \
00920 b1 = ( b >> biH );
00921
/*
 * Split *s into halves s0/s1 the same way, form the four partial products
 * (r0 = s0*b0, r1 = s1*b1, cross terms rx = s0*b1 and ry = s1*b0), fold
 * the upper halves of the cross terms into r1 and their lower halves
 * (shifted up by biH) into r0 with wrap detection, then add c and *d as in
 * the double-width variant.
 */
00922 #define MULADDC_CORE \
00923 s0 = ( *s << biH ) >> biH; \
00924 s1 = ( *s >> biH ); s++; \
00925 rx = s0 * b1; r0 = s0 * b0; \
00926 ry = s1 * b0; r1 = s1 * b1; \
00927 r1 += ( rx >> biH ); \
00928 r1 += ( ry >> biH ); \
00929 rx <<= biH; ry <<= biH; \
00930 r0 += rx; r1 += (r0 < rx); \
00931 r0 += ry; r1 += (r0 < ry); \
00932 r0 += c; r1 += (r0 < c); \
00933 r0 += *d; r1 += (r0 < *d); \
00934 c = r1; *(d++) = r0;
00935
/* Close the block opened by MULADDC_INIT. */
00936 #define MULADDC_STOP \
00937 }
00938
00939 #endif
00940 #endif
00941
00942 #endif