| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | /* rhash_timing.c - functions to benchmark hash algorithms, | 
| 2 |  |  |  |  |  |  | * | 
| 3 |  |  |  |  |  |  | * Copyright: 2010-2012 Aleksey Kravchenko | 
| 4 |  |  |  |  |  |  | * | 
| 5 |  |  |  |  |  |  | * Permission is hereby granted,  free of charge,  to any person  obtaining a | 
| 6 |  |  |  |  |  |  | * copy of this software and associated documentation files (the "Software"), | 
| 7 |  |  |  |  |  |  | * to deal in the Software without restriction,  including without limitation | 
| 8 |  |  |  |  |  |  | * the rights to  use, copy, modify,  merge, publish, distribute, sublicense, | 
| 9 |  |  |  |  |  |  | * and/or sell copies  of  the Software,  and to permit  persons  to whom the | 
| 10 |  |  |  |  |  |  | * Software is furnished to do so. | 
| 11 |  |  |  |  |  |  | * | 
| 12 |  |  |  |  |  |  | * This program  is  distributed  in  the  hope  that it will be useful,  but | 
| 13 |  |  |  |  |  |  | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | 
| 14 |  |  |  |  |  |  | * or FITNESS FOR A PARTICULAR PURPOSE.  Use this program  at  your own risk! | 
| 15 |  |  |  |  |  |  | */ | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | /* modifier for Windows dll */ | 
| 18 |  |  |  |  |  |  | #if (defined(_WIN32) || defined(__CYGWIN__)) && defined(RHASH_EXPORTS) | 
| 19 |  |  |  |  |  |  | # define RHASH_API __declspec(dllexport) | 
| 20 |  |  |  |  |  |  | #endif | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | #include "byte_order.h" | 
| 23 |  |  |  |  |  |  | #include "rhash.h" | 
| 24 |  |  |  |  |  |  | #include "rhash_timing.h" | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | /* DEFINE read_tsc() if possible */ | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | #if (defined(CPU_IA32)) || defined(CPU_X64) | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | #if defined( _MSC_VER ) /* if MS VC */ | 
| 31 |  |  |  |  |  |  | # include <intrin.h> | 
| 32 |  |  |  |  |  |  | # pragma intrinsic( __rdtsc ) | 
| 33 |  |  |  |  |  |  | # define read_tsc() __rdtsc() | 
| 34 |  |  |  |  |  |  | # define HAVE_TSC | 
| 35 |  |  |  |  |  |  | #elif defined( __GNUC__ ) /* if GCC */ | 
| 36 | 0 |  |  |  |  |  | static uint64_t read_tsc(void) { /* GCC variant: read the CPU time-stamp counter with the rdtsc instruction */ | 
| 37 |  |  |  |  |  |  | unsigned long lo, hi; /* outputs taken from the "a" and "d" registers: low and high halves of the counter */ | 
| 38 | 0 |  |  |  |  |  | __asm volatile("rdtsc" : "=a" (lo), "=d" (hi)); | 
| 39 | 0 |  |  |  |  |  | return (((uint64_t)hi) << 32) + lo; /* hi forms the upper 32 bits of the result, lo the lower part */ | 
| 40 |  |  |  |  |  |  | } | 
| 41 |  |  |  |  |  |  | # define HAVE_TSC | 
| 42 |  |  |  |  |  |  | #endif /* _MSC_VER, __GNUC__ */ | 
| 43 |  |  |  |  |  |  | #endif /* CPU_IA32, CPU_X64 */ | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | /* TIMER FUNCTIONS */ | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | #if defined(_WIN32) || defined(__CYGWIN__) | 
| 49 |  |  |  |  |  |  | #include <windows.h> | 
| 50 |  |  |  |  |  |  | #define get_timedelta(delta) QueryPerformanceCounter((LARGE_INTEGER*)delta) /* Windows/Cygwin: store the current performance-counter value into *delta */ | 
| 51 |  |  |  |  |  |  | #else | 
| 52 |  |  |  |  |  |  | #define get_timedelta(delta) gettimeofday(delta, NULL) /* other platforms: store the current time of day into *delta */ | 
| 53 |  |  |  |  |  |  | #endif | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | /** | 
| 56 |  |  |  |  |  |  | * Convert a time delta into a real value holding the number of seconds. | 
| 57 |  |  |  |  |  |  | * On Windows/Cygwin the delta is a counter value that is divided by the performance-counter frequency; | 
| 58 |  |  |  |  |  |  | * elsewhere it has tv_sec and tv_usec fields (seconds plus microseconds). Used by rhash_timer_stop(). | 
| 59 |  |  |  |  |  |  | * | 
| 60 |  |  |  |  |  |  | * @deprecated This function shall be removed soon. | 
| 61 |  |  |  |  |  |  | * | 
| 62 |  |  |  |  |  |  | * @param timer time delta to be converted | 
| 63 |  |  |  |  |  |  | * @return number of seconds | 
| 64 |  |  |  |  |  |  | */ | 
| 65 | 0 |  |  |  |  |  | static double fsec(timedelta_t* timer) | 
| 66 |  |  |  |  |  |  | { | 
| 67 |  |  |  |  |  |  | #if defined(_WIN32) || defined(__CYGWIN__) | 
| 68 |  |  |  |  |  |  | LARGE_INTEGER freq; | 
| 69 |  |  |  |  |  |  | QueryPerformanceFrequency(&freq); | 
| 70 |  |  |  |  |  |  | return (double)*timer / freq.QuadPart; /* counter ticks divided by ticks per second */ | 
| 71 |  |  |  |  |  |  | #else | 
| 72 | 0 |  |  |  |  |  | return ((double)timer->tv_usec / 1000000.0) + timer->tv_sec; /* microseconds scaled to seconds, plus whole seconds */ | 
| 73 |  |  |  |  |  |  | #endif | 
| 74 |  |  |  |  |  |  | } | 
| 75 |  |  |  |  |  |  |  | 
| 76 | 0 |  |  |  |  |  | void rhash_timer_start(timedelta_t* timer) /* start a timer: record the current time reading into *timer */ | 
| 77 |  |  |  |  |  |  | { | 
| 78 | 0 |  |  |  |  |  | get_timedelta(timer); /* platform-specific clock read, selected by the get_timedelta macro */ | 
| 79 | 0 |  |  |  |  |  | } | 
| 80 |  |  |  |  |  |  |  | 
| 81 | 0 |  |  |  |  |  | double rhash_timer_stop(timedelta_t* timer) /* stop a timer: overwrite *timer (the start reading) with the elapsed interval and return that interval in seconds */ | 
| 82 |  |  |  |  |  |  | { | 
| 83 |  |  |  |  |  |  | timedelta_t end; | 
| 84 | 0 |  |  |  |  |  | get_timedelta(&end); /* take the end-of-interval reading */ | 
| 85 |  |  |  |  |  |  | #if defined(_WIN32) || defined(__CYGWIN__) | 
| 86 |  |  |  |  |  |  | *timer = end - *timer; /* counter ticks elapsed since the start reading */ | 
| 87 |  |  |  |  |  |  | #else | 
| 88 | 0 |  |  |  |  |  | timer->tv_sec  = end.tv_sec  - timer->tv_sec - (end.tv_usec >= timer->tv_usec ? 0 : 1); /* borrow one second when the microsecond difference would be negative; must run before tv_usec is overwritten on the next line */ | 
| 89 | 0 | 0 |  |  |  |  | timer->tv_usec = end.tv_usec + (end.tv_usec >= timer->tv_usec ? 0 : 1000000 ) - timer->tv_usec; /* add the borrowed 1000000 microseconds so the difference is not negative */ | 
| 90 |  |  |  |  |  |  | #endif | 
| 91 | 0 |  |  |  |  |  | return fsec(timer); /* convert the stored interval to seconds */ | 
| 92 |  |  |  |  |  |  | } | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | #if defined(_WIN32) || defined(__CYGWIN__) | 
| 95 |  |  |  |  |  |  | /** | 
| 96 |  |  |  |  |  |  | * Raise the process and thread priority and set the process affinity to all CPUs but the first one. | 
| 97 |  |  |  |  |  |  | * This improves benchmark results on multi-CPU systems. | 
| 98 |  |  |  |  |  |  | * | 
| 99 |  |  |  |  |  |  | * @deprecated This function shall be removed soon. | 
| 100 |  |  |  |  |  |  | */ | 
| 101 |  |  |  |  |  |  | static void benchmark_cpu_init(void) | 
| 102 |  |  |  |  |  |  | { | 
| 103 |  |  |  |  |  |  | DWORD_PTR dwProcessMask, dwSysMask, dwDesired; | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS); | 
| 106 |  |  |  |  |  |  | SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  | if ( GetProcessAffinityMask(GetCurrentProcess(), &dwProcessMask, &dwSysMask) ) { | 
| 109 |  |  |  |  |  |  | dwDesired = dwSysMask & (dwProcessMask & ~1); /* remove the first processor from the process mask */ | 
| 110 |  |  |  |  |  |  | dwDesired = (dwDesired ? dwDesired : dwSysMask & ~1); /* if nothing is left, fall back to the system mask without the first processor */ | 
| 111 |  |  |  |  |  |  | if (dwDesired != 0) { /* leave the affinity unchanged when both candidate masks are empty */ | 
| 112 |  |  |  |  |  |  | SetProcessAffinityMask(GetCurrentProcess(), dwDesired); | 
| 113 |  |  |  |  |  |  | } | 
| 114 |  |  |  |  |  |  | } | 
| 115 |  |  |  |  |  |  | } | 
| 116 |  |  |  |  |  |  | #endif | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | /** | 
| 119 |  |  |  |  |  |  | * Hash a message chunk, fed count times in a row, with the specified hash function. | 
| 120 |  |  |  |  |  |  | * | 
| 121 |  |  |  |  |  |  | * @deprecated This function shall be removed soon. | 
| 122 |  |  |  |  |  |  | * | 
| 123 |  |  |  |  |  |  | * @param hash_id hash function identifier, passed to rhash_init() | 
| 124 |  |  |  |  |  |  | * @param message a message chunk to hash | 
| 125 |  |  |  |  |  |  | * @param msg_size message chunk size | 
| 126 |  |  |  |  |  |  | * @param count number of times the chunk is fed to rhash_update() | 
| 127 |  |  |  |  |  |  | * @param out buffer passed to rhash_final() to receive the computed hash | 
| 128 |  |  |  |  |  |  | * @return 1 on success, 0 if rhash_init() did not return a context | 
| 129 |  |  |  |  |  |  | */ | 
| 130 | 0 |  |  |  |  |  | static int hash_in_loop(unsigned hash_id, const unsigned char* message, size_t msg_size, int count, unsigned char* out) | 
| 131 |  |  |  |  |  |  | { | 
| 132 |  |  |  |  |  |  | int i; | 
| 133 | 0 |  |  |  |  |  | struct rhash_context *context = rhash_init(hash_id); | 
| 134 | 0 | 0 |  |  |  |  | if (!context) return 0; /* out is left untouched in this case */ | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | /* process the repeated message buffer; the result of rhash_update() is not checked */ | 
| 137 | 0 | 0 |  |  |  |  | for (i = 0; i < count; i++) rhash_update(context, message, msg_size); | 
| 138 | 0 |  |  |  |  |  | rhash_final(context, out); | 
| 139 | 0 |  |  |  |  |  | rhash_free(context); | 
| 140 | 0 |  |  |  |  |  | return 1; | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  |  | 
| 143 | 0 |  |  |  |  |  | void rhash_run_benchmark(unsigned hash_id, unsigned flags, FILE* output) /* Benchmark the hash selected by hash_id: time 4 rounds of hashing a generated message and print the speed to output; when built with HAVE_TSC and RHASH_BENCHMARK_CPB is set in flags, also measure and print CPU clocks per byte */ | 
| 144 |  |  |  |  |  |  | { | 
| 145 |  |  |  |  |  |  | unsigned char ALIGN_ATTR(64) message[8192]; /* 8 KiB chunk that is hashed repeatedly */ | 
| 146 |  |  |  |  |  |  | timedelta_t timer; | 
| 147 |  |  |  |  |  |  | int i, j; | 
| 148 |  |  |  |  |  |  | size_t sz_mb, msg_size; | 
| 149 | 0 |  |  |  |  |  | double time, total_time = 0; | 
| 150 | 0 |  |  |  |  |  | const int rounds = 4; | 
| 151 |  |  |  |  |  |  | const char* hash_name; | 
| 152 |  |  |  |  |  |  | unsigned char out[130]; /* receives the digest; NOTE(review): assumes 130 bytes is enough for every selectable hash - confirm */ | 
| 153 |  |  |  |  |  |  | #ifdef HAVE_TSC | 
| 154 | 0 |  |  |  |  |  | double cpb = 0; | 
| 155 |  |  |  |  |  |  | #endif /* HAVE_TSC */ | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | #if defined(_WIN32) || defined(__CYGWIN__) | 
| 158 |  |  |  |  |  |  | benchmark_cpu_init(); /* set cpu affinity to improve test results */ | 
| 159 |  |  |  |  |  |  | #endif | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | /* set the number of bytes hashed per round: less for the slower hash functions */ | 
| 162 | 0 |  |  |  |  |  | msg_size = 1073741824 / 2; /* 512 MiB by default */ | 
| 163 | 0 | 0 |  |  |  |  | if (hash_id & (RHASH_WHIRLPOOL | RHASH_SNEFRU128 | RHASH_SNEFRU256 | RHASH_SHA3_224 | RHASH_SHA3_256 | RHASH_SHA3_384 | RHASH_SHA3_512)) { | 
| 164 | 0 |  |  |  |  |  | msg_size /= 8; /* 64 MiB */ | 
| 165 | 0 | 0 |  |  |  |  | } else if (hash_id & (RHASH_GOST | RHASH_GOST_CRYPTOPRO | RHASH_SHA384 | RHASH_SHA512)) { | 
| 166 | 0 |  |  |  |  |  | msg_size /= 2; /* 256 MiB */ | 
| 167 |  |  |  |  |  |  | } | 
| 168 | 0 |  |  |  |  |  | sz_mb = msg_size / (1 << 20); /* size in MiB */ | 
| 169 | 0 |  |  |  |  |  | hash_name = rhash_get_name(hash_id); | 
| 170 | 0 | 0 |  |  |  |  | if (!hash_name) hash_name = ""; /* no single name available: benchmarking several hashes */ | 
| 171 |  |  |  |  |  |  |  | 
| 172 | 0 | 0 |  |  |  |  | for (i = 0; i < (int)sizeof(message); i++) message[i] = i & 0xff; /* fill the chunk with the repeating byte pattern 0..255 */ | 
| 173 |  |  |  |  |  |  |  | 
| 174 | 0 | 0 |  |  |  |  | for (j = 0; j < rounds; j++) { /* timed rounds */ | 
| 175 | 0 |  |  |  |  |  | rhash_timer_start(&timer); | 
| 176 | 0 |  |  |  |  |  | hash_in_loop(hash_id, message, sizeof(message), (int)(msg_size / sizeof(message)), out); /* NOTE(review): the return value is ignored, so a failed rhash_init() would still be timed and reported */ | 
| 177 |  |  |  |  |  |  |  | 
| 178 | 0 |  |  |  |  |  | time = rhash_timer_stop(&timer); | 
| 179 | 0 |  |  |  |  |  | total_time += time; | 
| 180 |  |  |  |  |  |  |  | 
| 181 | 0 | 0 |  |  |  |  | if ((flags & (RHASH_BENCHMARK_QUIET | RHASH_BENCHMARK_RAW)) == 0) { /* per-round line is printed only when neither quiet nor raw mode is requested */ | 
| 182 | 0 |  |  |  |  |  | fprintf(output, "%s %u MiB calculated in %.3f sec, %.3f MBps\n", hash_name, (unsigned)sz_mb, time, (double)sz_mb / time); | 
| 183 | 0 |  |  |  |  |  | fflush(output); | 
| 184 |  |  |  |  |  |  | } | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  |  | 
| 187 |  |  |  |  |  |  | #if defined(HAVE_TSC) | 
| 188 |  |  |  |  |  |  | /* measure the CPU "clocks per byte" speed */ | 
| 189 | 0 | 0 |  |  |  |  | if (flags & RHASH_BENCHMARK_CPB) { | 
| 190 | 0 |  |  |  |  |  | unsigned int c1 = -1, c2 = -1; /* running minimums, both start at the largest unsigned value */ | 
| 191 |  |  |  |  |  |  | unsigned volatile long long cy0, cy1, cy2; | 
| 192 | 0 |  |  |  |  |  | int msg_size = 128 * 1024; /* 128 KiB hashed per measured call; NOTE(review): shadows the outer msg_size */ | 
| 193 |  |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | /* make 200 tries and keep the smallest cycle counts seen */ | 
| 195 | 0 | 0 |  |  |  |  | for (i = 0; i < 200; i++) { | 
| 196 | 0 |  |  |  |  |  | cy0 = read_tsc(); | 
| 197 | 0 |  |  |  |  |  | hash_in_loop(hash_id, message, sizeof(message), msg_size / sizeof(message), out); | 
| 198 | 0 |  |  |  |  |  | cy1 = read_tsc(); | 
| 199 | 0 |  |  |  |  |  | hash_in_loop(hash_id, message, sizeof(message), msg_size / sizeof(message), out); | 
| 200 | 0 |  |  |  |  |  | hash_in_loop(hash_id, message, sizeof(message), msg_size / sizeof(message), out); | 
| 201 | 0 |  |  |  |  |  | cy2 = read_tsc(); | 
| 202 |  |  |  |  |  |  |  | 
| 203 | 0 |  |  |  |  |  | cy2 -= cy1; /* cycles spent in the two consecutive calls */ | 
| 204 | 0 |  |  |  |  |  | cy1 -= cy0; /* cycles spent in the single call */ | 
| 205 | 0 | 0 |  |  |  |  | c1 = (unsigned int)(c1 > cy1 ? cy1 : c1); | 
| 206 | 0 | 0 |  |  |  |  | c2 = (unsigned int)(c2 > cy2 ? cy2 : c2); | 
| 207 |  |  |  |  |  |  | } | 
| 208 | 0 |  |  |  |  |  | cpb = ((c2 - c1) + 1) / (double)msg_size; /* (two calls - one call) estimates one call; presumably the subtraction cancels fixed measurement overhead */ | 
| 209 |  |  |  |  |  |  | } | 
| 210 |  |  |  |  |  |  | #endif /* HAVE_TSC */ | 
| 211 |  |  |  |  |  |  |  | 
| 212 | 0 | 0 |  |  |  |  | if (flags & RHASH_BENCHMARK_RAW) { | 
| 213 |  |  |  |  |  |  | /* output result in a "raw" machine-readable format: tab-separated name, MiB, seconds, speed and optional clocks per byte */ | 
| 214 | 0 |  |  |  |  |  | fprintf(output, "%s\t%u\t%.3f\t%.3f", hash_name, ((unsigned)sz_mb * rounds), total_time, (double)(sz_mb * rounds) / total_time); | 
| 215 |  |  |  |  |  |  | #if defined(HAVE_TSC) | 
| 216 | 0 | 0 |  |  |  |  | if (flags & RHASH_BENCHMARK_CPB) fprintf(output, "\t%.2f", cpb); | 
| 217 |  |  |  |  |  |  | #endif /* HAVE_TSC */ | 
| 218 | 0 |  |  |  |  |  | fprintf(output, "\n"); | 
| 219 |  |  |  |  |  |  | } else { | 
| 220 | 0 |  |  |  |  |  | fprintf(output, "%s %u MiB total in %.3f sec, %.3f MBps", hash_name, ((unsigned)sz_mb * rounds), total_time, (double)(sz_mb * rounds) / total_time); | 
| 221 |  |  |  |  |  |  | #if defined(HAVE_TSC) | 
| 222 | 0 | 0 |  |  |  |  | if (flags & RHASH_BENCHMARK_CPB) fprintf(output, ", CPB=%.2f", cpb); | 
| 223 |  |  |  |  |  |  | #endif /* HAVE_TSC */ | 
| 224 | 0 |  |  |  |  |  | fprintf(output, "\n"); | 
| 225 |  |  |  |  |  |  | } | 
| 226 | 0 |  |  |  |  |  | } | 