format PE64 Console 5.0
include 'win64axp.inc'

;-- CODE SECTION -------------------------------------------------------------------------------------------------------

.code

; Demo driver: pins the process to one CPU, raises the scheduling priority, times the three sample
; routines below via SpeedTestMsg, restores normal priority and waits for a key press before exiting.
entry:
frame
        ; Pseudo-handles: -1 = current process (GetCurrentProcess), -2 = current thread (GetCurrentThread)
        invoke  SetProcessAffinityMask, -1, 1                   ; affinity mask 1, to avoid CPU migration
        invoke  SetPriorityClass, -1, REALTIME_PRIORITY_CLASS   ; available only when running with administrator rights
        invoke  SetThreadPriority, -2, THREAD_PRIORITY_TIME_CRITICAL

        stdcall SpeedTestInit, -1                               ; -1 = show every diagnostic message
        stdcall SpeedTestMsg, test1, 'Testing 1...', <' %llu ticks%s',10>, ' (CPU migration is detected)'
        stdcall SpeedTestMsg, test2, 'Testing 2...', <' %llu ticks%s',10>, ' (CPU migration is detected)'
        stdcall SpeedTestMsg, test3, 'Testing 3...', <' %llu ticks%s',10>, ' (CPU migration is detected)'

        ; Drop back to normal scheduling before waiting for the user
        invoke  SetThreadPriority, -2, THREAD_PRIORITY_NORMAL
        invoke  SetPriorityClass, -1, NORMAL_PRIORITY_CLASS

        cinvoke printf, 'Press a key to exit...'
        cinvoke getch
        invoke  ExitProcess, 0
endf

; Sample 1: a single CPUID (leaf 0).
align 16
test1:
        xor     eax, eax
        cpuid
        ret

; Sample 2: 65536-iteration countdown written as dec/jnz.
align 16
test2:
        mov     ecx, 65536
.countdown:
        dec     ecx
        jnz     .countdown
        ret

; Sample 3: the same 65536-iteration countdown written with the LOOP instruction
; (kept as LOOP on purpose: it is the code being measured).
align 16
test3:
        mov     ecx, 65536
        loop    $
        ret

;-- SPEED TEST PROCEDURES ----------------------------------------------------------------------------------------------

; Speed-test procedures for Windows x64, v1.00a
; (c) 2020 by Jin X (jin_x@list.ru)

SPEEDTEST_REPEATS       = 256                                   ; number of code execution repeats (must be power of two!!!)
SPEEDTEST_WARMUPS       = 1 shl (bsr SPEEDTEST_REPEATS / 2)     ; number of warming-up executions
; SPEEDTEST_REPEATS must be a non-zero power of two (lowest set bit = highest set bit)
assert SPEEDTEST_WARMUPS >= 0 & SPEEDTEST_REPEATS > 0 & bsf SPEEDTEST_REPEATS = bsr SPEEDTEST_REPEATS

; Initialize speed-test and show message if needed (via printf)
; Selects the TSC reader (RDTSCP when the CPU reports it, otherwise the default RDTSC reader),
; checks for invariant TSC, optionally prints diagnostics and measures the overhead of an empty call.
; Parameters: ecx = show message flags:
;               bit 0 - show a warning when rdtscp is NOT supported,
;               bit 1 - show a warning when invariant TSC is NOT supported,
;               bit 2 - show a message when everything's ok
;               (ecx = -1 - all messages)
; Returns: rax = unsupported feature flags:
;               bit 0 - rdtscp is NOT supported,
;               bit 1 - invariant TSC is NOT supported
;               (eax = 0 - both features are supported)
; Register roles: r8b = unsupported-feature flags being built, r9b = caller's message mask,
;                 r10d = highest extended CPUID leaf, bl = messages to print, bh = saved result.
proc SpeedTestInit uses rbx, MsgFlags
frame
        mov     r9d, ecx                        ; keep the message mask (cpuid overwrites ecx)
        mov     r8b, 3                          ; start pessimistic: both features unsupported
        mov     eax, 0x80000000
        cpuid
        mov     r10d, eax                       ; max extended cpuid leaf level

        ; RDTSCP instruction support check
        mov     eax, 0x80000001
        cmp     r10d, eax
        jb      .checks_done                    ; leaf missing: both features are NOT supported
        cpuid
        test    edx, 1 shl 27                   ; rdtscp support bit
        jz      .check_invariant
        and     r8b, not 1                      ; mark rdtscp as supported
        mov     [SpeedTestGetTSC], SpeedTestRDTSCP ; switch the reader to the RDTSCP version

.check_invariant:
        ; Invariant TSC support check
        mov     eax, 0x80000007
        cmp     r10d, eax
        jb      .checks_done
        cpuid
        test    edx, 1 shl 8                    ; invariant TSC support bit
        jz      .checks_done
        and     r8b, not 2                      ; mark invariant TSC as supported

.checks_done:
        mov     bl, r8b
        mov     bh, bl                          ; bh = result mask, returned at the end
        test    bl, bl
        setz    cl
        shl     cl, 2
        or      bl, cl                          ; set bit 2 in bl if both features are supported
        and     bl, r9b                         ; keep only the messages the caller asked for

        ; Messages
        test    bl, 1
        jz      .rdtscp_msg_done
        cinvoke printf, <"Warning: RDTSCP instruction is not supported, RDTSC will be used instead (CPU migration can't be detected)!", 10>
.rdtscp_msg_done:
        test    bl, 2
        jz      .invariant_msg_done
        cinvoke printf, <"Warning: invariant TSC is not supported (results may be inaccurate)!", 10>
.invariant_msg_done:
        test    bl, 4
        jz      .success_msg_done
        cinvoke printf, <"Success: both RDTSCP instruction and invariant TSC are supported.", 10>
.success_msg_done:

        ; Measure overhead: time an empty routine with the overhead correction zeroed,
        ; then store the result as the correction for all later measurements
        mov     qword [SpeedTestOverhead], 0
        stdcall SpeedTest, SpeedTestEmptyFunc   ; overhead test
        mov     [SpeedTestOverhead], rax

        movzx   eax, bh                         ; results
        ret
endf
endp ; SpeedTestInit

; Measure procedure speed and show message (via printf)
; Parameters:
;   * rcx = proc address
;   * rdx = starting message (0 - no message, -1 - 'Testing...' message);
;   * r8 = result message (0 - no message, -1 - just a number of ticks and new line), should contain '%llu'
;          for result TSC count and then '%s' (optional) for CPU migration message (specified by r9);
;   * r9 = CPU migration message (optional, should be used only if r8 message contains '%s').
; Returns: rax = TSC count (never negative), zf = 1 if no CPU migration has occurred
; Note: the parameter home slots are reused as scratch storage: [ProcAddr] holds the tick count after
;       the measurement, [PreMsg] holds the saved zero flag.
proc SpeedTestMsg ProcAddr, PreMsg, ResultMsg, MigMsg
SpeedTestMsg% = 0 ; turn off parameter count check
frame
        mov     [ProcAddr], rcx
        mov     [ResultMsg], r8
        mov     [MigMsg], r9

        ; Starting message
        test    rdx, rdx
        jz      .no_start
        cmp     rdx, -1
        jne     @F
        mov     rdx, .testing_msg
@@:
        cinvoke printf, '%s', rdx
.no_start:

        ; Test speed
        stdcall SpeedTest, [ProcAddr]
        mov     [ProcAddr], rax                 ; keep the tick count across printf
        setz    byte [PreMsg]                   ; save zf (1 = no migration); mov/setz leave flags intact

        ; Result message
        mov     r8, .no_message
        jz      @F                              ; jump if no CPU migration
        mov     r8, [MigMsg]
@@:
        mov     rcx, [ResultMsg]
        test    rcx, rcx
        jz      .no_results
        cmp     rcx, -1
        jne     @F
        mov     rcx, .just_ticks
@@:
        cinvoke printf, rcx, rax, r8
.no_results:

        ; Return values
        mov     rax, [ProcAddr]
        dec     byte [PreMsg]                   ; restore zf: 1-1 = 0 sets zf, 0-1 = 0xFF clears it
        ret
endf

.testing_msg    db 'Testing...',0
.just_ticks     db ' %llu',10                   ; terminated by the zero byte of .no_message below
.no_message     db 0
endp ; SpeedTestMsg

; Measure procedure speed
; Runs SPEEDTEST_WARMUPS untimed calls, then SPEEDTEST_REPEATS timed calls, sorts the samples and
; averages the middle half of them (all of them when SPEEDTEST_REPEATS <= 2).
; Parameters: rcx = proc address
; Returns: rax = TSC count (never negative; a negative average is clamped to 0),
;          zf = 1 if no CPU migration has occurred
; Register roles: r12 = proc address, r13 = start TSC, r14d = start CPU id, r15d = migration flag,
;                 rdi = write pointer into SpeedTestResults, esi = loop counter.
;                 rbx is saved because the RDTSC reader executes cpuid.
proc SpeedTest uses rbx rsi rdi r12 r13 r14 r15, ProcAddr
frame
        mov     r12, rcx

        ; Warming-up calls
if SPEEDTEST_WARMUPS > 0
        mov     esi, SPEEDTEST_WARMUPS
@@:
        stdcall r12
        dec     esi
        jnz     @B
end if

        ; Main tests
        cld
        mov     rdi, SpeedTestResults
        xor     r15d, r15d
        mov     esi, SPEEDTEST_REPEATS
@@:
;       invoke  SwitchToThread                  ; try to update thread time slice
        invoke  SpeedTestGetTSC                 ; get ticks in rax, CPU id in ecx
        mov     r13, rax
        mov     r14d, ecx
        stdcall r12                             ; main call
        invoke  SpeedTestGetTSC                 ; get ticks in rax, CPU id in ecx
        sub     rax, r13
        sub     rax, [SpeedTestOverhead]        ; result TSC count (may be negative)
        stosq                                   ; store to SpeedTestResults
        sub     ecx, r14d                       ; detect CPU migration (non-zero if the CPU id changed)
        or      r15d, ecx                       ; migration flag for all tests
        dec     esi
        jnz     @B

if SPEEDTEST_REPEATS > 2
        ; Sort results
        mov     rcx, SpeedTestResults
        mov     rdx, SPEEDTEST_REPEATS
        stdcall InsertionSort64
end if

        ; Calculate average CPU ticks
if SPEEDTEST_REPEATS <= 2
        mov     ecx, SPEEDTEST_REPEATS
else
        mov     ecx, SPEEDTEST_REPEATS/2        ; use only 50% of results from array middle (assuming that 25% at the start and end are errors)
end if
        xor     eax, eax
        xor     edx, edx
@@:
        add     rax, [SpeedTestResults+(SPEEDTEST_REPEATS/4)*8 + rdx*8] ; sum of all relevant results
        inc     edx
        dec     ecx
        jnz     @B                              ; leaves rcx = 0, used as the clamp value below
if SPEEDTEST_REPEATS > 2
        sar     rax, bsr (SPEEDTEST_REPEATS/2)  ; average value
else if SPEEDTEST_REPEATS = 2
        sar     rax, bsr SPEEDTEST_REPEATS      ; average value
end if
        ; Clamp a negative average to zero. The sign is tested explicitly so this also works when no
        ; shift was emitted (SPEEDTEST_REPEATS = 1), and the move is 64-bit: a 32-bit cmov would clear
        ; the upper half of rax even when the condition is false, truncating results >= 2^32 ticks.
        test    rax, rax
        cmovs   rax, rcx                        ; zero result if negative
        test    r15d, r15d                      ; zf = 1 if no CPU migration has occurred
        ret
endf
endp ; SpeedTest

; Read TSC via RDTSC [for internal use]
; Returns: rax = current TSC counter value, ecx = 0 (processor id detection is not supported)
; Changes ebx !!! (cpuid overwrites eax, ebx, ecx and edx)
if used SpeedTestRDTSC
SpeedTestRDTSC:
        xor     eax, eax                        ; cpuid execution time may vary depending on eax value
        cpuid                                   ; serialization
        xor     ecx, ecx
        rdtsc
        shl     rdx, 32
        or      rax, rdx                        ; rax = edx:eax
        mfence
; Empty routine timed by SpeedTestInit to measure the call overhead (shares the ret above)
SpeedTestEmptyFunc:
        ret
end if ; used SpeedTestRDTSC

; Read TSC via RDTSCP [for internal use]
; Returns: rax = current TSC counter value, ecx = processor id
if used SpeedTestRDTSCP
SpeedTestRDTSCP:
        rdtscp
        shl     rdx, 32
        or      rax, rdx                        ; rax = edx:eax
        mfence
        ret
end if ; used SpeedTestRDTSCP

if used InsertionSort64
; Insertion sort of 64-bit elements (ascending, elements compared as signed values)
; Parameters: rcx = array address, rdx = number of elements
; Changes: rax, r8, r9, r10, flags
InsertionSort64:
        mov     r8d, 1                          ; start key_index
        cmp     rdx, r8
        jle     .exit                           ; jump if number of element <= 1
.loop1:
        mov     rax, [rcx+r8*8]                 ; key
        mov     r9, r8                          ; el_index
.loop2:
        mov     r10, [rcx+(r9-1)*8]             ; prev_el
        cmp     r10, rax                        ; prev_el <=> key ?
        jng     @F
        mov     [rcx+r9*8], r10                 ; if (prev_el > key) el = prev_el
        dec     r9                              ; --el_index
        jnz     .loop2                          ; repeat if el_index > 0
@@:
        mov     [rcx+r9*8], rax                 ; el = key
        inc     r8                              ; ++key_index
        cmp     r8, rdx
        jb      .loop1                          ; repeat if key_index < number of elements
.exit:
        ret
end if ; used InsertionSort64

;-- DATA SECTION -------------------------------------------------------------------------------------------------------

.data

if used SpeedTestInit
SpeedTestGetTSC         dq SpeedTestRDTSC       ; TSC read procedure
SpeedTestOverhead       rq 1                    ; TSC read overhead tick count
SpeedTestResults        rq SPEEDTEST_REPEATS    ; Temporary result array
end if ; used SpeedTestInit

;-- IMPORT SECTION -----------------------------------------------------------------------------------------------------

section '.idata' import data readable

library kernel32, 'kernel32.dll',\
        msvcrt, 'msvcrt.dll'

import_kernel32
all_api

import  msvcrt,\
        printf, 'printf',\
        getch, '_getch'