News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

multicore idea testpiece

Started by johnsa, March 31, 2011, 12:07:03 PM

Previous topic - Next topic

johnsa

Hey all,

So based on a discussion a long time ago on the board I've started writing a test-piece to test out a few concepts:
1) UserMode Spinlocks to replace the standard critical section/mutex's when you KNOW there will be low contention.
2) A task delegating scheduler idea to simplify/reduce overhead of creating threads the traditional albeit naive way.

So.. i've been going over this for the last 2 days.. and I can't find the bug.. the test code essentially runs two independant functions, first sequentially, then using naive threaded approach, then using the custom task delegation functions. For some reason, the last of these (using the functions included in the code seems to deadlock on the waitformultipleobjects, it looks as if the setevent is not being triggered)... any suggestions would be greatly appreciated !!! :)



include \masm32\include\masm32rt.inc

.686p
option casemap:none
option scoped
.mmx
.k3d
.xmm

; Prototypes for locking.
AllocateSpinLock proto
FreeSpinLock     proto lockAddr:DWORD
SpinLockLoop     proto lockAddr:DWORD
SpinLockLoopC    proto lockAddr:DWORD
SpinUnlock       proto lockAddr:DWORD

; Prototypes for multicore scheduler.
ActivateThreadSchedulers proto
ThreadScheduleHandler    proto nThread:DWORD
MCoreGroupWait           proto
AssignTask               proto coreNo:DWORD, taskAddr:DWORD
GetTaskState proto coreNo:DWORD
ResetCoreTask proto coreNo:DWORD
WaitCoreTask      proto coreUseCount:DWORD

func1 proto
func2 proto

SPIN_LOCK MACRO lockAddr
invoke SpinLockLoop,lockAddr
ENDM

SPIN_UNLOCK MACRO lockAddr
invoke SpinUnlock,lockAddr
ENDM

.data?

MAX_CORE_COUNT equ 32
TASK_IDLE       equ 0
TASK_RUNNING   equ 1

; Handles for per-core threads.
align 16
dThreadHandles dd MAX_CORE_COUNT DUP (?)

.data

align 16
coreNumbers dd 0,0
dd 1,0
dd 2,0
dd 3,0
dd 4,0
dd 5,0
dd 6,0
dd 7,0
dd 8,0
dd 9,0
dd 10,0
dd 11,0
dd 12,0
dd 13,0
dd 14,0
dd 15,0
dd 16,0
dd 17,0
dd 18,0
dd 19,0
dd 20,0
dd 21,0
dd 22,0
dd 23,0
dd 24,0
dd 25,0
dd 26,0
dd 27,0
dd 28,0
dd 29,0
dd 30,0
dd 31,0
                                                 
taskStates  db MAX_CORE_COUNT DUP (0)
taskLocks   dd MAX_CORE_COUNT DUP (0)
taskWaits   dd MAX_CORE_COUNT DUP (0)

coreCount dd 2 ; SET THIS TO HOW MANY CORES YOU HAVE.
loopCount dd 0
startTick dd 0

amsg db '0',0
bmsg db '1',0

counterA dd 0
counterB dd 0

timestr db 128 DUP (0)

taskA dd 0
taskB dd 0
taskAwait dd 0
taskBwait dd 0

consoleLock dd 0

;####################################################################################################################################################################################
; CODE
;####################################################################################################################################################################################
.code

start:

invoke ActivateThreadSchedulers
invoke AllocateSpinLock
mov consoleLock,eax

; sequential test 1 core. (1500ms)
invoke GetTickCount
mov startTick,eax
testloop0:
call func1
call func2
inc loopCount
cmp loopCount,10000
jl short testloop0
invoke GetTickCount
sub eax,startTick
invoke dwtoa,eax,ADDR timestr
invoke StdOut,ADDR timestr
invoke StdOut,CTXT("ms")
mov loopCount,0
mov counterA,0
mov counterB,0

; test in parallel using 2 threads.
invoke GetTickCount
mov startTick,eax
testloop1:

invoke CreateEvent,0,FALSE,FALSE,0
mov taskAwait,eax
invoke CreateEvent,0,FALSE,FALSE,0
mov taskBwait,eax

invoke CreateThread,NULL,NULL,ADDR func1,ADDR taskA,CREATE_SUSPENDED,NULL
mov taskA,eax
invoke CreateThread,NULL,NULL,ADDR func2,ADDR taskB,CREATE_SUSPENDED,NULL
mov taskB,eax
invoke SetThreadIdealProcessor,taskA,0
invoke SetThreadIdealProcessor,taskB,1
invoke ResumeThread,taskA
invoke ResumeThread,taskB

invoke WaitForMultipleObjects,2,OFFSET taskAwait,TRUE,INFINITE

invoke CloseHandle,taskA
invoke CloseHandle,taskB
invoke CloseHandle,taskAwait
invoke CloseHandle,taskBwait

inc loopCount
cmp loopCount,10000
jl testloop1
invoke GetTickCount
sub eax,startTick
invoke dwtoa,eax,ADDR timestr
invoke StdOut,ADDR timestr
invoke StdOut,CTXT("ms")
mov loopCount,0
mov counterA,0
mov counterB,0

; test in parallel using custom mcore handler.
invoke GetTickCount
mov startTick,eax
testloop2:
invoke AssignTask,0,ADDR func1
invoke AssignTask,1,ADDR func2
invoke WaitCoreTask,2
inc loopCount
cmp loopCount,10000
jl short testloop2
invoke GetTickCount
sub eax,startTick
invoke dwtoa,eax,ADDR timestr
invoke StdOut,ADDR timestr
invoke StdOut,CTXT("ms")
mov loopCount,0
mov counterA,0
mov counterB,0

invoke FreeSpinLock,consoleLock
invoke ExitProcess,0


func1 proc
mov ecx,100000
floop:
add counterA,ecx
dec ecx
jnz short floop
invoke SetEvent,taskAwait
ret
func1 endp

func2 proc

mov ecx,100000
floop:
add counterB,ecx
dec ecx
jnz short floop
invoke SetEvent,taskBwait
ret
func2 endp

;####################################################################################################################################################################################
; SUPPORT FUNCTIONS
;####################################################################################################################################################################################
align 16
AllocateSpinLock PROC

invoke HeapAlloc,rv(GetProcessHeap),HEAP_ZERO_MEMORY,(4+128)
.if eax == 0
ret
.endif
memalign eax,128 ; Ensure that the start address and size occupy a single un-touched cacheline.

ret
AllocateSpinLock ENDP

align 16
FreeSpinLock PROC lockAddr:DWORD

invoke HeapFree,rv(GetProcessHeap),NULL,lockAddr

ret
FreeSpinLock ENDP

align 16
SpinLockLoop PROC lockAddr:DWORD

push edx
mov edx,lockAddr

aquireLock:
lock bts dword ptr [edx],0
jnc short gotLock

align 4
lockWait:
pause
test dword ptr [edx],1
jne short lockWait
jmp short aquireLock

gotLock:
pop edx

ret
SpinLockLoop ENDP

align 16
SpinLockLoopC PROC lockAddr:DWORD

push edx
mov edx,lockAddr

aquireLock:
lock bts dword ptr [edx],0
jnc short gotLock

lockWait0:
mov ecx,10000
align 4
lockWait:
dec ecx
jz short gotLock
pause
test dword ptr [edx],1
jne short lockWait
jmp short aquireLock

gotLock:
pop edx

ret
SpinLockLoopC ENDP

align 16
SpinUnlock PROC lockAddr:DWORD

push edx
mov edx,lockAddr
xor eax,eax
mov [edx],eax
pop edx

ret
SpinUnlock ENDP

;----------------------------------------------------------------------------------------------------------------------
; Called by main application/DLL setup to allow per-core task delegation.
;----------------------------------------------------------------------------------------------------------------------
ActivateThreadSchedulers proc
LOCAL curCore:DWORD

pushad

mov curCore,0
mov ecx,coreCount
mov esi,offset dThreadHandles
mov edi,offset taskLocks
createThreads:

push ecx

;------------------------------------------------
; Create Per Core Thread
;------------------------------------------------
mov eax,offset coreNumbers
mov ebx,curCore
shl ebx,3
add eax,ebx
invoke CreateThread,NULL,NULL,ADDR ThreadScheduleHandler,eax,CREATE_SUSPENDED,NULL
pop ecx
push ecx
mov [esi],eax ; Store thread handle.
dec ecx
invoke SetThreadIdealProcessor,[esi],ecx

;------------------------------------------------
; Create a Wait Event For Each Thread
;------------------------------------------------
invoke CreateEvent,0,FALSE,FALSE,0
mov ebx,curCore
mov taskWaits[ebx*4],eax

;------------------------------------------------
; Create Custom Task Locks
;------------------------------------------------
invoke AllocateSpinLock
mov ebx,curCore
shl ebx,2
mov [edi+ebx],eax

pop ecx

add esi,4
inc curCore
dec ecx

jnz short createThreads

popad

ret
ActivateThreadSchedulers endp

;----------------------------------------------------------------------------------------------------------------------
; Reset a core's task state.
;----------------------------------------------------------------------------------------------------------------------
ResetCoreTask proc coreNo:DWORD

; Lock task state.
mov esi,offset taskLocks
mov ebx,coreNo
shl ebx,2
add esi,ebx
mov eax,[esi]
SPIN_LOCK eax

; Flag task as new/reset.
mov esi,offset taskStates
mov ebx,coreNo
add esi,ebx
mov byte ptr [esi],TASK_IDLE

; Unlock task state.
SPIN_UNLOCK eax

ret
ResetCoreTask endp

;----------------------------------------------------------------------------------------------------------------------
; Obtain a core's current task state.
; Returns: AL = state [ idle, running ]
;----------------------------------------------------------------------------------------------------------------------
GetTaskState proc coreNo:DWORD

mov esi,offset taskStates
mov ebx,coreNo
add esi,ebx
xor eax,eax
mov al,[esi]

ret
GetTaskState endp

;----------------------------------------------------------------------------------------------------------------------
; Wait for all Core Tasks to Complete.
;----------------------------------------------------------------------------------------------------------------------
WaitCoreTask proc coreUseCount:DWORD

invoke WaitForMultipleObjects,coreUseCount,OFFSET taskWaits,TRUE,INFINITE
; xor ecx,ecx
;fixwait:
;
; mov al,taskStates[ecx]
; .if al != TASK_IDLE
; push ecx
; invoke Sleep,2
; pop ecx
; jmp fixwait
; .endif
;
; inc ecx
; cmp ecx,coreUseCount
; jl short fixwait
;

; xor ecx,ecx
;closeWaits:
; mov eax,taskWaits[ecx*4]
; .if eax != 0
; push ecx
; invoke ResetEvent,eax
; pop ecx
; .endif
; inc ecx
; cmp ecx,MAX_CORE_COUNT
; jl short closeWaits

ret
WaitCoreTask endp

;----------------------------------------------------------------------------------------------------------------------
; Generic Per-Core Thread Handler. Allows responsive delegation of per-core tasks.
;----------------------------------------------------------------------------------------------------------------------
align 16
ThreadScheduleHandler proc nThread:DWORD

align 16
schedulerLoop:

; Aquire task state lock.
mov esi,offset taskLocks
mov eax,nThread
mov ebx,[eax]
shl ebx,2
add esi,ebx
mov eax,[esi]
SPIN_LOCK eax

mov eax,nThread
mov ecx,[eax+4]

; Is there a task?
.if ecx != 0

; Execute task
call dword ptr ecx

; Reset task pointer to 0.
mov eax,nThread
mov dword ptr [eax+4],0

.endif

; Release task state lock.
mov esi,offset taskLocks
mov eax,nThread
mov ebx,[eax]
shl ebx,2
add esi,ebx
mov eax,[esi]
SPIN_UNLOCK eax

; Flag task as complete.
mov eax,nThread
mov ebx,[eax]
mov esi,offset taskStates
add esi,ebx
mov byte ptr [esi],TASK_IDLE

; Flag the task wait state and suspend thread.
mov eax,nThread
mov eax,[eax]
mov ebx,dThreadHandles[eax*4]
mov eax,taskWaits[eax*4]
invoke SetEvent,eax
invoke SuspendThread,ebx

jmp schedulerLoop

ret
ThreadScheduleHandler endp

;----------------------------------------------------------------------------------------------------------------------
; Assign a task to a given core(n).
;----------------------------------------------------------------------------------------------------------------------
align 16
AssignTask proc coreNo:DWORD, taskAddr:DWORD

push esi
push ebx

; Aquire task state lock.
mov esi,offset taskLocks
mov ebx,coreNo
shl ebx,2
add esi,ebx
mov eax,[esi]
SPIN_LOCK eax

; Flag task as running.
mov esi,offset taskStates
mov ebx,coreNo
add esi,ebx
mov byte ptr [esi],TASK_RUNNING

; Set Core Task Pointer.
mov esi,offset coreNumbers
mov eax,coreNo
shl eax,3
mov ebx,taskAddr
mov [esi+eax+4],ebx

; Release task state lock.
mov esi,offset taskLocks
mov ebx,coreNo
shl ebx,2
add esi,ebx
mov eax,[esi]
SPIN_UNLOCK eax

; Relaunch the thread scheduler.
mov eax,coreNo
mov eax,dThreadHandles[eax*4]
invoke ResumeThread,eax

pop ebx
pop esi

ret
AssignTask endp

end start



johnsa

The problem is intermittent time wise and waitmultipleobjects never returns. If you set it's timeout to 2sec or so, it will eventually work.. but that doesn't really solve the underlying problem as to why even though the setevent is being called, it doesn't return.

johnsa

So I eventually solved this. The problem was there was space for deadlocks to occur between setevent and suspendthread. I changed the model slightly to use double locking and then have a signalandwait so the whole event/suspension of scheduler becomes atomic. This seems to be working quite well for me now. Please try it out and feel free to come up with some tests for it.



include \masm32\include\masm32rt.inc

.686p
option casemap:none
option scoped
.mmx
.k3d
.xmm

; Prototypes for locking.
AllocateSpinLock proto
FreeSpinLock     proto lockAddr:DWORD
SpinLockLoop     proto lockAddr:DWORD
SpinLockLoopC    proto lockAddr:DWORD
SpinUnlock       proto lockAddr:DWORD

; Prototypes for multicore scheduler.
ActivateThreadSchedulers proto
ThreadScheduleHandler    proto nThread:DWORD
MCoreGroupWait           proto
AssignTask               proto coreNo:DWORD, taskAddr:DWORD
GetTaskState proto coreNo:DWORD
ResetCoreTask proto coreNo:DWORD
WaitCoreTask      proto coreUseCount:DWORD

func1 proto
func2 proto

SPIN_LOCK MACRO lockAddr
invoke SpinLockLoop,lockAddr
ENDM

SPIN_UNLOCK MACRO lockAddr
invoke SpinUnlock,lockAddr
ENDM

.data?

MAX_CORE_COUNT equ 32
TASK_IDLE       equ 0
TASK_RUNNING   equ 1

; Handles for per-core threads.
align 16
dThreadHandles dd MAX_CORE_COUNT DUP (?)

.data

align 16
coreNumbers dd 0,0
dd 1,0
dd 2,0
dd 3,0
dd 4,0
dd 5,0
dd 6,0
dd 7,0
dd 8,0
dd 9,0
dd 10,0
dd 11,0
dd 12,0
dd 13,0
dd 14,0
dd 15,0
dd 16,0
dd 17,0
dd 18,0
dd 19,0
dd 20,0
dd 21,0
dd 22,0
dd 23,0
dd 24,0
dd 25,0
dd 26,0
dd 27,0
dd 28,0
dd 29,0
dd 30,0
dd 31,0
                                                 
taskStates  db MAX_CORE_COUNT DUP (0)
taskLocks   dd MAX_CORE_COUNT DUP (0)
taskWaits   dd MAX_CORE_COUNT DUP (0)
taskHalts   dd MAX_CORE_COUNT DUP (0)

coreCount dd 2 ; SET THIS TO HOW MANY CORES YOU HAVE.
loopCount dd 0
startTick dd 0

amsg db '0',0
bmsg db '1',0

counterA dd 0
counterB dd 0

timestr db 128 DUP (0)

taskA dd 0
taskB dd 0
taskAwait dd 0
taskBwait dd 0

consoleLock dd 0

;####################################################################################################################################################################################
; CODE
;####################################################################################################################################################################################
.code

start:

invoke ActivateThreadSchedulers
invoke AllocateSpinLock
mov consoleLock,eax

; sequential test 1 core. (1500ms)
invoke GetTickCount
mov startTick,eax
testloop0:
call func1
call func2
inc loopCount
cmp loopCount,10000
jl short testloop0
invoke GetTickCount
sub eax,startTick
invoke dwtoa,eax,ADDR timestr
invoke StdOut,ADDR timestr
invoke StdOut,CTXT("ms")
mov loopCount,0
mov counterA,0
mov counterB,0

; test in parallel using 2 threads.
invoke GetTickCount
mov startTick,eax
testloop1:

invoke CreateEvent,0,FALSE,FALSE,0
mov taskAwait,eax
invoke CreateEvent,0,FALSE,FALSE,0
mov taskBwait,eax

invoke CreateThread,NULL,NULL,ADDR func1,ADDR taskA,CREATE_SUSPENDED,NULL
mov taskA,eax
invoke CreateThread,NULL,NULL,ADDR func2,ADDR taskB,CREATE_SUSPENDED,NULL
mov taskB,eax
invoke SetThreadIdealProcessor,taskA,0
invoke SetThreadIdealProcessor,taskB,1
invoke ResumeThread,taskA
invoke ResumeThread,taskB

invoke WaitForMultipleObjects,2,OFFSET taskAwait,TRUE,INFINITE

invoke CloseHandle,taskA
invoke CloseHandle,taskB
invoke CloseHandle,taskAwait
invoke CloseHandle,taskBwait

inc loopCount
cmp loopCount,10000
jl testloop1
invoke GetTickCount
sub eax,startTick
invoke dwtoa,eax,ADDR timestr
invoke StdOut,ADDR timestr
invoke StdOut,CTXT("ms")
mov loopCount,0
mov counterA,0
mov counterB,0

; test in parallel using custom mcore handler.
invoke GetTickCount
mov startTick,eax
testloop2:
invoke AssignTask,0,ADDR func1
invoke AssignTask,1,ADDR func2
invoke WaitCoreTask,2
inc loopCount
cmp loopCount,10000
jl short testloop2
invoke GetTickCount
sub eax,startTick
invoke dwtoa,eax,ADDR timestr
invoke StdOut,ADDR timestr
invoke StdOut,CTXT("ms")
mov loopCount,0
mov counterA,0
mov counterB,0

invoke FreeSpinLock,consoleLock
invoke ExitProcess,0


func1 proc
mov ecx,100000
floop:
add counterA,ecx
dec ecx
jnz short floop
invoke SetEvent,taskAwait
ret
func1 endp

func2 proc

mov ecx,100000
floop:
add counterB,ecx
dec ecx
jnz short floop
invoke SetEvent,taskBwait
ret
func2 endp

;####################################################################################################################################################################################
; SUPPORT FUNCTIONS
;####################################################################################################################################################################################

;----------------------------------------------------------------------------------------------------------------------
; Allocate a New Spin Lock Data Structure.
;----------------------------------------------------------------------------------------------------------------------
align 16
AllocateSpinLock PROC

invoke HeapAlloc,rv(GetProcessHeap),HEAP_ZERO_MEMORY,(4+128)
.if eax == 0
ret
.endif
memalign eax,128 ; Ensure that the start address and size occupy a single un-touched cacheline.

ret
AllocateSpinLock ENDP

;----------------------------------------------------------------------------------------------------------------------
; Release a Spin Lock Structure.
;----------------------------------------------------------------------------------------------------------------------
align 16
FreeSpinLock PROC lockAddr:DWORD

invoke HeapFree,rv(GetProcessHeap),NULL,lockAddr

ret
FreeSpinLock ENDP

;----------------------------------------------------------------------------------------------------------------------
; Blocking Spin Lock.
;----------------------------------------------------------------------------------------------------------------------
align 16
SpinLockLoop PROC lockAddr:DWORD

push edx
mov edx,lockAddr

aquireLock:
lock bts dword ptr [edx],0
jnc short gotLock

align 4
lockWait:
pause
test dword ptr [edx],1
jne short lockWait
jmp short aquireLock

gotLock:
pop edx

ret
SpinLockLoop ENDP

;----------------------------------------------------------------------------------------------------------------------
; Timeout Based Spin Lock.
;----------------------------------------------------------------------------------------------------------------------
align 16
SpinLockLoopC PROC lockAddr:DWORD

push edx
mov edx,lockAddr

aquireLock:
lock bts dword ptr [edx],0
jnc short gotLock

lockWait0:
mov ecx,10000
align 4
lockWait:
dec ecx
jz short gotLock
pause
test dword ptr [edx],1
jne short lockWait
jmp short aquireLock

gotLock:
pop edx

ret
SpinLockLoopC ENDP

;----------------------------------------------------------------------------------------------------------------------
; Dirty Yet Safe Relase Spin Lock.
;----------------------------------------------------------------------------------------------------------------------
align 16
SpinUnlock PROC lockAddr:DWORD

push edx
mov edx,lockAddr
xor eax,eax
mov [edx],eax
pop edx

ret
SpinUnlock ENDP

;----------------------------------------------------------------------------------------------------------------------
; Called by main application/DLL setup to allow per-core task delegation.
;----------------------------------------------------------------------------------------------------------------------
ActivateThreadSchedulers proc
LOCAL curCore:DWORD

pushad

mov curCore,0
mov ecx,coreCount
mov esi,offset dThreadHandles
mov edi,offset taskLocks
createThreads:

push ecx

;------------------------------------------------
; Create Per Core Thread
;------------------------------------------------
mov eax,offset coreNumbers
mov ebx,curCore
shl ebx,3
add eax,ebx
invoke CreateThread,NULL,NULL,ADDR ThreadScheduleHandler,eax,CREATE_SUSPENDED,NULL
pop ecx
push ecx
mov [esi],eax ; Store thread handle.
dec ecx
invoke SetThreadIdealProcessor,[esi],ecx

;------------------------------------------------
; Create a Wait and Halt Event For Each Thread
;------------------------------------------------
invoke CreateEvent,0,FALSE,FALSE,0
mov ebx,curCore
mov taskWaits[ebx*4],eax
invoke CreateEvent,0,FALSE,FALSE,0
mov ebx,curCore
mov taskHalts[ebx*4],eax

;------------------------------------------------
; Create Custom Task Locks
;------------------------------------------------
invoke AllocateSpinLock
mov ebx,curCore
shl ebx,2
mov [edi+ebx],eax

pop ecx

add esi,4
inc curCore
dec ecx

jnz short createThreads

popad

ret
ActivateThreadSchedulers endp

;----------------------------------------------------------------------------------------------------------------------
; Reset a core's task state.
;----------------------------------------------------------------------------------------------------------------------
ResetCoreTask proc coreNo:DWORD

; Lock task state.
mov esi,offset taskLocks
mov ebx,coreNo
shl ebx,2
add esi,ebx
mov eax,[esi]
SPIN_LOCK eax

; Flag task as new/reset.
mov esi,offset taskStates
mov ebx,coreNo
add esi,ebx
mov byte ptr [esi],TASK_IDLE

; Unlock task state.
SPIN_UNLOCK eax

ret
ResetCoreTask endp

;----------------------------------------------------------------------------------------------------------------------
; Obtain a core's current task state.
; Returns: AL = state [ idle, running ]
;----------------------------------------------------------------------------------------------------------------------
GetTaskState proc coreNo:DWORD

mov esi,offset taskStates
mov ebx,coreNo
add esi,ebx
xor eax,eax
mov al,[esi]

ret
GetTaskState endp

;----------------------------------------------------------------------------------------------------------------------
; Wait for all Core Tasks to Complete.
;----------------------------------------------------------------------------------------------------------------------
WaitCoreTask proc coreUseCount:DWORD

invoke WaitForMultipleObjects,coreUseCount,OFFSET taskWaits,TRUE,INFINITE

ret
WaitCoreTask endp

;----------------------------------------------------------------------------------------------------------------------
; Generic Per-Core Thread Handler. Allows responsive delegation of per-core tasks.
;----------------------------------------------------------------------------------------------------------------------
align 16
ThreadScheduleHandler proc nThread:DWORD

align 16
schedulerLoop:

; Aquire task state lock.
mov esi,offset taskLocks
mov eax,nThread
mov ebx,[eax]
shl ebx,2
add esi,ebx
mov eax,[esi]
SPIN_LOCK eax

; Ensure the scheduler is set to halt.
mov eax,nThread
mov eax,[eax]
mov eax,taskHalts[eax*4]
invoke ResetEvent,eax

mov eax,nThread
mov ecx,[eax+4]

; Is there a task?
.if ecx != 0

; Execute task
call dword ptr ecx

; Reset task pointer to 0.
mov eax,nThread
mov dword ptr [eax+4],0

.endif

; Flag task as complete.
mov eax,nThread
mov ebx,[eax]
mov esi,offset taskStates
add esi,ebx
mov byte ptr [esi],TASK_IDLE

; Release task state lock.
mov esi,offset taskLocks
mov eax,nThread
mov ebx,[eax]
shl ebx,2
add esi,ebx
mov eax,[esi]
SPIN_UNLOCK eax

; Signal Task Completion and Wait on Halt.
mov eax,nThread
mov eax,[eax]
mov ebx,taskHalts[eax*4]
mov eax,taskWaits[eax*4]
invoke SignalObjectAndWait,eax,ebx,INFINITE,FALSE

jmp schedulerLoop

ret
ThreadScheduleHandler endp

;----------------------------------------------------------------------------------------------------------------------
; Assign a task to a given core(n).
;----------------------------------------------------------------------------------------------------------------------
align 16
AssignTask proc coreNo:DWORD, taskAddr:DWORD

push esi
push ebx

; Aquire task state lock.
mov esi,offset taskLocks
mov ebx,coreNo
shl ebx,2
add esi,ebx
mov eax,[esi]
SPIN_LOCK eax

; Flag task as running.
mov esi,offset taskStates
mov ebx,coreNo
add esi,ebx
mov byte ptr [esi],TASK_RUNNING

; Set Core Task Pointer.
mov esi,offset coreNumbers
mov eax,coreNo
shl eax,3
mov ebx,taskAddr
mov [esi+eax+4],ebx

; Release task state lock.
mov esi,offset taskLocks
mov ebx,coreNo
shl ebx,2
add esi,ebx
mov eax,[esi]
SPIN_UNLOCK eax

; Launch core scheduler if not already running.
mov eax,coreNo
mov eax,dThreadHandles[eax*4]
invoke ResumeThread,eax

; Trigger the task halt event to allow scheduler to continue.
mov eax,coreNo
mov eax,taskHalts[eax*4]
invoke SetEvent,eax

pop ebx
pop esi

ret
AssignTask endp

end start