I need to sort several text files in ascending order, each line contains series of number and text separated by pipes.
How can I adapt string sort function in masm32 library for this purpose? I tried but failed.
Is there a better way to do it beside using crt_qsort?
I need to use callback function to convert numbers before comparing them just like in crt_qsort.
The one from masm32 library cannot correctly sort them without callback function.
When modifying, I think it crashed because insertion sort function has no stackframe
but I pushed two registers and call comparefunc, that's the problem?
I commented out the call to insertion sort at the end, it does not crash but the output is weird.
you may want us to show an example what you are trying to sort?
However, did it it work using the CRT? The CRT function are also optimized, so it is really ok to use them.
Yes it worked using crt_qsort but I like to avoid msvcrt.dll, want to do it in asm.
In attachment s1.txt is unsorted file cut from one of bigger files I want to sort.
And s2.txt is sorted by crt_qsort function.
In a line, first part is converted to DWORD and compare. if equal, compare second part.
Looks like it's almost sorted but some files are not like this, some have strings but only check first two parts.
here an quick adaption from some java source: the prog sort the file according to the numbers in the second column ('skill_level')
Quoteinclude masm32rt.inc
QSCOMP typedef proto ppsz:ptr PCHAR,iA:DWORD,iB:DWORD
qsort proto ppsz:ptr PCHAR,pCmpFnc: ptr QSCOMP,iLeft:DWORD,iRight:DWORD
comp proto ppsz: ptr PCHAR,iA:DWORD,iB:DWORD
.code
main proc
LOCAL pTxt:PCHAR
LOCAL ppsz:ptr PCHAR
LOCAL hFile:HANDLE
LOCAL nLines:DWORD
mov pTxt,alloc(fsize(ASM(mov hFile,fopen("S1.Txt"))))
mov eax,fread(hFile,pTxt,fsize(hFile))
mov nLines,rv(ltok,pTxt,ADDR ppsz)
print "nLines: "
print str$(nLines),13,10
mov edx,nLines
lea edx,[edx-1]
invoke qsort,ppsz,OFFSET comp,1,edx
mov edi,fcreate("xyz.txt")
mov ebx,ppsz
.while nLines
fprint edi,DWORD ptr [ebx]
add ebx,4
dec nLines
.endw
fclose edi
fclose hFile
inkey
exit
main endp
;return:
; 1: A <= B
; 2: A > B
comp proc uses esi edi ebx ppsz: ptr PCHAR,iA:DWORD,iB:DWORD
LOCAL A:SDWORD
LOCAL B:SDWORD
mov ebx,ppsz
mov esi,iA
mov edi,iB
mov esi,DWORD ptr [ebx+esi*4]
mov edi,DWORD ptr [ebx+edi*4]
.while CHAR ptr [esi] != '|'
inc esi
.endw
.while CHAR ptr [edi] != '|'
inc edi
.endw
inc esi
inc edi
fn crt_sscanf,esi,"%d",ADDR A
fn crt_sscanf,edi,"%d",ADDR B
mov esi,A
mov edi,B
.if esi > edi
mov eax,2
.elseif esi <= edi
mov eax,1
.endif
ret
comp endp
qsort proc uses esi ebx edi ppsz:ptr PCHAR,pCmpFnc: ptr QSCOMP,iLeft:DWORD,iRight:DWORD
LOCAL i:DWORD
LOCAL j:DWORD
LOCAL iPivot:DWORD
mov esi,ppsz
mov ebx,pCmpFnc
assume ebx: ptr QSCOMP
m2m i,iLeft
m2m j,iRight
m2m iPivot,i
mov eax,iRight
sub eax,iLeft
.if SDWORD ptr eax >= 1
mov eax,j
.while eax > i
mov edi,i
.while rv(ebx,ppsz,i,iPivot) == 1 && edi <= iRight && j > edi
inc i
invoke ebx,ppsz,i,iPivot
mov edi,i
.endw
mov edi,j
.while rv(ebx,ppsz,j,iPivot) == 2 && edi >= iLeft && edi >= i
dec j
invoke ebx,ppsz,j,iPivot
mov edi,j
.endw
mov eax,i
.if j > eax
;swap i,j
mov eax,i
mov edx,PCHAR ptr [esi+4*eax]
mov eax,j
mov ecx,PCHAR ptr [esi+4*eax]
mov [esi+4*eax],edx
mov eax,i
mov [esi+4*eax],ecx
.endif
mov eax,j
.endw
; swap j,iLeft
mov eax,iLeft
mov edx,PCHAR ptr [esi+4*eax]
mov eax,j
mov ecx,PCHAR ptr [esi+4*eax]
mov [esi+4*eax],edx
mov eax,iLeft
mov [esi+4*eax],ecx
mov eax,j
lea eax,[eax-1]
invoke qsort,ppsz,pCmpFnc,iLeft,eax
mov eax,j
lea eax,[eax+1]
invoke qsort,ppsz,pCmpFnc,eax,iRight
.endif
ret
assume ebx: nothing
qsort endp
end main
Thank you.
The output xyz.txt is not what expected.
I'll study the source tomorrow, it's 1.20 AM now.
As a non native English speaker I might made it sounds misleading of what I'm trying to do.
I'd like the result exactly as appears in S2.txt which sorted by CRT qsort.
Sort by first DWORD then among lines that share the same first DWORD, sort by second DWORD.
The result will be like
4|1|0
4|2|0
12|1|3
30|1|0
30|2|0
52|1|9
113|1|1
113|2|1
113|3|1
and so on...
qWord's code might sort the file if I can adapt it but there are macros, etc, I'm not familiar with.
So I will stick to adapting masm32 library 'assort' function.
As I used crt_qsort, the file is tokenised into array of pointer and passed to sort function, it sorted correctly.
Now I passed it to 'assort' function it sorted but not in the way I want it to.
So I need to modify it to use callback function to compare two strings instead of inline code.
The callback function return zero if equal, 1 if greater, -1 if less, just like 'lstrcmpi' did.
It also preserve all registers except EAX.
I replaced compare loops in 'asqsort' and 'acisort' with two pushes and a call to compare function.
But in 'aissort' it does not use stackframe. That's what make it crashed, is it?
How can I modify 'aissort' to use callback then?
At the end of 'acisort' I commented out the call to 'aissort' and it doesn't crash but the output is sorted oddly.
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
align 4
aissort proc arr:DWORD,cnt:DWORD
; -------------------------------
; ascending insertion string sort
; -------------------------------
mov [esp-8], ebx
mov [esp-12], esi
mov [esp-16], edi
mov [esp-20], ebp
mov edi, [esp+4] ; array address
mov edx, 1
cmp edx, [esp+8] ; compare count to EDX
jge quit
xor eax, eax ; clear EAX of previous content
entry:
mov ebx, [edi+edx*4]
mov esi, edx
inner:
mov ecx, [edi+esi*4-4]
mov ebp, -1
;----------------------------------
; I want to replace this loop with a call to compare function
;
; push ecx
; push ebx
; call comparefunc
; cmp eax, 0
;----------------------------------
stcmp: ; string comparison loop
add ebp, 1
mov al, [ecx+ebp]
cmp al, [ebx+ebp]
jl outer
jg swap
test al, al
jnz stcmp
jmp outer
;----------------------------------
;----------------------------------
swap:
mov [edi+esi*4], ecx
sub esi, 1
jnz inner
outer:
mov [edi+esi*4], ebx
add edx, 1
cmp edx, [esp+8] ; compare count to EDX
jl entry
quit:
mov ebx, [esp-8]
mov esi, [esp-12]
mov edi, [esp-16]
mov ebp, [esp-20]
ret 8
aissort endp
OPTION PROLOGUE:PrologueDef
OPTION EPILOGUE:EpilogueDef
Now that I try the MASM32 aissort procedure, anytime I pass a count > 1, it triggers an access violation exception. I'm not sure whether the problem is some unusual requirement, or a bug. In any case, it's not a good starting point for what you are doing because it preserves EBX, ESI, EDI, and EBP in the "free" area of the stack, and pushing anything onto the stack from within the procedure will corrupt the preserved values.
The reason the MASM32 sort procedures do the comparison inline is to avoid the overhead of calling out of the sort loop. The CRT qsort function uses a separate comparison function so a single sort function can be used to sort any type of data, but this flexibility comes at the cost of slower sorts. To give you an idea of how much slower, I have a modified version of the qsort C source from the PSDK that does the comparisons inline, and sorting a random 10000000-element integer array it is nearly 3 times faster than crt_qsort.
Most of the files have 70,000 lines at most so the crt_qsort is usable if I don't sort all of them at once.
But I still can't give up on this.
If I pass values by registers to compare function instead of pushes, will it work?