Documentation tool for ASM (and others, hopefully)

TNick · January 10, 2010, 07:29:31 PM

Hello, all. I would like to ask the community to comment about the format that I propose below for documenting MASM projects (which may be extended to document other assembly language projects).
The project that I'm working on is intended to be as flexible as possible, so the user may write their own parser or modify an existing one. But the app should come with an example, and this example parser may be used by the vast majority of users. But enough talk, here it is:

Code Select


;=================================================================================================;
;{                                                                                               };
;{ FILE DESCRIPTION:      THIS REPRESENTS SHORT DESCRIPTION OF THE FILE (ONE LINE ONLY)          };
;{                                                                                               };
;=================================================================================================;
;{                                                                                               };
;{      Here is where extended description of the file may be placed, on multiple lines          };
;{ as fit for any file. This description is optional.                                            };
;{                                                                                               };
;{      As above, blank lines may be used to enhance visual effect. The description ends with    };
;{ first line that is not a comment (either blank or with some text on it)                       };
;{                                                                                               };
;=================================================================================================;

; only lines that start with ;{ are considered for output; this line is simply ignored
; if a } (whether or not followed by a ;) is found at the end of a line, it will be ignored
; 


.DATA
        
        ;{ this is the description for a variable in const, data or data? sections };
        ;{ which may consist of several lines                                      };
        AVar            DWORD           0       ;{ above, on same line and         };
                                                ;{ below definition                };
        
.CONST
        ;{ a noncomment line must be inserted between a comment that refers };
        ;{ to previous variable and a line that refers to next variable     };
        ASecondVar      DWORD           1       ;{ if on same line, you can have them glued up };

.DATA?
        ;{ this refers to third variable                                    };
        ThirdVar        DWORD           ?

;=================================================================================================;
;{ VARGRP: THIS IS A GROUP OF VARIABLES THAT SHARE SOME COMMON PROPERTY                          };
;=================================================================================================;
;{      Here goes the description for this group of variables                                    };
;{      The description ends when a non-comment is found, or a comment that misses {             };
;=================================================================================================;
        
        ;{ each variable has its own description as before };
        Vargrp_1        DWORD           ?
        
        Vargrp_2        BYTE            ?
        
; this is how you end a group (end of file should do it, too)

;{ VARGRP END                                                                                    };
;=================================================================================================;
        
        
        ;{ this is the description for a constant                           };
        ;{ which may consist of several lines                               };
        AsmTVar1        =               0       ;{ above, on same line and  };
                                                ;{ below definition         };

        ;{ this is the description for a constant                           };
        ;{ which may consist of several lines                               };
        AsmTVar2        equ             0+41    ;{ above, on same line and  };
                                                ;{ below definition         };

        ;{ this is the description for a constant                           };
        ;{ which may consist of several lines                               };
        AsmTVar3        textequ         <ebp>   ;{ above, on same line and  };
                                                ;{ below definition         };


;=================================================================================================;
;{ CONSTGRP: THIS IS A GROUP OF CONSTANTS THAT SHARE SOME COMMON PROPERTY                        };
;=================================================================================================;
;{      Here goes the description for this group of constants                                    };
;{      The description ends when a non-comment is found, or a comment that misses {             };
;=================================================================================================;

        ;{ each variable may have its own description as before };
        Constgrp_1      =               10
        
        Constgrp_2      equ             WM_PAINT

;{ CONSTGRP END                                                                                  };
;=================================================================================================;

.CODE

;=================================================================================================;
;{ FUNCTION: SHORT DESCRIPTION OF THE FUNCTION TO BE USED IN INDEXES                             };
;=================================================================================================;
; if no argument is present, the line may be left out, or this form may be used: ARGUMENTS: NONE 
;{ ARGUMENTS: - Arg1: this is the description for first argument                                 };
;{            - Arg2: this is the description for second argument                                };
; 3 ways of talking about returned value(s) 
;{ RETURNS: SUCCESS: - EAX: pointer to allocated memory                                          };
;{                   - ZF: reset                                                                 };
;{          FAILURE: - EAX: zero                                                                 };
;{                   - ZF: set                                                                   };
;{ RETURNS: - EAX: pointer to allocated memory in case of success, 0 otherwise                   };
;{          - ZF: reset for success, set otherwise                                               };
;{ RETURNS: The function will return a pointer to allocated chunk of memory in eax and zf reset. };
;{          If, for some reason, the function fails, it will return 0 in eax and ZF set.         };
; description, if present, should be last (immediately before the function's body)
;{ DESCRIPTION:                                                                                  };
;{      This is where the main description goes. It may consist of multiple lines; each end of   };
;{ the line in source file is  interpreted as a space. If a new paragraph is required, it should };
;{ start with a TAB character.                                                                   };
;{      Like so. Descriptions in general are chains of paragraphs.                               };
;{                                                                                               };
;{      As above, blank lines may be used to enhance visual effect. The description ends with    };
;{ first line that is not a comment (either blank or with some text on it)                       };
;{                                                                                               };
;=================================================================================================;
Some_Function           PROC    FAR STDCALL PUBLIC <FORCEFRAME> USES esi edi ebx,Arg1:DWORD,Arg2
        ;{ description for local
        ;{ elements may be placed
        LOCAL   LocVar1:DWORD   ;{ before, on same line
                                ;{ and after the definition
                                ;{ if a line of comments is present after the line where is
                                ;{ defined, then a blank line should be placed to
                                ;{ separate last line of this comment from first
                                ;{ line of next comment
                                
        ;{ like so; this is first line of description for second local
        LOCAL   LocVar2:DWORD
                                ;{ this comment will be ignored, because second local
                                ;{ does not have a comment on same line as itself
                                ;{ if there would be no empty line after, it would be
                                ;{ part of description for LocVar3. But it's not
                                
        LOCAL   LocVar3:DWORD   ;{description may be present only on same line
        LOCAL   LocVar4:DWORD   ;{description may be present only on same line
        
        ;{ description may be present only above
        LOCAL   LocVar5[16]:BYTE
        ;{ description may be present only above
        LOCAL   LocVar6[16]:BYTE
        
        
        ; some comment
        mov     eax,            ecx
        
        
        ret
Some_Function           ENDP
;=================================================================================================;

;=================================================================================================;
;{ STRUCT: SOME STRUCTURE DEFINITION; THIS DESCRIPTION IS ONE LINE ONLY                          };
;=================================================================================================;
;{      Here goes the extended description for the structure, if there is one. Same rules as in  };
;{ function's description apply.                                                                 };
;=================================================================================================;
Some_Structure          STRUCT
        
        ;{ same rules as for locals
        ;{ apply to structure members,};
        A_member_1      BYTE            ?;{ too
        ;{ description may be present only above
        A_member_2      BYTE            ?
        A_member_3      DWORD           ?       ;{description may be present only on same line
        
        ;{ unions may have description, too }
        UNION
                ;{ and each member, too... or not}
                a       BYTE            ?
                b       WORD            ?
        ENDS
        
        ;{ nested structures may have description, too }
        STRUCT
                ;{ and each member, too... or not}
                c1      BYTE            ?
                c2      BYTE            ?
        ENDS
        
        ;{ nested structures may have description, too }
        STRUCT  Anamedstruct
                ;{ and each member, too... or not}
                c1      BYTE            ?
                c2      BYTE            ?
        ENDS
        
        
Some_Structure          ENDS
;=================================================================================================;

;=================================================================================================;
;{ MACRO: SHORT DESCRIPTION OF THE MACRO TO BE USED IN INDEXES                                   };
;=================================================================================================;
;{ ARGUMENTS: - Arg0: this is the description for first argument                                 };
;{            - Arg1: this is the description for second argument                                };
;{            - Arg2: this is the description for a macro parameter                              };
;{            - Arg3: this is the description for a macro parameter                              };
;{ RETURNS: Only one way to define macro output (as a continuous string).                        };
;{          Multiple lines may still be used and TAB character generates new paragraphs.         };
;{ DESCRIPTION:                                                                                  };
;{      This is where the main description goes. It may consist of multiple lines; each end of   };
;{ the line in source file is  interpreted as a space. If a new paragraph is required, it should };
;{ start with a TAB character.                                                                   };
;{      Like so. Descriptions in general are chains of paragraphs.                               };
;{                                                                                               };
;{      As above, blank lines may be used to enhance visual effect. The description ends with    };
;{ first line that is not a comment (either blank or with some text on it)                       };
;{                                                                                               };
;=================================================================================================;
Some_Macro              MACRO   Arg0, Arg1:REQ, Arg2:=<0>, Arg3:VARARG
        ;{ description for local
        ;{ elements may be placed
        LOCAL   LocVar1 ;{ before, on same line
                                ;{ and after the definition
                                ;{ just like locals in functions


ENDM
;=================================================================================================;

Now, the question is: do you think this is practical? Would you write the code in this style to be able to auto-generate documentation for your projects? If not, what style would you see fit?
A more important question, maybe... do you feel in need for such a tool (similar to Doxygen)?

Regards,
Nick

dedndave · January 10, 2010, 08:24:56 PM

i may be wrong, but i think the structures and macros have to be at the beginning
they are typically in an external INC file, though

TNick · January 10, 2010, 08:40:11 PM

Thanks for interest, Dave!

Quote from: dedndave on January 10, 2010, 08:24:56 PM
i may be wrong, but i think the structures and macros have to be at the beginning
they are typically in an external INC file, though

Oh, it's not about that. Consider that each element is in its own separate file, if you like. Finding what file belongs where is another story.

What I'm interested in is: would you consider this an efficient way to document your code? Do you ... like or dislike the style? Is it appealing to you? Again, would you adopt this style if you would like to have an automated tool document your code (generate html files for each structure, function, ..., generate indexes of structures, functions, ..., generate html mirrors of your source files, ...) later on?

Nick

dedndave · January 10, 2010, 08:58:32 PM

it seems rather complete
i have found that, sometimes, it requires more text to explain how something works than you really want to put inside the PROC
in those cases, an external readme file may be in order
but that is probably beyond the scope of what you are trying to do
if you want an example of one of my documented routines, d/l the LLKF9_1.zip file here....

http://www.masm32.com/board/index.php?topic=12363.msg94779#msg94779

TNick · January 10, 2010, 09:21:48 PM

Yes, I have seen your file. interesting thing about it is that you have placed the description inside procedure. Maybe I should consider that, too. But where? before locals? After...? Anywhere.... yes, that should be it. You may place an unnamed function block (the commented thingy, that is) anywhere before or inside a procedure; if you place it anywhere else, you have to name it with same name as the procedure, like so:

Code Select


;=================================================================================================;
;{ FUNCTION Some_Function: SHORT DESCRIPTION OF THE FUNCTION TO BE USED IN INDEXES               };
;=================================================================================================;

Nick

dedndave · January 10, 2010, 09:26:49 PM

well - as you can see in the order i did it - i put the description and usage at the top
the locals, etc are essentially part of the code

jj2007 · January 10, 2010, 10:46:50 PM

Quote from: TNick on January 10, 2010, 08:40:11 PM
What I'm interested in is: would you consider this an efficient way to document your code? Do you ... like or dislike the style? Is it appealing to you? Again, would you adopt this style if you would like to have an automated tool document your code (generate html files for each structure, function, ..., generate indexes of structures, functions, ..., generate html mirrors of your source files, ...) later on?
Nick

Nick,
That is an interesting proposal. The question is really if enough people find the energy and discipline to follow such a scheme...
Let me give a brief example of how I manage my library. It contains a debug macro that shows a popup with register values, memory content etc, until you click cancel. Easy to use, complex under the surface. What happens is that I have one para of description in an RTF file as follows:

Quote deb
   deb 1, "The first loop", ecx, esi$, edi$, MyReal10, ST(1)
   deb 2, "Second loop:", al, ecx, esi$, $My$

Rem   the debug macro preserves registers and flags
   can show FPU registers but trashes ST(6) and ST(7)
   the string content of registers can be shown by using eax$, ecx$, esi$ etc.
   global and local variables can be shown as strings by using e.g. $buffer
   cancelling deb 1, ... does not cancel deb 2, ..., so you can test several loops in one go
   deb 1, ... deb 4, ... are being displayed, while deb 5, writes to a file:
   deb 5, "ToFile", eax, esi$, edi$ saves contents (without showing them) to DebLog.txt
Key   deb

Now let's assume I need further "documentation" because something is going wrong. Typically I would first consult this file, in the hope to find a link to the original source. Having found that source, I would identify the bug, and try to find out which loop or which variable is responsible for the problem. Then I would solve it somehow.

For the latter step, I do not need a generated help file, but rather the original source - and that one should be well commented, of course.

So the moral of the story is:
1. If the deb macro isn't broken, fine, then the few lines above are sufficient
2. If it's broken, then a generic description will not help, I need well-documented source code.

Masm32 follows basically that logic:
1. There are help files (too many for my taste, and not searchable enough) that give you short usage info.
2. If that is not enough, you know where to find the original source - try your luck...

Don't misunderstand me: I am not criticising your proposed style. I am just trying to be realistic, and my feeling is that in order to force nerds into documentation, the ratio added value / added effort must be incredibly high. Microsoft pays people explicitly to do this documentation - Masm32 is run by a bunch of hobby programmers (and some professionals, too). But maybe other members' minds work differently, so let's collect some more opinions before trying to draw any lessons.
:thumbu

TNick · January 11, 2010, 06:41:37 AM

Hello, Jochen.

What you say is logical, no doubt about it. But the (main) purpose of the tool is a little bit different, I would say. It is intended to give an outsider (you are an outsider after a couple of months) a structured view of your project. All definitions are easy to reach in, say, html documentation, using the links and tables for a clear structured view. You can read "the story" without having to scroll through lines of code.

Quote from: jj2007 on January 10, 2010, 10:46:50 PM
Don't misunderstand me: I am not criticising your proposed style.

:) The style is posted here for criticism.
As I said, each user will be able to write a parser to suit their own personal taste. I just need to write a parser that should run "out of the box".

Now, about your macro... is the quote that you have posted here exactly what you find in the RTF file? This is a rather interesting system. I would like to hear more about it.

Quote from: jj2007 on January 10, 2010, 10:46:50 PM
my feeling is that in order to force nerds into documentation, the ratio added value / added effort must be incredibly high

:bdg Yes, that's the ratio that I try to adjust... :)

Thanks,
Nick

sinsi · January 11, 2010, 07:16:09 AM

I like to see an entire proc (or section) on one screen, so an enormous header before the proc, for me, is too much.
If you use descriptive names in a proc declaration it is usually good enough and you can also use them in a proto, instead of :dword,:dword.

Programmers are prone to idiosyncrasies (like humans) so whatever works for you is good. I still don't understand jj's code when it's rgb but that's just me...

>You can read "the story" without having to scroll to lines of code.
To me, that's backwards - you can't get the story without going through the code. Must be why I hate macros :P

TNick · January 11, 2010, 08:05:45 AM

Hi, sinsi!

Quote from: sinsi on January 11, 2010, 07:16:09 AM
I like to see an entire proc (or section) on one screen, so an enormous header before the proc, for me, is too much.
If you use descriptive names in a proc declaration it is usually good enough and you can also use them in a proto, instead of :dword,:dword.

Yes. This is one of the main reasons why a function's description (enormous header :green2 ) may be placed in a different part of your project (like a txt file, or at the beginning/end of the file), but in this case you need to place the name of that entity after the keyword (FUNCTION Some_Function). For a procedure that has no header, the page simply lists the name, the arguments and locals...

Quote from: sinsi on January 11, 2010, 07:16:09 AM
I still don't understand jj's code when it's rgb but that's just me...

:bdg

Best regards,
Nick

jj2007 · January 11, 2010, 08:07:07 AM

Quote from: TNick on January 11, 2010, 06:41:37 AM
the tool is a little bit different, I would say. It is intended to give to an outsider (you are an outsider after a couple of months) an structured view about your project. All definitions are easy to reach in, say, html documentation, using the links and tables for clear structured view. You can read "the story" without having to scroll to lines of code.

Hello Nick,
I understand the good intentions, and I admit it would be a good system. The problem is Organically Grown Code^TM. My code is either short and self-explanatory, or it is so complex that I navigate myself by debugging or chasing global variables all over the place. If it's working (most of the time), I won't touch it, let alone add structured documentation. Now that is bad behaviour, but I have a suspicion that it is fairly common ::)

Quote
Now, about your macro... the quote that you have posted here is exactly what you find in RFT file? This is a rather interesting system. I would like to hear more about it.

Sample attached. I would not dare to call it a system, it's just the .rtf flavour of .hlp or .chm files. If a user selects a word, e.g. Recall, in RichMasm and presses F1, the editor searches the attached file for MbRecall and displays the para. Simple and effective, but that is exactly what you can do with *.hlp, too. I chose this "system" because I can control it myself, and because the newer .chm format has no easy way to get this "select and press F1" behaviour....

Quote from: jj2007 on January 10, 2010, 10:46:50 PM
my feeling is that in order to force nerds into documentation, the ratio added value / added effort must be incredibly high

:bdg Yes, that's the ratio that I try to adjust... :)

Thanks,
Nick

Quote

rags · January 11, 2010, 10:52:56 AM

Quote from: sinsi on January 11, 2010, 07:16:09 AM
Programmers are prone to idiosyncrasies (like humans)

Hey Sinsi, if we are not human what are we? :bdg

TNick · January 11, 2010, 11:43:22 AM

Quote from: jj2007 on January 11, 2010, 08:07:07 AM
... or it is so complex that I navigate myself by debugging or chasing global variables all over the place....

That is exactly the problem that I try to solve. I find myself more willing to write the code again than to modify an existing piece (which takes longer, of course). But that is because the time it takes to understand what I was thinking a while ago feels like lost time. So, if the process of understanding is easy, I may be more willing to try to modify existing code....

Quote from: rags on January 11, 2010, 10:52:56 AM
Quote from: sinsi on January 11, 2010, 07:16:09 AM
Programmers are prone to idiosyncrasies (like humans)
Hey Sinsi, if we are not human what are we? :bdg

Japeth is Messiah, I don't know anything about the rest... :P

Nick

oex · January 11, 2010, 12:27:30 PM

Quote from: TNick on January 11, 2010, 11:43:22 AM
I find myself more willing to write the code again than to modify an existing piece (which takes longer, of course). But that is because the time it takes to understand what I was thinking a while ago feels like lost time. So, if the process of understanding is easy, I may be more willing to try to modify existing code....

This is a layout/modulation issue.... If you build your code into well defined 'objects' conforming to a simple predefined layout the code is both easier to understand and can be partially copied/pasted if speed is absolutely necessary....

ie a function to read a bmp split into several sections with crlf is easily read.... The smaller the amount of data you have to take in the better, too many comments just confuse your view on the screen with scroll bars etc....

One trick I use is to indent an extra level with a tab for everything that could be considered part of a parent block ie conditionals, loops etc that way you can immediately see main loops etc without any color coding or extra garbage....

TNick · January 12, 2010, 10:24:28 AM

Quote from: oex on January 11, 2010, 12:27:30 PM
This is a layout/modulation issue.... If you build your code into well defined 'objects' conforming to a simple predefined layout the code is both easier to understand and can be partially copied/pasted if speed absolutely necessary....

that's the theory. Hard to put in practice sometimes.

Quote from: oex on January 11, 2010, 12:27:30 PM
One trick I use is to indent an extra level with a tab for everything that could be considered part of a parent block ie conditionals, loops etc that way you can immediately see main loops etc without any color coding or extra garbage....

Indenting is a good method. I'm glad you don't need extra documentation.

Thing is: I have a code style by now that I enjoy and I don't plan to change. To enhance this style, I am thinking about the kind of documentation I've described here. The thread is intended to check if others feel in need of such a tool and, if so, what style is most convenient. Once the first parser is ready and the rules are in place, it will be easy for me to write a different one with different settings.

Regards,
Nick

News:

Documentation tool for ASM (and others, hopefully)