Post Reply 
Social Buttons
 
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
microB tokenizer
03-24-2019, 08:05 PM
Post: #1
microB tokenizer
A small (not yet complete) tokenizer for the BL interpreter.

Code:
'microB tokenizer by Aurel 24.3.2019
Include "microBh.inc"
' Token type codes. In this version the tokenizer below stores only the token
' text in tokList[]; it does not assign any of these codes to typList[].
int tkNULL=0, tkPLUS=1, tkMINUS=2, tkMULTI=3, tkDIVIDE=4
int tkCOLON=5, tkCOMMA=6, tkLPAREN=7, tkRPAREN=8, tkLBRACKET=9, tkRBRACKET=10
int tkPRINT=11, tkDOT=12, tkLINE=13, tkCIRCLE=14 , tkEOL = 20
string tokList[256] : int typList[256]   'token/type arrays
' p  = read position in the source (the tokenizer reads with mid(src,p,1))
' tp = index of the last token stored in tokList[]
' n  = loop counter used by the token listing at the end of the program
' NOTE(review): "start" is named twice in this declaration - confirm this is intended.
int start , p = 1 ,start = p ,tp ,n      'init
' lineCount = line counter; Lpar/Rpar/Lbrk/Rbrk = number of ( ) [ ] seen so far;
' tokerr = error code (1..6) set by the tokenizer before it jumps to tokExit
int lineCount, Lpar, Rpar, Lbrk, Rbrk, tokerr
' code = source text to tokenize, ch = current character, tk = token being built,
' bf = output buffer for the token listing; tch is not used in the code shown here
string code,ch,tch,tk ,crlf=chr(13)+chr(10),bf
'--------------------------------------------------------------------
' Sample source: two CRLF-terminated lines. The second line contains "arr 10]",
' a right bracket with no matching left bracket.
code = "func(a,b): var1+ 0.5*7: str s="+ chr(34)+ "micro"+chr(34) + crlf + "if a>b: arr 10]" + crlf     ' test or load_src?
'--------------------------------------------------------------------
' tokenizer(src) - split microB source text into tokens.
'
' Walks src one character at a time (p is the 1-based read position) and
' appends the text of every token to tokList[]; tp is the index of the last
' token stored.
'   - blanks and TABs are skipped
'   - CR+LF ends a line: the ( ) and [ ] counts seen so far must match,
'     then lineCount is advanced (no token is stored for the line end)
'   - "..." stores the text between the quotes (may be empty)
'   - [a-z][a-z0-9]* stores an identifier, [0-9][0-9.]* stores a number
'   - + - * / ( ) , : < = > [ ] & | ! are stored as one-character tokens
' Any other character (including a CR or LF that is not part of a CR+LF
' pair) is reported as an unknown token.
' Returns the number of tokens, or 0 after showing a message box when an
' error is found; tokerr then holds the error code 1..6.
sub tokenizer(src as string) as int
int pStart                                                            ' value of p when the current pass began
lineCount=1
p=1 : tp=0 : tokerr=0 : tk=""                                         ' start clean so a repeated call does not reuse old state
Lpar=0 : Rpar=0 : Lbrk=0 : Rbrk=0
while p <= len(src)
     pStart = p
     ch = mid(src,p,1)                                                 'get char

If asc(ch)=32 : p=p+1 : end if                                        ' skip blank space[ ]
If asc(ch)=9  : p=p+1 : end if                                        ' skip TAB [    ]
If asc(ch)=13 and mid(src,p+1,1)= chr(10)                             ' skip CRLF & lineCount+1 / EOL
    if Lpar > Rpar  : tokerr=3 : goto tokExit : end if               ' if Rparen ((...)
    if Lpar < Rpar  : tokerr=4 : goto tokExit : end if              ' if Lparen (...))
    if Lbrk > Rbrk  : tokerr=5 : goto tokExit : end if                ' if Lbracket [..
    if Lbrk < Rbrk  : tokerr=6 : goto tokExit : end if                ' if Rbracket ...]
lineCount++ : p=p+2
End if

'--------------------------------------------------------
If asc(ch)=34                                                         ' if char is QUOTE "
    p++ : tk=""                                                       ' skip opening quote, start with an empty buffer
    while p <= len(src)                                               ' never read past the end of the source
        ch = mid(src,p,1) : if asc(ch)= 34 then exit while
        IF ch = chr(10): tokerr = 2: goto tokExit : end if            ' line ended before the closing quote
        tk=tk+ch : p++
    wend
    if p > len(src) : tokerr = 2 : goto tokExit : end if              ' source ended before the closing quote
    tp++ : tokList[tp] = tk : tk="":ch="": p++                         ' add quoted string to token list, skip closing quote
End if
'-------------------------------------------------------
If (asc(ch)>96 and asc(ch)<123)          ' [a-z]
   while (asc(ch)>96 and asc(ch)<123) or (asc(ch)>47 and asc(ch)<58)   ' [a-z0-9]*
         tk=tk+ch : p++ : ch = mid(src,p,1)
   wend
       tp++ : tokList[tp] = tk : tk="":ch=""                           ' ch is cleared so the tests below do not match
End If
'--------------------------------------------------------------
If (asc(ch)>47 and asc(ch)<58)                                       ' [0-9.]
    while (asc(ch)>47 AND asc(ch)<58) OR asc(ch)=46                   ' [0-9[0.0]]*
        tk=tk+ch :p++ : ch = mid(src,p,1)
    wend
       tp++ : tokList[tp] = tk : tk="":ch=""
End if
'---------------------------------------------------
If asc(ch)=43 : tp++ : tokList[tp] = ch : ch="" : p++ : End if      ' + plus
If asc(ch)=45 : tp++ : tokList[tp] = ch : ch="" : p++ : End if      ' - minus
If asc(ch)=42 : tp++ : tokList[tp] = ch : ch="" : p++ : End if      ' * multiply
If asc(ch)=47 : tp++ : tokList[tp] = ch : ch="" : p++ : End if      ' / divide
If asc(ch)=40 : tp++ : tokList[tp] = ch : ch="" : p++ : Lpar++ : End if      ' ( Lparen
If asc(ch)=41 : tp++ : tokList[tp] = ch : ch="" : p++ : Rpar++ : End if      ' ) Rparen
If asc(ch)=44 : tp++ : tokList[tp] = ch : ch="" : p++ : End if      ' , comma
If asc(ch)=58 : tp++ : tokList[tp] = ch : ch="" : p++ : End if      ' : colon
If asc(ch)=60 : tp++ : tokList[tp] = ch : ch="" : p++ : End if      ' < less
If asc(ch)=61 : tp++ : tokList[tp] = ch : ch="" : p++ : End if      ' = equal
If asc(ch)=62 : tp++ : tokList[tp] = ch : ch="" : p++ : End if      ' > more(greater)
If asc(ch)=91 : tp++ : tokList[tp] = ch : ch="" : p++ : Lbrk++ :End if      ' [ Lbracket
If asc(ch)=93 : tp++ : tokList[tp] = ch : ch="" : p++ : Rbrk++ :End if      ' ] Rbracket
If asc(ch)=38 : tp++ : tokList[tp] = ch : ch="" : p++ : End if      ' & AND
If asc(ch)=124 :tp++ : tokList[tp] = ch : ch="": p++ : End if      ' | OR
If asc(ch)=33 : tp++ : tokList[tp] = ch : ch="" : p++ : End if      ' ! NOT

' Every branch above advances p. If p did not move, no branch recognised the
' character (ch still holds it), so report it instead of looping forever.
IF p = pStart : tokerr = 1 : goto tokExit: END IF

wend
return tp
tokExit:
  IF tokerr > 0
    if tokerr = 1: MsgBox "Unknown token!-[ " + ch +" ] at LINE: " + str(lineCount),"T:Error"  : end if
    if tokerr = 2: MsgBox "Unclosed Quote!- at LINE: " + str(lineCount),"T:Error"  : end if
    if tokerr = 3: MsgBox "Missing right paren! ((...)- at LINE: " + str(lineCount),"T:Error"  : end if
    if tokerr = 4: MsgBox "Missing left paren!- at LINE: " + str(lineCount),"T:Error"  : end if
    if tokerr = 5: MsgBox "Missing right bracket!- at LINE: " + str(lineCount),"T:Error"  : end if
    if tokerr = 6: MsgBox "Missing left bracket!- at LINE: " + str(lineCount),"T:Error"  : end if

    Return 0
  END IF
end sub

' Run the tokenizer over the sample source and show the result.
int tn
tn = tokenizer(code)
if tn=0 then goto ExitProgram          ' tokenizer returned 0: skip the report

' summary line: token count and the tokenizer's line counter
print "Number of tokens: " + str(tn) + crlf + "Number of lines: " + str(lineCount)

' collect the tokens in bf, one per line, then print them
for n = 1 to tn
    bf = bf + tokList[n] + crlf
next n
print  bf

ExitProgram:
if tn=0
    print "Program Terminated!"
end if
Find all posts by this user
Quote this message in a reply
03-24-2019, 08:08 PM
Post: #2
RE: microB tokenizer
something strange geeee


Attached File(s) Image(s)
   
Find all posts by this user
Quote this message in a reply
03-26-2019, 05:25 AM
Post: #3
RE: microB tokenizer
new:
line comments starting with ' are now skipped (together with their EOL token)
fixed the EOL token
removed the syntax check on the EQ sign ... it should be added to SyntaxError() checking,
which runs after tokenization to perform complete error checking over the token list.

Code:
'microB tokenizer by Aurel 24.3.2019
Include "microBh.inc"
' Token type codes; the tokenizer below stores one of these in typList[]
' for every token it adds to tokList[].
int tkNULL=0, tkPLUS=1, tkMINUS=2, tkMULTI=3, tkDIVIDE=4
int tkCOLON=5, tkCOMMA=6, tkLPAREN=7, tkRPAREN=8, tkLBRACKET=9, tkRBRACKET=10
int tkIDENT = 11 , tkNUMBER = 12 , tkSTRING = 13, tkCOMMAND =14 ,tkEOL = 15
int tkEQUAL = 16, tkMORE = 17, tkLESS =18,tkAND=19, tkOR=20, tkNOT = 21
int tkHASH=22 , tkSSTR=23, tkMOD=24
string tokList[1024] : int typList[1024]   'token/type arrays
' p  = read position in the source (the tokenizer reads with mid(src,p,1))
' tp = index of the last token stored in tokList[] / typList[]
' n  = loop counter used by the token listing at the end of the program
' ltp is set from start at the top of tokenizer and not read elsewhere in the code shown
' NOTE(review): "start" is named twice in this declaration - confirm this is intended.
int start , p = 1 ,start = p ,tp ,n ,ltp=1     'init
' lineCount = line counter; Lpar/Rpar/Lbrk/Rbrk = number of ( ) [ ] seen so far;
' tokerr = error code (1..6) set by the tokenizer before it jumps to tokExit
int lineCount, Lpar, Rpar, Lbrk, Rbrk, tokerr
' code = source text to tokenize, ch = current character, tk = token being built,
' bf = output buffer for the token listing; tch and ntk are not used in the code shown here
string code,ch,tch,tk ,crlf=chr(13)+chr(10),bf,ntk
'--------------------------------------------------------------------
' Sample source: three CRLF-terminated lines; the second one starts with '
' (a comment line) and the third one starts with a colon.
code = "var1=2"     + crlf  +  ' line 1
       "': b =6 "   + crlf  +  ' line 2
       ":if a>b"    + crlf     ' line 3
'--------------------------------------------------------------------
' tokenizer(src) - split microB source text into tokens.
'
' Walks src one character at a time (p is the 1-based read position) and
' appends the text of every token to tokList[] and its type code to
' typList[]; tp is the index of the last token stored.
'   - blanks, TABs and CRs are skipped
'   - a ' comment is skipped up to and including its LF; no EOL token is
'     stored for it, but the line is still counted
'   - LF stores an "EOL" token (tkEOL) after checking that the ( ) and [ ]
'     counts seen so far match
'   - "..." stores the text between the quotes (may be empty) as tkSTRING
'   - [a-z][a-z0-9]* stores tkIDENT, [0-9][0-9.]* stores tkNUMBER
'   - + - * / ( ) , : < = > [ ] & | ! # $ % are stored as one-character tokens
' Any other character is reported as an unknown token.
' Returns the number of tokens, or 0 after showing a message box when an
' error is found; tokerr then holds the error code 1..6.
' lineCount is the number of completed lines, so the line being scanned
' when an error is found is lineCount+1.
sub tokenizer(src as string) as int
int pStart                                                            ' value of p when the current pass began
lineCount=0:ltp=start
p=1 : tp=0 : tokerr=0 : tk=""                                         ' start clean so a repeated call does not reuse old state
Lpar=0 : Rpar=0 : Lbrk=0 : Rbrk=0
while p <= len(src)
'............................................................................................
    pStart = p
    ch = mid(src,p,1)                                                  'get char
If asc(ch)=32 : p=p+1 : end if                                        ' skip blank space[ ]
If asc(ch)=9  : p=p+1 : end if                                        ' skip TAB [    ]
if asc(ch)=13 : p=p+1 : end if                                        ' skip CR
if asc(ch)=39                                                         ' skip comment line[ ' ]
    while p <= len(src)                                               ' stop at LF or at the end of the source
      ch = mid(src,p,1) : if asc(ch)= 10 then exit while
      p++
    wend
    if p <= len(src) : lineCount++ : p++ : end if                     ' step over the LF and count the comment line
    goto endLoop                                                      ' jump to end of loop
end if

If asc(ch)=10                                                         ' EOL
    if Lpar > Rpar  : tokerr=3 : goto tokExit : end if              ' if Rparen ((...)
    if Lpar < Rpar  : tokerr=4 : goto tokExit : end if              ' if Lparen (...))
    if Lbrk > Rbrk  : tokerr=5 : goto tokExit : end if              ' if Lbracket [..
    if Lbrk < Rbrk  : tokerr=6 : goto tokExit : end if              ' if Rbracket ...]
lineCount++ : tp++ : tokList[tp]="EOL" :typList[tp]= tkEOL: tk="": ch="" : p++
End if
'--------------------------------------------------------
If asc(ch)=34                                                         ' if char is QUOTE "
    p++ : tk=""                                                       ' skip opening quote, start with an empty buffer
    while p <= len(src)                                               ' never read past the end of the source
        ch = mid(src,p,1) : if asc(ch)= 34 then exit while
        IF ch = chr(10): tokerr = 2: goto tokExit : end if            ' line ended before the closing quote
        tk=tk+ch : p++
    wend
    if p > len(src) : tokerr = 2 : goto tokExit : end if              ' source ended before the closing quote
    tp++ : tokList[tp]= tk :typList[tp]= tkSTRING: tk="":ch="": p++    ' add quoted string to token list, skip closing quote
End if
'-------------------------------------------------------
If (asc(ch)>96 and asc(ch)<123)          ' [a-z]
   while (asc(ch)>96 and asc(ch)<123) or (asc(ch)>47 and asc(ch)<58)   ' [a-z0-9]*
         tk=tk+ch : p++ : ch = mid(src,p,1)
   wend
      ' add token ,add token type/IDENT:{VAR/COMMAND}
       tp++ : tokList[tp] = tk :typList[tp]= tkIDENT: tk="":ch=""      ' ch is cleared so the tests below do not match
End If
'--------------------------------------------------------------
If (asc(ch)>47 and asc(ch)<58)                                       ' [0-9.]
    while (asc(ch)>47 AND asc(ch)<58) OR asc(ch)=46                   ' [0-9[0.0]]*
        tk=tk+ch :p++ : ch = mid(src,p,1)
    wend
       ' add token ,add token type/NUMBER
       tp++ : tokList[tp] = tk : typList[tp]= tkNUMBER: tk="":ch=""
End if
'--------------------------------------------------------------------
If asc(ch)=43 : tp++ : tokList[tp] = ch :typList[tp]= tkPLUS:    ch="" : p++ : End if          ' + plus
If asc(ch)=45 : tp++ : tokList[tp] = ch :typList[tp]= tkMINUS:   ch="" : p++ : End if          ' - minus
If asc(ch)=42 : tp++ : tokList[tp] = ch :typList[tp]= tkMULTI:   ch="" : p++ : End if          ' * multiply
If asc(ch)=47 : tp++ : tokList[tp] = ch :typList[tp]= tkDIVIDE:  ch="" : p++ : End if            ' / divide
If asc(ch)=40 : tp++ : tokList[tp] = ch :typList[tp]= tkLPAREN:  ch="" : p++ : Lpar++ : End if ' ( Lparen
If asc(ch)=41 : tp++ : tokList[tp] = ch :typList[tp]= tkRPAREN:  ch="" : p++ : Rpar++ : End if ' ) Rparen
If asc(ch)=44 : tp++ : tokList[tp] = ch :typList[tp]= tkCOMMA:   ch="" : p++ : End if          ' , comma
If asc(ch)=58 : tp++ : tokList[tp] = ch :typList[tp]= tkCOLON:   ch="" : p++ : End if          ' : colon
If asc(ch)=60 : tp++ : tokList[tp] = ch :typList[tp]= tkLESS:    ch="" : p++ : End if          ' < less
If asc(ch)=61 : tp++ : tokList[tp] = ch :typList[tp]= tkEQUAL:   ch="" : p++ : End if          ' = equal
If asc(ch)=62 : tp++ : tokList[tp] = ch :typList[tp]= tkMORE:    ch="" : p++ : End if          ' > more(greater)
If asc(ch)=91 : tp++ : tokList[tp] = ch :typList[tp]= tkLBRACKET:ch="" : p++ : Lbrk++ :End if  ' [ Lbracket
If asc(ch)=93 : tp++ : tokList[tp] = ch :typList[tp]= tkRBRACKET:ch="" : p++ : Rbrk++ :End if  ' ] Rbracket
If asc(ch)=38 : tp++ : tokList[tp] = ch :typList[tp]= tkAND:     ch="" : p++ : End if          ' & AND
If asc(ch)=124 :tp++ : tokList[tp] = ch :typList[tp]= tkOR:      ch="": p++ : End if           ' | OR
If asc(ch)=33 : tp++ : tokList[tp] = ch :typList[tp]= tkNOT:     ch="" : p++ : End if          ' ! NOT
If asc(ch)=35 : tp++ : tokList[tp] = ch :typList[tp]= tkHASH:    ch="" : p++ : End if          ' # hash
If asc(ch)=36 : tp++ : tokList[tp] = ch :typList[tp]= tkSSTR:    ch="" : p++ : End if          ' $ $TRING
If asc(ch)=37 : tp++ : tokList[tp] = ch :typList[tp]= tkMOD :    ch="" : p++ : End if          ' % percent/MOD

' Every branch above advances p. If p did not move, no branch recognised the
' character (ch still holds it), so report it instead of looping forever.
IF p = pStart : tokerr = 1 : goto tokExit: END IF
'............................................................................................
endLoop:
wend
Return tp
tokExit:
  IF tokerr > 0
    ' lineCount counts completed lines, so the faulty line is lineCount+1
    if tokerr = 1: MsgBox "Unknown token!-[ " + ch +" ] at LINE: " + str(lineCount+1),"T:Error"  : end if
    if tokerr = 2: MsgBox "Unclosed Quote!- at LINE: " + str(lineCount+1),"T:Error"              : end if
    if tokerr = 3: MsgBox "Missing right paren! ((...)- at LINE: " + str(lineCount+1),"T:Error"  : end if
    if tokerr = 4: MsgBox "Missing left paren!- at LINE: " + str(lineCount+1),"T:Error"          : end if
    if tokerr = 5: MsgBox "Missing right bracket!- at LINE: " + str(lineCount+1),"T:Error"       : end if
    if tokerr = 6: MsgBox "Missing left bracket!- at LINE: " + str(lineCount+1),"T:Error"        : end if
    Return 0
  END IF
end sub

' Run the tokenizer over the sample source and show the result.
int tn
tn = tokenizer(code)
if tn=0 then goto ExitProgram          ' tokenizer returned 0: skip the report

' summary line: token count and the tokenizer's line counter
print "Number of tokens: " + str(tn) + crlf + "Number of lines: " + str(lineCount)

' collect the tokens in bf, one per line, then show them in a message box
for n = 1 to tn
    bf = bf + tokList[n] + crlf
next n
MsgBox bf,"Token List:"

ExitProgram:
if tn=0
    print "Program Terminated!"
end if


Attached File(s) Image(s)
   
Find all posts by this user
Quote this message in a reply
Post Reply 


Forum Jump:


User(s) browsing this thread: