Post Reply 
Social Buttons
 
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
PyQBtokenizer in o2
04-17-2018, 08:01 PM
Post: #1
PyQBtokenizer in o2
This one works OK for now:

Code:
'Tokenizer for BASIC-like syntax, originally written in Python (PyQB).
'PyQB tokenizer translated to Oxygen Basic - by Aurel 2018
'NOTE(review): #lookahead presumably enables forward references; tokenizer()
'calls isKeyword() before it is defined further down - confirm against o2 docs.
#lookahead
'Reserved words, stored in upper case; isKeyword() compares against ucase(token).
'The list has 14 entries and isKeyword() hard-codes that count - keep them in sync.
string KEYWORDS[] = {"CLS","PRINT","IF","ELSE","FOR","TO","NEXT","ENDIF","WHILE","WEND","UNTIL","DO","LOOP","THEN"}
'Each of these characters is emitted as its own one-character SYMBOL token.
string SYMBOLS = ":=()+-*/<>"
'Characters that may start and continue an identifier or keyword.
'NOTE(review): "$" and "#" are presumably BASIC type suffixes - confirm.
string ALPHABETS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz$#"
'Characters that may start a NUMBER token.
string NUMBERS = "0123456789"
'Characters that may continue a NUMBER token once it has started.
string NUMBERS_WITH_DECIMALPOINT = NUMBERS + "."
'The next two character classes are declared here but not used by the code in this listing.
string ALPHANUMBERS = ALPHABETS + NUMBERS
string ALPHANUMBERS_WITH_UNDERSCORE = ALPHANUMBERS + "_"
string tokens  'token buffer: tokenizer() appends one "<text> : <KIND>" line per token here
'Line terminator appended after every entry in the token buffer.
string crlf = chr(13)+chr(10)

function tokenizer(string code) as string
    '
    ' Splits BASIC-like source text into tokens.
    '
    ' code    : source text to scan (may be empty).
    ' returns : a listing with one line per token, formatted as
    '           "<text> : <KIND>" + crlf, where KIND is KEYWORD, IDENTIFIER,
    '           SYMBOL, NUMBER or UNKNOWN. The same text is left in the
    '           global buffer `tokens`.
    '
    ' Every pass of the main loop consumes at least one character, so the
    ' scan terminates on any input (line breaks, tabs, quotes, brackets ...).
    '
    string token, ch, blanks
    int i, codeLen

    ' space, tab, CR and LF are skipped silently
    blanks = " " + chr(9) + chr(13) + chr(10)

    ' start from an empty buffer so a second call does not return the
    ' tokens of the previous call as well
    tokens = ""

    codeLen = len(code)
    i = 1
    while i <= codeLen
        ch = mid(code, i, 1)
        token = ""

        if instr(ALPHABETS, ch) > 0
            ' identifier or keyword: take the whole run of ALPHABETS characters
            ' NOTE(review): digits and "_" end an identifier here, although
            ' ALPHANUMBERS_WITH_UNDERSCORE is declared - confirm intent.
            while i <= codeLen and instr(ALPHABETS, mid(code, i, 1)) > 0
                token = token + mid(code, i, 1)
                i = i + 1
            wend
            ' isKeyword returns the upper-case keyword on a match, "" otherwise
            if ucase(token) = isKeyword(token)
                tokens = tokens + token + " : KEYWORD" + crlf
            else
                tokens = tokens + token + " : IDENTIFIER" + crlf
            end if

        elseif instr(SYMBOLS, ch) > 0
            ' operators and punctuation are always one character long
            tokens = tokens + ch + " : SYMBOL" + crlf
            i = i + 1

        elseif instr(NUMBERS, ch) > 0
            ' number: starts with a digit, continues with digits or "."
            while i <= codeLen and instr(NUMBERS_WITH_DECIMALPOINT, mid(code, i, 1)) > 0
                token = token + mid(code, i, 1)
                i = i + 1
            wend
            tokens = tokens + token + " : NUMBER" + crlf

        elseif instr(blanks, ch) > 0
            ' whitespace separates tokens and produces no output
            i = i + 1

        else
            ' anything not handled yet (quotes, brackets, "&", "|", ...):
            ' report it and move on instead of looping forever on it
            tokens = tokens + ch + " : UNKNOWN" + crlf
            i = i + 1
        end if
    wend

    return tokens

end function
'...........................................................
function isKeyword(byval tok as string) as string
' Case-insensitive lookup of tok in the KEYWORDS table.
' Returns the matching table entry (upper case), or "" when tok is not a keyword.
string wanted
int idx
wanted = ucase(tok)              ' upper-case once instead of on every comparison
for idx = 1 to 14                ' 14 = number of entries in KEYWORDS
    if KEYWORDS[idx] = wanted
        return KEYWORDS[idx]
    end if
next idx
return ""
end function
'.............................................................
'test tokenizer
'tokenList receives the listing returned by tokenizer().
string tokenList
'Sample input: a one-line FOR/NEXT loop with an assignment, separated by ":".
string input = "For n= 10 To 100 :a= a*0.35 : Next n "
'call tokenizer on the sample and print the resulting token listing
tokenList = tokenizer(input)
print tokenList


Attached File(s) Image(s)
   
Find all posts by this user
Quote this message in a reply
04-17-2018, 08:14 PM
Post: #2
RE: PyQBtokenizer in o2
What is left to do:
double-quoted strings ""$ ""
square brackets [ ]
and maybe some special signs
comparison logic: & for AND and | for OR
Find all posts by this user
Quote this message in a reply
Post Reply 


Forum Jump:


User(s) browsing this thread: