diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 0000000..9215f06 --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +Driver.java \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index ce297c6..d15472f 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,6 +1,6 @@ - + \ No newline at end of file diff --git a/src/edu/jt_kb/cs4308/compiler/Driver.java b/src/edu/jt_kb/cs4308/compiler/Driver.java index 03bebba..2d08df4 100644 --- a/src/edu/jt_kb/cs4308/compiler/Driver.java +++ b/src/edu/jt_kb/cs4308/compiler/Driver.java @@ -3,6 +3,7 @@ package edu.jt_kb.cs4308.compiler; import edu.jt_kb.cs4308.compiler.FileManagement.FileReader; import edu.jt_kb.cs4308.compiler.models.JavaSyntaxAnalyzer; import edu.jt_kb.cs4308.compiler.models.Pair; +import edu.jt_kb.cs4308.compiler.models.PythonSyntaxAnalyzer; import edu.jt_kb.cs4308.compiler.models.TokenType; import java.io.File; @@ -109,7 +110,8 @@ public class Driver { JavaSyntaxAnalyzer analyzer = new JavaSyntaxAnalyzer(); results = analyzer.start(this.java); } else { - // Python Analyzer Code + PythonSyntaxAnalyzer analyzer = new PythonSyntaxAnalyzer(); + results = analyzer.start(this.python); } for (Pair lex : results) { System.out.println("The next token is: " + lex.type.value + " next lexeme is: " + lex.lexeme); @@ -122,7 +124,8 @@ public class Driver { JavaSyntaxAnalyzer analyzer = new JavaSyntaxAnalyzer(); results = analyzer.start(source); } else { - // Python Analyzer Code + PythonSyntaxAnalyzer analyzer = new PythonSyntaxAnalyzer(); + results = analyzer.start(source); } for (Pair lex : results) { System.out.println("The next token is: " + lex.type.value + " next lexeme is: " + lex.lexeme); diff --git a/src/edu/jt_kb/cs4308/compiler/models/PythonSyntaxAnalyzer.java b/src/edu/jt_kb/cs4308/compiler/models/PythonSyntaxAnalyzer.java new file mode 100644 index 0000000..7280195 --- /dev/null +++ b/src/edu/jt_kb/cs4308/compiler/models/PythonSyntaxAnalyzer.java @@ -0,0 +1,138 @@ +package 
edu.jt_kb.cs4308.compiler.models;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Line-oriented lexical analyzer that turns raw source lines into
 * (lexeme, TokenType) {@link Pair}s.
 *
 * <p>Scanning is greedy and prefix-based: {@code lex} always matches a token
 * at the head of the remaining input, and {@code readline} consumes exactly
 * that prefix before scanning again.
 */
public class PythonSyntaxAnalyzer {

    // Reserved words recognized by lookup(String). Built once instead of on
    // every call.
    // NOTE(review): these are JAVA keywords, apparently copied from
    // JavaSyntaxAnalyzer — confirm whether Python keywords (def, if, elif,
    // while, for, return, import, ...) were intended for this analyzer.
    private static final List<String> KEYWORDS =
            Arrays.asList("public", "class", "static", "void", "int");

    /**
     * Tokenizes every line of {@code source} and concatenates the results.
     *
     * @param source raw source lines to analyze
     * @return all tokens found, in source order
     */
    public List<Pair> start(List<String> source) {
        List<Pair> values = new ArrayList<>();
        for (String line : source) {
            values.addAll(readline(line));
        }
        return values;
    }

    /**
     * Tokenizes a single line. Scanning stops when the remaining input is
     * exhausted ({@code lex} returns null) or an EOF token ('$') is produced.
     */
    private List<Pair> readline(String line) {
        List<Pair> analyzedLine = new ArrayList<>();
        String active = line.trim();
        Pair result;
        do {
            active = active.trim();
            result = lex(active);
            if (result != null) {
                analyzedLine.add(result);
                // lex always matches a prefix of `active`, so consume it by
                // length. The previous replaceFirst(lexeme, "") treated the
                // lexeme as a REGEX: metacharacters such as '+', '*' or '('
                // in a lexeme (e.g. inside a string literal) could fail to
                // match, leaving `active` unchanged and looping forever.
                active = active.substring(result.lexeme.length());
            }
        } while (result != null && result.type != TokenType.EOF);
        return analyzedLine;
    }

    /**
     * Returns the next token at the head of {@code line}, or {@code null}
     * when the line is null or empty. The returned lexeme is always a prefix
     * of {@code line}.
     */
    private Pair lex(String line) {
        if (line == null || line.isEmpty()) {
            return null;
        }
        char[] letters = line.toCharArray();

        // Integer literal: maximal run of digits.
        if (Character.isDigit(letters[0])) {
            StringBuilder lexeme = new StringBuilder();
            int index = 0;
            while (index < letters.length && Character.isDigit(letters[index])) {
                lexeme.append(letters[index]);
                index++;
            }
            return new Pair(lexeme.toString(), TokenType.INT_LIT);
        }

        // Identifier or reserved word: a letter followed by letters/digits.
        if (Character.isLetter(letters[0])) {
            StringBuilder lexeme = new StringBuilder();
            int index = 0;
            while (index < letters.length
                    && (Character.isLetter(letters[index])
                            || Character.isDigit(letters[index]))) {
                lexeme.append(letters[index]);
                index++;
            }
            Pair lookupResult = lookup(lexeme.toString());
            if (lookupResult.type != TokenType.UNKNOWN) {
                return lookupResult;
            }
            return new Pair(lexeme.toString(), TokenType.IDENT);
        }

        // Single-quoted string literal, quotes included in the lexeme. If the
        // closing quote is missing, the rest of the line becomes the lexeme
        // (unchanged from the original behavior).
        if (letters[0] == '\'') {
            StringBuilder lexeme = new StringBuilder("'");
            int index = 1;
            while (index < letters.length) {
                lexeme.append(letters[index]);
                if (letters[index] == '\'') {
                    break;
                }
                index++;
            }
            return new Pair(lexeme.toString(), TokenType.STRING_LITERAL);
        }

        // Fallback: try the first space-delimited word as a reserved word,
        // then classify the single leading character. (Keywords start with a
        // letter and are caught above, so the word lookup here is effectively
        // defensive — kept for fidelity with the original.)
        Pair temp = lookup(line.split(" ")[0]);
        if (temp.type != TokenType.UNKNOWN) {
            return temp;
        }
        return lookup(letters[0]);
    }

    /**
     * Maps a single character to its operator/punctuation token.
     * Unrecognized characters yield {@code TokenType.UNKNOWN}; '$' marks EOF.
     */
    public Pair lookup(char lexeme) {
        TokenType token;
        switch (lexeme) {
            case '=':
                token = TokenType.ASSIGN_OP;
                break;
            case '+':
                token = TokenType.ADD_OP;
                break;
            case '-':
                token = TokenType.SUB_OP;
                break;
            case '*':
                token = TokenType.MULT_OP;
                break;
            case '/':
                token = TokenType.DIV_OP;
                break;
            case ';':
                token = TokenType.SEMI_COLON;
                break;
            case ',':
                token = TokenType.COMMA;
                break;
            case '(':
                token = TokenType.LEFT_PAREN;
                break;
            case ')':
                token = TokenType.RIGHT_PAREN;
                break;
            case '[':
                token = TokenType.LEFT_SQUARE;
                break;
            case ']':
                token = TokenType.RIGHT_SQUARE;
                break;
            case '$':
                token = TokenType.EOF;
                break;
            default:
                token = TokenType.UNKNOWN;
        }
        return new Pair(String.valueOf(lexeme), token);
    }

    /**
     * Classifies a word: {@code RESERVED_WORD} when it is in {@link #KEYWORDS},
     * otherwise {@code UNKNOWN}. The returned lexeme is the untrimmed input.
     */
    private Pair lookup(String lexeme) {
        TokenType token = KEYWORDS.contains(lexeme.trim())
                ? TokenType.RESERVED_WORD
                : TokenType.UNKNOWN;
        return new Pair(lexeme, token);
    }
}