From 78c8452874d72845824a4cf33a3199b68a17f47f Mon Sep 17 00:00:00 2001 From: Jonathan Turner Date: Tue, 21 Nov 2023 23:52:43 -0500 Subject: [PATCH] Java now works thank God ;-; --- .idea/misc.xml | 2 +- src/LexicalAnalyzer.java | 3 - src/edu/jt_kb/cs4308/compiler/Driver.java | 18 ++- .../cs4308/compiler/JavaSyntaxAnalyzer.java | 10 -- .../compiler/models/JavaSyntaxAnalyzer.java | 147 ++++++++++++++++++ .../jt_kb/cs4308/compiler/models/Pair.java | 17 ++ .../cs4308/compiler/models/TokenType.java | 3 +- 7 files changed, 182 insertions(+), 18 deletions(-) delete mode 100644 src/LexicalAnalyzer.java delete mode 100644 src/edu/jt_kb/cs4308/compiler/JavaSyntaxAnalyzer.java create mode 100644 src/edu/jt_kb/cs4308/compiler/models/JavaSyntaxAnalyzer.java create mode 100644 src/edu/jt_kb/cs4308/compiler/models/Pair.java diff --git a/.idea/misc.xml b/.idea/misc.xml index d15472f..ce297c6 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,6 +1,6 @@ - + \ No newline at end of file diff --git a/src/LexicalAnalyzer.java b/src/LexicalAnalyzer.java deleted file mode 100644 index 54e936a..0000000 --- a/src/LexicalAnalyzer.java +++ /dev/null @@ -1,3 +0,0 @@ -public class LexicalAnalyzer { - -} diff --git a/src/edu/jt_kb/cs4308/compiler/Driver.java b/src/edu/jt_kb/cs4308/compiler/Driver.java index 9765f61..3f79089 100644 --- a/src/edu/jt_kb/cs4308/compiler/Driver.java +++ b/src/edu/jt_kb/cs4308/compiler/Driver.java @@ -1,9 +1,15 @@ package edu.jt_kb.cs4308.compiler; import edu.jt_kb.cs4308.compiler.FileManagement.FileReader; +import edu.jt_kb.cs4308.compiler.models.JavaSyntaxAnalyzer; +import edu.jt_kb.cs4308.compiler.models.Pair; +import edu.jt_kb.cs4308.compiler.models.TokenType; +import org.xml.sax.ext.LexicalHandler; import java.io.File; +import java.sql.Array; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Scanner; @@ -17,11 +23,17 @@ public class Driver { public void start() { File java = new File("src/edu/jt_kb/cs4308/compiler/resources/Java.txt"); File python = new File("src/edu/jt_kb/cs4308/compiler/resources/Python.txt"); + List code = new ArrayList<>(); + JavaSyntaxAnalyzer analyzer = new JavaSyntaxAnalyzer(); code = FileReader.readFile(java); - System.out.println(code); - code = FileReader.readFile(python); - System.out.println(code); + List analysis = analyzer.start(code); + for (Pair curr : analysis) { + if (curr.type != TokenType.EOF) { + System.out.println("Next token is: " + curr.type.value + " lexeme is " + curr.lexeme); + } + } + } diff --git a/src/edu/jt_kb/cs4308/compiler/JavaSyntaxAnalyzer.java b/src/edu/jt_kb/cs4308/compiler/JavaSyntaxAnalyzer.java deleted file mode 100644 index 08de347..0000000 --- a/src/edu/jt_kb/cs4308/compiler/JavaSyntaxAnalyzer.java +++ /dev/null @@ -1,10 +0,0 @@ -package edu.jt_kb.cs4308.compiler; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -public class JavaSyntaxAnalyzer { - List KeyWords=new ArrayList<>(Arrays.asList("public","class","static","void","int")); - -} diff --git a/src/edu/jt_kb/cs4308/compiler/models/JavaSyntaxAnalyzer.java b/src/edu/jt_kb/cs4308/compiler/models/JavaSyntaxAnalyzer.java new file mode 100644 index 0000000..699283e --- /dev/null +++ b/src/edu/jt_kb/cs4308/compiler/models/JavaSyntaxAnalyzer.java @@ -0,0 +1,147 @@ +package edu.jt_kb.cs4308.compiler.models; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class JavaSyntaxAnalyzer { + + public List start(List source) { + List values = new ArrayList<>(); + for (String line : source) { + List result = readline(line); + values.addAll(result); + } + return values; + } + + private List readline(String line) { + List analyzedLine = new ArrayList<>(); + String active = line.trim(); + Pair result = null; + do { + active = active.trim(); + result = lex(active); + if (result != null) { + analyzedLine.add(result); + if (result.lexeme.length() > 1) { + active = active.replaceFirst(result.lexeme, ""); + } else { + active = active.substring(1); + } + } + } while (result != null && result.type != TokenType.EOF); + return analyzedLine; + } + + private Pair lex(String line) { + if (line == null || line.isEmpty()) { + return null; + } + char[] letters = line.toCharArray(); + if (Character.isDigit(letters[0])) { + String lexeme = ""; + int index = 0; + while (index < letters.length && Character.isDigit(letters[index])) { + lexeme += letters[index]; + index++; + } + return new Pair(lexeme, TokenType.INT_LIT); + } + if (Character.isLetter(letters[0])) { + String lexeme = ""; + int index = 0; + while (index < letters.length + && (Character.isLetter(letters[index]) + || Character.isDigit(letters[index]))) { + lexeme += letters[index]; + index++; + } + Pair lookup_result = lookup(lexeme); + if (lookup_result.type != TokenType.UNKNOWN) { + return lookup_result; + } + return new Pair(lexeme, TokenType.IDENT); + } + if (letters[0] == '"') { + String lexeme = "\""; + int index = 1; + while (index < letters.length) { + lexeme += letters[index]; + if (letters[index] == '"') { + break; + } + index++; + } + return new Pair(lexeme, TokenType.STRING_LITERAL); + } + Pair temp = lookup(line.split(" ")[0]); + if (temp.type != TokenType.UNKNOWN) { + return temp; + } + return lookup(letters[0]); + } + + public Pair lookup(char lexeme) { + TokenType token = null; + switch (lexeme) { + case '=': + token = TokenType.ASSIGN_OP; + break; + case '+': + token = TokenType.ADD_OP; + break; + case '-': + token = TokenType.SUB_OP; + break; + case '*': + token = TokenType.MULT_OP; + break; + case '/': + token = TokenType.DIV_OP; + break; + case ';': + token = TokenType.SEMI_COLON; + break; + case ',': + token = TokenType.COMMA; + break; + case '{': + token = TokenType.LEFT_CURLY; + break; + case '}': + token = TokenType.RIGHT_CURLY; + break; + case '(': + token = TokenType.LEFT_PAREN; + break; + case ')': + token = TokenType.RIGHT_PAREN; + break; + case '[': + token = TokenType.LEFT_SQUARE; + break; + case ']': + token = TokenType.RIGHT_SQUARE; + break; + case '$': + token = TokenType.EOF; + break; + default: + token = TokenType.UNKNOWN; + } + return new Pair(String.valueOf(lexeme), token); + } + + private Pair lookup(String lexeme) { + TokenType token = null; + List keywords = new ArrayList<>(Arrays.asList("public","class","static","void","int")); + if (keywords.contains(lexeme.trim())) { + token = TokenType.RESERVED_WORD; + } else { + token = TokenType.UNKNOWN; + } + return new Pair(lexeme, token); + } + +} diff --git a/src/edu/jt_kb/cs4308/compiler/models/Pair.java b/src/edu/jt_kb/cs4308/compiler/models/Pair.java new file mode 100644 index 0000000..90535e8 --- /dev/null +++ b/src/edu/jt_kb/cs4308/compiler/models/Pair.java @@ -0,0 +1,17 @@ +package edu.jt_kb.cs4308.compiler.models; + +public class Pair { + + public String lexeme; + public TokenType type; + + public Pair (String lexeme, TokenType type) { + this.lexeme = lexeme; + this.type = type; + } + + @Override + public String toString() { + return lexeme + " : " + type; + } +} diff --git a/src/edu/jt_kb/cs4308/compiler/models/TokenType.java b/src/edu/jt_kb/cs4308/compiler/models/TokenType.java index 857eb4a..4c3b0f2 100644 --- a/src/edu/jt_kb/cs4308/compiler/models/TokenType.java +++ b/src/edu/jt_kb/cs4308/compiler/models/TokenType.java @@ -3,7 +3,8 @@ package edu.jt_kb.cs4308.compiler.models; public enum TokenType { INT_LIT(10),IDENT(11),ASSIGN_OP(20),ADD_OP(21),SUB_OP(22),MULT_OP(23), DIV_OP(24),LEFT_PAREN(26),RIGHT_PAREN(27),SEMI_COLON(28), STRING_LITERAL(29), - COMMA(30), LEFT_CURLY(31), RIGHT_CURLY(32), LEFT_SQUARE(33), RIGHT_SQUARE(34); + COMMA(30), LEFT_CURLY(31), RIGHT_CURLY(32), LEFT_SQUARE(33), RIGHT_SQUARE(34), + RESERVED_WORD(35),EOF(-1),UNKNOWN(-2); public final int value; /* represents which Token you are referencing. */