diff --git a/.idea/misc.xml b/.idea/misc.xml
index d15472f..ce297c6 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,6 +1,6 @@
-
+
\ No newline at end of file
diff --git a/src/LexicalAnalyzer.java b/src/LexicalAnalyzer.java
deleted file mode 100644
index 54e936a..0000000
--- a/src/LexicalAnalyzer.java
+++ /dev/null
@@ -1,3 +0,0 @@
-public class LexicalAnalyzer {
-
-}
diff --git a/src/edu/jt_kb/cs4308/compiler/Driver.java b/src/edu/jt_kb/cs4308/compiler/Driver.java
index 9765f61..3f79089 100644
--- a/src/edu/jt_kb/cs4308/compiler/Driver.java
+++ b/src/edu/jt_kb/cs4308/compiler/Driver.java
@@ -1,9 +1,15 @@
package edu.jt_kb.cs4308.compiler;
import edu.jt_kb.cs4308.compiler.FileManagement.FileReader;
+import edu.jt_kb.cs4308.compiler.models.JavaSyntaxAnalyzer;
+import edu.jt_kb.cs4308.compiler.models.Pair;
+import edu.jt_kb.cs4308.compiler.models.TokenType;
+import org.xml.sax.ext.LexicalHandler;
import java.io.File;
+import java.sql.Array;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import java.util.Scanner;
@@ -17,11 +23,20 @@ public class Driver {
public void start() {
File java = new File("src/edu/jt_kb/cs4308/compiler/resources/Java.txt");
File python = new File("src/edu/jt_kb/cs4308/compiler/resources/Python.txt");
+
List code = new ArrayList<>();
+        JavaSyntaxAnalyzer analyzer = new JavaSyntaxAnalyzer();
code = FileReader.readFile(java);
- System.out.println(code);
- code = FileReader.readFile(python);
- System.out.println(code);
+        // Typed as List<Pair> so the loop below can iterate Pair elements; a raw List
+        // only yields Object and the enhanced for would not compile.
+        List<Pair> analysis = analyzer.start(code);
+        for (Pair curr : analysis) {
+            // EOF tokens are not printed.
+            if (curr.type != TokenType.EOF) {
+                System.out.println("Next token is: " + curr.type.value + " lexeme is " + curr.lexeme);
+            }
+        }
+
}
diff --git a/src/edu/jt_kb/cs4308/compiler/JavaSyntaxAnalyzer.java b/src/edu/jt_kb/cs4308/compiler/JavaSyntaxAnalyzer.java
deleted file mode 100644
index 08de347..0000000
--- a/src/edu/jt_kb/cs4308/compiler/JavaSyntaxAnalyzer.java
+++ /dev/null
@@ -1,10 +0,0 @@
-package edu.jt_kb.cs4308.compiler;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-public class JavaSyntaxAnalyzer {
- List KeyWords=new ArrayList<>(Arrays.asList("public","class","static","void","int"));
-
-}
diff --git a/src/edu/jt_kb/cs4308/compiler/models/JavaSyntaxAnalyzer.java b/src/edu/jt_kb/cs4308/compiler/models/JavaSyntaxAnalyzer.java
new file mode 100644
index 0000000..699283e
--- /dev/null
+++ b/src/edu/jt_kb/cs4308/compiler/models/JavaSyntaxAnalyzer.java
@@ -0,0 +1,171 @@
+package edu.jt_kb.cs4308.compiler.models;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Line-oriented lexical scanner for a small subset of Java.
+ *
+ * <p>Each source line is split into {@link Pair}s of lexeme and {@link TokenType}: integer
+ * literals, identifiers, reserved words, double-quoted string literals and the
+ * single-character tokens recognised by {@link #lookup(char)}.
+ */
+public class JavaSyntaxAnalyzer {
+
+    /** Words classified as RESERVED_WORD rather than IDENT. */
+    private static final List<String> KEYWORDS =
+            Arrays.asList("public", "class", "static", "void", "int");
+
+    /**
+     * Tokenizes every line of {@code source} and concatenates the results in order.
+     *
+     * @param source the lines to scan; {@code null} is treated as no input
+     * @return the tokens of all lines, in source order
+     */
+    public List<Pair> start(List<String> source) {
+        List<Pair> values = new ArrayList<>();
+        if (source == null) {
+            return values;
+        }
+        for (String line : source) {
+            values.addAll(readline(line));
+        }
+        return values;
+    }
+
+    /**
+     * Tokenizes one line, stopping at its end or right after an EOF token.
+     *
+     * @param line the line to scan; {@code null} yields no tokens
+     * @return the tokens found, in order
+     */
+    private List<Pair> readline(String line) {
+        List<Pair> analyzedLine = new ArrayList<>();
+        if (line == null) {
+            return analyzedLine;
+        }
+        String active = line.trim();
+        Pair result = lex(active);
+        while (result != null) {
+            analyzedLine.add(result);
+            if (result.type == TokenType.EOF) {
+                break;
+            }
+            // The lexeme is always a prefix of 'active', so drop it by length.
+            // String.replaceFirst would treat the lexeme as a regular expression and
+            // fail to consume (or throw on) literals such as "a+b" or "(x".
+            active = active.substring(result.lexeme.length()).trim();
+            result = lex(active);
+        }
+        return analyzedLine;
+    }
+
+    /**
+     * Reads the token at the start of {@code line}.
+     *
+     * @param line the remaining text, expected to have no leading whitespace
+     * @return the leading token, or {@code null} when {@code line} is null or empty
+     */
+    private Pair lex(String line) {
+        if (line == null || line.isEmpty()) {
+            return null;
+        }
+        char first = line.charAt(0);
+        int length = line.length();
+        if (Character.isDigit(first)) {
+            int end = 1;
+            while (end < length && Character.isDigit(line.charAt(end))) {
+                end++;
+            }
+            return new Pair(line.substring(0, end), TokenType.INT_LIT);
+        }
+        if (Character.isLetter(first)) {
+            int end = 1;
+            while (end < length && Character.isLetterOrDigit(line.charAt(end))) {
+                end++;
+            }
+            Pair word = lookup(line.substring(0, end));
+            if (word.type != TokenType.UNKNOWN) {
+                return word;
+            }
+            return new Pair(word.lexeme, TokenType.IDENT);
+        }
+        if (first == '"') {
+            // Include the closing quote; an unterminated literal runs to the end of the line.
+            int close = line.indexOf('"', 1);
+            int end = close < 0 ? length : close + 1;
+            return new Pair(line.substring(0, end), TokenType.STRING_LITERAL);
+        }
+        return lookup(first);
+    }
+
+    /**
+     * Classifies a single character.
+     *
+     * @param lexeme the character to classify
+     * @return a pair holding the character and its token type; UNKNOWN when unrecognised
+     */
+    public Pair lookup(char lexeme) {
+        TokenType token;
+        switch (lexeme) {
+            case '=':
+                token = TokenType.ASSIGN_OP;
+                break;
+            case '+':
+                token = TokenType.ADD_OP;
+                break;
+            case '-':
+                token = TokenType.SUB_OP;
+                break;
+            case '*':
+                token = TokenType.MULT_OP;
+                break;
+            case '/':
+                token = TokenType.DIV_OP;
+                break;
+            case ';':
+                token = TokenType.SEMI_COLON;
+                break;
+            case ',':
+                token = TokenType.COMMA;
+                break;
+            case '{':
+                token = TokenType.LEFT_CURLY;
+                break;
+            case '}':
+                token = TokenType.RIGHT_CURLY;
+                break;
+            case '(':
+                token = TokenType.LEFT_PAREN;
+                break;
+            case ')':
+                token = TokenType.RIGHT_PAREN;
+                break;
+            case '[':
+                token = TokenType.LEFT_SQUARE;
+                break;
+            case ']':
+                token = TokenType.RIGHT_SQUARE;
+                break;
+            case '$':
+                token = TokenType.EOF;
+                break;
+            default:
+                token = TokenType.UNKNOWN;
+        }
+        return new Pair(String.valueOf(lexeme), token);
+    }
+
+    /**
+     * Classifies a word as a reserved word or not.
+     *
+     * @param lexeme the word to classify
+     * @return the word paired with RESERVED_WORD if it is a keyword, otherwise UNKNOWN
+     */
+    private Pair lookup(String lexeme) {
+        boolean reserved = KEYWORDS.contains(lexeme.trim());
+        return new Pair(lexeme, reserved ? TokenType.RESERVED_WORD : TokenType.UNKNOWN);
+    }
+
+}
diff --git a/src/edu/jt_kb/cs4308/compiler/models/Pair.java b/src/edu/jt_kb/cs4308/compiler/models/Pair.java
new file mode 100644
index 0000000..90535e8
--- /dev/null
+++ b/src/edu/jt_kb/cs4308/compiler/models/Pair.java
@@ -0,0 +1,28 @@
+package edu.jt_kb.cs4308.compiler.models;
+
+/** A lexeme together with the token type assigned to it. */
+public class Pair {
+
+    /** The text of the token. */
+    public String lexeme;
+
+    /** The token type assigned to the lexeme. */
+    public TokenType type;
+
+    /**
+     * Creates a pair.
+     *
+     * @param lexeme the text of the token
+     * @param type the token type assigned to the lexeme
+     */
+    public Pair(String lexeme, TokenType type) {
+        this.type = type;
+        this.lexeme = lexeme;
+    }
+
+    /** Returns the lexeme and the type separated by {@code " : "}. */
+    @Override
+    public String toString() {
+        return new StringBuilder().append(lexeme).append(" : ").append(type).toString();
+    }
+}
diff --git a/src/edu/jt_kb/cs4308/compiler/models/TokenType.java b/src/edu/jt_kb/cs4308/compiler/models/TokenType.java
index 857eb4a..4c3b0f2 100644
--- a/src/edu/jt_kb/cs4308/compiler/models/TokenType.java
+++ b/src/edu/jt_kb/cs4308/compiler/models/TokenType.java
@@ -3,7 +3,8 @@ package edu.jt_kb.cs4308.compiler.models;
public enum TokenType {
INT_LIT(10),IDENT(11),ASSIGN_OP(20),ADD_OP(21),SUB_OP(22),MULT_OP(23),
DIV_OP(24),LEFT_PAREN(26),RIGHT_PAREN(27),SEMI_COLON(28), STRING_LITERAL(29),
- COMMA(30), LEFT_CURLY(31), RIGHT_CURLY(32), LEFT_SQUARE(33), RIGHT_SQUARE(34);
+ COMMA(30), LEFT_CURLY(31), RIGHT_CURLY(32), LEFT_SQUARE(33), RIGHT_SQUARE(34),
+ RESERVED_WORD(35),EOF(-1),UNKNOWN(-2);
public final int value; /* represents which Token you are referencing. */