/Users/lyon/j4p/src/javassist/compiler/Lex.java

1    /* 
2     * Javassist, a Java-bytecode translator toolkit. 
3     * Copyright (C) 1999-2003 Shigeru Chiba. All Rights Reserved. 
4     * 
5     * The contents of this file are subject to the Mozilla Public License Version 
6     * 1.1 (the "License"); you may not use this file except in compliance with 
7     * the License.  Alternatively, the contents of this file may be used under 
8     * the terms of the GNU Lesser General Public License Version 2.1 or later. 
9     * 
10    * Software distributed under the License is distributed on an "AS IS" basis, 
11    * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 
12    * for the specific language governing rights and limitations under the 
13    * License. 
14    */ 
15    
16   package javassist.compiler; 
17    
/**
 * A single lexical token handled by {@link Lex}.
 *
 * <p>This is a plain mutable record: the lexer fills in {@code tokenId}
 * and whichever value field matches the kind of token it recognized, and
 * links tokens together through {@code next} when it buffers look-ahead.
 */
class Token {
    /** Following token in a look-ahead chain; {@code null} when there is none. */
    public Token next;

    /** Identifier of the token kind, as stored by the lexer. */
    public int tokenId;

    /** Text payload (the lexer stores identifier names and string literals here). */
    public String textValue;

    /** Integral payload (the lexer stores integer and character constants here). */
    public long longValue;

    /** Floating-point payload (the lexer stores float and double constants here). */
    public double doubleValue;
}
26    
27   public class Lex implements TokenId { 
28       private int lastChar; 
29       private StringBuffer textBuffer; 
30       private Token currentToken; 
31       private Token lookAheadTokens; 
32    
33       private String input; 
34       private int position, maxlen, lineNumber; 
35    
36       /** 
37        * Constructs a lexical analyzer. 
38        */ 
39       public Lex(String s) { 
40           lastChar = -1; 
41           textBuffer = new StringBuffer(); 
42           currentToken = new Token(); 
43           lookAheadTokens = null; 
44    
45           input = s; 
46           position = 0; 
47           maxlen = s.length(); 
48           lineNumber = 0; 
49       } 
50    
51       public int get() { 
52           if (lookAheadTokens == null) 
53               return get(currentToken); 
54           else { 
55               Token t; 
56               currentToken = t = lookAheadTokens; 
57               lookAheadTokens = lookAheadTokens.next; 
58               return t.tokenId; 
59           } 
60       } 
61    
62       /** 
63        * Looks at the next token. 
64        */ 
65       public int lookAhead() { 
66           return lookAhead(0); 
67       } 
68    
69       public int lookAhead(int i) { 
70           Token tk = lookAheadTokens; 
71           if (tk == null) { 
72               lookAheadTokens = tk = currentToken;  // reuse an object! 
73               tk.next = null; 
74               get(tk); 
75           } 
76    
77           for (; i-- > 0; tk = tk.next) 
78               if (tk.next == null) { 
79                   Token tk2; 
80                   tk.next = tk2 = new Token(); 
81                   get(tk2); 
82               } 
83    
84           currentToken = tk; 
85           return tk.tokenId; 
86       } 
87    
88       public String getString() { 
89           return currentToken.textValue; 
90       } 
91    
92       public long getLong() { 
93           return currentToken.longValue; 
94       } 
95    
96       public double getDouble() { 
97           return currentToken.doubleValue; 
98       } 
99    
100      private int get(Token token) { 
101          int t; 
102          do { 
103              t = readLine(token); 
104          } while (t == '\n'); 
105          token.tokenId = t; 
106          return t; 
107      } 
108   
109      private int readLine(Token token) { 
110          int c = getNextNonWhiteChar(); 
111          if (c < 0) 
112              return c; 
113          else if (c == '\n') { 
114              ++lineNumber; 
115              return '\n'; 
116          } else if (c == '\'') 
117              return readCharConst(token); 
118          else if (c == '"') 
119              return readStringL(token); 
120          else if ('0' <= c && c <= '9') 
121              return readNumber(c, token); 
122          else if (c == '.') { 
123              c = getc(); 
124              if ('0' <= c && c <= '9') { 
125                  StringBuffer tbuf = textBuffer; 
126                  tbuf.setLength(0); 
127                  tbuf.append('.'); 
128                  return readDouble(tbuf, c, token); 
129              } else { 
130                  ungetc(c); 
131                  return readSeparator('.'); 
132              } 
133          } else if ('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || c == '_' 
134                  || c == '$') 
135              return readIdentifier(c, token); 
136          else 
137              return readSeparator(c); 
138      } 
139   
140      private int getNextNonWhiteChar() { 
141          int c; 
142          do { 
143              c = getc(); 
144              if (c == '/') { 
145                  c = getc(); 
146                  if (c == '/') 
147                      do { 
148                          c = getc(); 
149                      } while (c != '\n' && c != '\r' && c != -1); 
150                  else if (c == '*') 
151                      while (true) { 
152                          c = getc(); 
153                          if (c == -1) 
154                              break; 
155                          else if (c == '*') 
156                              if ((c = getc()) == '/') { 
157                                  c = ' '; 
158                                  break; 
159                              } else 
160                                  ungetc(c); 
161                      } 
162                  else { 
163                      ungetc(c); 
164                      c = '/'; 
165                  } 
166              } 
167          } while (isBlank(c)); 
168          return c; 
169      } 
170   
171      private int readCharConst(Token token) { 
172          int c; 
173          int value = 0; 
174          while ((c = getc()) != '\'') 
175              if (c == '\\') 
176                  value = readEscapeChar(); 
177              else if (c < 0x20) { 
178                  if (c == '\n') 
179                      ++lineNumber; 
180   
181                  return BadToken; 
182              } else 
183                  value = c; 
184   
185          token.longValue = value; 
186          return CharConstant; 
187      } 
188   
189      private int readEscapeChar() { 
190          int c = getc(); 
191          if (c == 'n') 
192              c = '\n'; 
193          else if (c == 't') 
194              c = '\t'; 
195          else if (c == 'r') 
196              c = '\r'; 
197          else if (c == 'f') 
198              c = '\f'; 
199          else if (c == '\n') 
200              ++lineNumber; 
201   
202          return c; 
203      } 
204   
205      private int readStringL(Token token) { 
206          int c; 
207          StringBuffer tbuf = textBuffer; 
208          tbuf.setLength(0); 
209          for (; ;) { 
210              while ((c = getc()) != '"') { 
211                  if (c == '\\') 
212                      c = readEscapeChar(); 
213                  else if (c == '\n' || c < 0) { 
214                      ++lineNumber; 
215                      return BadToken; 
216                  } 
217   
218                  tbuf.append((char) c); 
219              } 
220   
221              for (; ;) { 
222                  c = getc(); 
223                  if (c == '\n') 
224                      ++lineNumber; 
225                  else if (!isBlank(c)) 
226                      break; 
227              } 
228   
229              if (c != '"') { 
230                  ungetc(c); 
231                  break; 
232              } 
233          } 
234   
235          token.textValue = tbuf.toString(); 
236          return StringL; 
237      } 
238   
239      private int readNumber(int c, Token token) { 
240          long value = 0; 
241          int c2 = getc(); 
242          if (c == '0') 
243              if (c2 == 'X' || c2 == 'x') 
244                  for (; ;) { 
245                      c = getc(); 
246                      if ('0' <= c && c <= '9') 
247                          value = value * 16 + (long) (c - '0'); 
248                      else if ('A' <= c && c <= 'F') 
249                          value = value * 16 + (long) (c - 'A' + 10); 
250                      else if ('a' <= c && c <= 'f') 
251                          value = value * 16 + (long) (c - 'a' + 10); 
252                      else { 
253                          token.longValue = value; 
254                          if (c == 'L' || c == 'l') 
255                              return LongConstant; 
256                          else { 
257                              ungetc(c); 
258                              return IntConstant; 
259                          } 
260                      } 
261                  } 
262              else if ('0' <= c2 && c2 <= '7') { 
263                  value = c2 - '0'; 
264                  for (; ;) { 
265                      c = getc(); 
266                      if ('0' <= c && c <= '7') 
267                          value = value * 8 + (long) (c - '0'); 
268                      else { 
269                          token.longValue = value; 
270                          if (c == 'L' || c == 'l') 
271                              return LongConstant; 
272                          else { 
273                              ungetc(c); 
274                              return IntConstant; 
275                          } 
276                      } 
277                  } 
278              } 
279   
280          value = c - '0'; 
281          while ('0' <= c2 && c2 <= '9') { 
282              value = value * 10 + c2 - '0'; 
283              c2 = getc(); 
284          } 
285   
286          token.longValue = value; 
287          if (c2 == 'F' || c2 == 'f') { 
288              token.doubleValue = (double) value; 
289              return FloatConstant; 
290          } else if (c2 == 'E' || c2 == 'e' || c2 == '.') { 
291              StringBuffer tbuf = textBuffer; 
292              tbuf.setLength(0); 
293              tbuf.append(value); 
294              return readDouble(tbuf, c2, token); 
295          } else if (c2 == 'L' || c2 == 'l') 
296              return LongConstant; 
297          else { 
298              ungetc(c2); 
299              return IntConstant; 
300          } 
301      } 
302   
303      private int readDouble(StringBuffer sbuf, int c, Token token) { 
304          if (c != 'E' && c != 'e') { 
305              sbuf.append((char) c); 
306              for (; ;) { 
307                  c = getc(); 
308                  if ('0' <= c && c <= '9') 
309                      sbuf.append((char) c); 
310                  else 
311                      break; 
312              } 
313          } 
314   
315          if (c == 'E' || c == 'e') { 
316              sbuf.append((char) c); 
317              c = getc(); 
318              if (c == '+' || c == '-') { 
319                  sbuf.append((char) c); 
320                  c = getc(); 
321              } 
322   
323              while ('0' <= c && c <= '9') { 
324                  sbuf.append((char) c); 
325                  c = getc(); 
326              } 
327          } 
328   
329          try { 
330              token.doubleValue = Double.parseDouble(sbuf.toString()); 
331          } catch (NumberFormatException e) { 
332              return BadToken; 
333          } 
334   
335          if (c == 'F' || c == 'f') 
336              return FloatConstant; 
337          else { 
338              ungetc(c); 
339              return DoubleConstant; 
340          } 
341      } 
342   
343      // !"#$%&'(    )*+,-./0    12345678    9:;<=>? 
344      private static final int[] equalOps 
345              = {NEQ, 0, 0, 0, MOD_E, AND_E, 0, 0, 
346                 0, MUL_E, PLUS_E, 0, MINUS_E, 0, DIV_E, 0, 
347                 0, 0, 0, 0, 0, 0, 0, 0, 
348                 0, 0, 0, LE, EQ, GE, 0}; 
349   
350      private int readSeparator(int c) { 
351          int c2, c3; 
352          if ('!' <= c && c <= '?') { 
353              int t = equalOps[c - '!']; 
354              if (t == 0) 
355                  return c; 
356              else { 
357                  c2 = getc(); 
358                  if (c == c2) 
359                      switch (c) { 
360                          case '=': 
361                              return EQ; 
362                          case '+': 
363                              return PLUSPLUS; 
364                          case '-': 
365                              return MINUSMINUS; 
366                          case '&': 
367                              return ANDAND; 
368                          case '<': 
369                              c3 = getc(); 
370                              if (c3 == '=') 
371                                  return LSHIFT_E; 
372                              else { 
373                                  ungetc(c3); 
374                                  return LSHIFT; 
375                              } 
376                          case '>': 
377                              c3 = getc(); 
378                              if (c3 == '=') 
379                                  return RSHIFT_E; 
380                              else if (c3 == '>') { 
381                                  c3 = getc(); 
382                                  if (c3 == '=') 
383                                      return ARSHIFT_E; 
384                                  else { 
385                                      ungetc(c3); 
386                                      return ARSHIFT; 
387                                  } 
388                              } else { 
389                                  ungetc(c3); 
390                                  return RSHIFT; 
391                              } 
392                          default : 
393                              break; 
394                      } 
395                  else if (c2 == '=') 
396                      return t; 
397              } 
398          } else if (c == '^') { 
399              c2 = getc(); 
400              if (c2 == '=') 
401                  return EXOR_E; 
402          } else if (c == '|') { 
403              c2 = getc(); 
404              if (c2 == '=') 
405                  return OR_E; 
406              else if (c2 == '|') 
407                  return OROR; 
408          } else 
409              return c; 
410   
411          ungetc(c2); 
412          return c; 
413      } 
414   
415      private int readIdentifier(int c, Token token) { 
416          StringBuffer tbuf = textBuffer; 
417          tbuf.setLength(0); 
418   
419          do { 
420              tbuf.append((char) c); 
421              c = getc(); 
422          } while ('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || c == '_' 
423                  || c == '$' || '0' <= c && c <= '9'); 
424   
425          ungetc(c); 
426   
427          String name = tbuf.toString(); 
428          int t = ktable.lookup(name); 
429          if (t >= 0) 
430              return t; 
431          else { 
432              /* tbuf.toString() is executed quickly since it does not 
433               * need memory copy.  Using a hand-written extensible 
434               * byte-array class instead of StringBuffer is not a good idea 
435               * for execution speed.  Converting a byte array to a String 
436               * object is very slow.  Using an extensible char array 
437               * might be OK. 
438               */ 
439              token.textValue = name; 
440              return Identifier; 
441          } 
442      } 
443   
444      private static final KeywordTable ktable = new KeywordTable(); 
445   
446      static { 
447          ktable.append("abstract", ABSTRACT); 
448          ktable.append("boolean", BOOLEAN); 
449          ktable.append("break", BREAK); 
450          ktable.append("byte", BYTE); 
451          ktable.append("case", CASE); 
452          ktable.append("catch", CATCH); 
453          ktable.append("char", CHAR); 
454          ktable.append("class", CLASS); 
455          ktable.append("const", CONST); 
456          ktable.append("continue", CONTINUE); 
457          ktable.append("default", DEFAULT); 
458          ktable.append("do", DO); 
459          ktable.append("double", DOUBLE); 
460          ktable.append("else", ELSE); 
461          ktable.append("extends", EXTENDS); 
462          ktable.append("false", FALSE); 
463          ktable.append("final", FINAL); 
464          ktable.append("finally", FINALLY); 
465          ktable.append("float", FLOAT); 
466          ktable.append("for", FOR); 
467          ktable.append("goto", GOTO); 
468          ktable.append("if", IF); 
469          ktable.append("implements", IMPLEMENTS); 
470          ktable.append("import", IMPORT); 
471          ktable.append("instanceof", INSTANCEOF); 
472          ktable.append("int", INT); 
473          ktable.append("interface", INTERFACE); 
474          ktable.append("long", LONG); 
475          ktable.append("native", NATIVE); 
476          ktable.append("new", NEW); 
477          ktable.append("null", NULL); 
478          ktable.append("package", PACKAGE); 
479          ktable.append("private", PRIVATE); 
480          ktable.append("protected", PROTECTED); 
481          ktable.append("public", PUBLIC); 
482          ktable.append("return", RETURN); 
483          ktable.append("short", SHORT); 
484          ktable.append("static", STATIC); 
485          ktable.append("strict", STRICT); 
486          ktable.append("super", SUPER); 
487          ktable.append("switch", SWITCH); 
488          ktable.append("synchronized", SYNCHRONIZED); 
489          ktable.append("this", THIS); 
490          ktable.append("throw", THROW); 
491          ktable.append("throws", THROWS); 
492          ktable.append("transient", TRANSIENT); 
493          ktable.append("true", TRUE); 
494          ktable.append("try", TRY); 
495          ktable.append("void", VOID); 
496          ktable.append("volatile", VOLATILE); 
497          ktable.append("while", WHILE); 
498      } 
499   
500      private static boolean isBlank(int c) { 
501          return c == ' ' || c == '\t' || c == '\f' || c == '\r' 
502                  || c == '\n'; 
503      } 
504   
505      private static boolean isDigit(int c) { 
506          return '0' <= c && c <= '9'; 
507      } 
508   
509      private void ungetc(int c) { 
510          lastChar = c; 
511      } 
512   
513      private int getc() { 
514          if (lastChar < 0) 
515              if (position < maxlen) 
516                  return input.charAt(position++); 
517              else 
518                  return -1; 
519          else { 
520              int c = lastChar; 
521              lastChar = -1; 
522              return c; 
523          } 
524      } 
525  } 
526