001 /* The following code was generated by JFlex 1.4.1 on 13/07/06 20:57 */ 002 003 /* CSVLexer.java is a generated file. You probably want to 004 * edit CSVLexer.lex to make changes. Use JFlex to generate it. 005 * JFlex may be obtained from 006 * <a href="http://jflex.de">the JFlex website</a>. 007 * This file was tested to work with jflex 1.4 (and may not 008 * work with more recent version because it needs a skeleton file) 009 * Run: <br> 010 * jflex --skel csv.jflex.skel CSVLexer.lex<br> 011 * You will then have a file called CSVLexer.java 012 */ 013 014 /* 015 * Read files in comma separated value format. 016 * Copyright (C) 2001-2004 Stephen Ostermiller 017 * http://ostermiller.org/contact.pl?regarding=Java+Utilities 018 * 019 * This program is free software; you can redistribute it and/or modify 020 * it under the terms of the GNU General Public License as published by 021 * the Free Software Foundation; either version 2 of the License, or 022 * (at your option) any later version. 023 * 024 * This program is distributed in the hope that it will be useful, 025 * but WITHOUT ANY WARRANTY; without even the implied warranty of 026 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 027 * GNU General Public License for more details. 028 * 029 * See COPYING.TXT for details. 030 */ 031 032 //package com.Ostermiller.util; 033 package net.sf.logdistiller.util.csv; 034 035 import java.io.*; 036 037 /** 038 * Read files in comma separated value format. The use of this class is no longer recommended. It is now recommended 039 * that you use com.Ostermiller.util.CSVParser instead. That class, has a cleaner API, and methods for returning all the 040 * values on a line in a String array. CSV is a file format used as a portable representation of a database. Each line 041 * is one entry or record and the fields in a record are separated by commas. Commas may be preceded or followed by 042 * arbitrary space and/or tab characters which are ignored. 043 * <P> 044 * If field includes a comma or a new line, the whole field must be surrounded with double quotes. When the field is in 045 * quotes, any quote literals must be escaped by \" Backslash literals must be escaped by \\. Otherwise a backslash an 046 * the character following it will be treated as the following character, ie."\n" is equivelent to "n". Other escape 047 * sequences may be set using the setEscapes() method. Text that comes after quotes that have been closed but come 048 * before the next comma will be ignored. 049 * <P> 050 * Empty fields are returned as as String of length zero: "". The following line has four empty fields and two non-empty 051 * fields in it. There is an empty field on each end, and two in the middle.<br> 052 * 053 * <pre> 054 * ,second,, ,fifth, 055 * </pre> 056 * <P> 057 * Blank lines are always ignored. Other lines will be ignored if they start with a comment character as set by the 058 * setCommentStart() method. 059 * <P> 060 * An example of how CVSLexer might be used: 061 * 062 * <pre> 063 * CSVLexer shredder = new CSVLexer( System.in ); 064 * shredder.setCommentStart( "#;!" ); 065 * shredder.setEscapes( "nrtf", "\n\r\t\f" ); 066 * String t; 067 * while ( ( t = shredder.getNextToken() ) != null ) 068 * { 069 * System.out.println( "" + shredder.getLineNumber() + " " + t ); 070 * } 071 * </pre> 072 * 073 * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities 074 * @since ostermillerutils 1.00.00 075 */ 076 077 public class CSVLexer 078 { 079 080 /** This character denotes the end of file */ 081 public static final int YYEOF = -1; 082 083 /** initial size of the lookahead buffer */ 084 private static final int ZZ_BUFFERSIZE = 16384; 085 086 /** lexical states */ 087 public static final int BEFORE = 1; 088 089 public static final int YYINITIAL = 0; 090 091 public static final int COMMENT = 3; 092 093 public static final int AFTER = 2; 094 095 /** 096 * Translates characters to character classes 097 */ 098 private static final String ZZ_CMAP_PACKED = 099 "\11\0\1\1\1\3\1\0\1\1\1\2\22\0\1\1\1\0\1\5" + "\11\0\1\4\57\0\1\6\uffa3\0"; 100 101 /** 102 * Translates characters to character classes 103 */ 104 private static final char[] ZZ_CMAP = zzUnpackCMap( ZZ_CMAP_PACKED ); 105 106 /** 107 * Translates DFA states to action switch labels. 108 */ 109 private static final int[] ZZ_ACTION = zzUnpackAction(); 110 111 private static final String ZZ_ACTION_PACKED_0 = 112 "\1\0\3\1\1\2\1\3\2\4\1\5\1\6\1\7" + "\1\1\2\10\1\11\1\12\2\1\1\13\1\1\2\0" + "\1\14\2\0\1\15\1\0"; 113 114 private static int[] zzUnpackAction() 115 { 116 int[] result = new int[27]; 117 int offset = 0; 118 offset = zzUnpackAction( ZZ_ACTION_PACKED_0, offset, result ); 119 return result; 120 } 121 122 private static int zzUnpackAction( String packed, int offset, int[] result ) 123 { 124 int i = 0; /* index in packed string */ 125 int j = offset; /* index in unpacked array */ 126 int l = packed.length(); 127 while ( i < l ) 128 { 129 int count = packed.charAt( i++ ); 130 int value = packed.charAt( i++ ); 131 do 132 result[j++] = value; 133 while ( --count > 0 ); 134 } 135 return j; 136 } 137 138 /** 139 * Translates a state to a row index in the transition table 140 */ 141 private static final int[] ZZ_ROWMAP = zzUnpackRowMap(); 142 143 private static final String ZZ_ROWMAP_PACKED_0 = 144 "\0\0\0\7\0\16\0\25\0\34\0\43\0\52\0\61" + "\0\61\0\70\0\77\0\106\0\115\0\61\0\61\0\124" 145 + "\0\133\0\142\0\61\0\151\0\34\0\43\0\61\0\160" + "\0\77\0\61\0\167"; 146 147 private static int[] zzUnpackRowMap() 148 { 149 int[] result = new int[27]; 150 int offset = 0; 151 offset = zzUnpackRowMap( ZZ_ROWMAP_PACKED_0, offset, result ); 152 return result; 153 } 154 155 private static int zzUnpackRowMap( String packed, int offset, int[] result ) 156 { 157 int i = 0; /* index in packed string */ 158 int j = offset; /* index in unpacked array */ 159 int l = packed.length(); 160 while ( i < l ) 161 { 162 int high = packed.charAt( i++ ) << 16; 163 result[j++] = high | packed.charAt( i++ ); 164 } 165 return j; 166 } 167 168 /** 169 * The transition table of the DFA 170 */ 171 private static final int[] ZZ_TRANS = zzUnpackTrans(); 172 173 private static final String ZZ_TRANS_PACKED_0 = 174 "\1\5\1\6\1\7\1\10\1\11\1\12\1\5\1\13" + "\1\14\1\15\1\16\1\17\1\20\1\13\1\21\1\22" 175 + "\1\7\1\10\1\23\2\21\1\24\1\4\1\7\1\10" + "\3\24\1\5\1\25\3\0\2\5\1\0\1\26\1\7" 176 + "\1\10\6\0\1\10\12\0\5\12\1\27\1\30\1\13" + "\1\31\3\0\2\13\1\0\1\14\10\0\1\16\3\0" 177 + "\5\20\1\32\1\33\2\21\3\0\3\21\1\22\1\7" + "\1\10\1\0\2\21\2\24\2\0\3\24\7\12\7\20"; 178 179 private static int[] zzUnpackTrans() 180 { 181 int[] result = new int[126]; 182 int offset = 0; 183 offset = zzUnpackTrans( ZZ_TRANS_PACKED_0, offset, result ); 184 return result; 185 } 186 187 private static int zzUnpackTrans( String packed, int offset, int[] result ) 188 { 189 int i = 0; /* index in packed string */ 190 int j = offset; /* index in unpacked array */ 191 int l = packed.length(); 192 while ( i < l ) 193 { 194 int count = packed.charAt( i++ ); 195 int value = packed.charAt( i++ ); 196 value--; 197 do 198 result[j++] = value; 199 while ( --count > 0 ); 200 } 201 return j; 202 } 203 204 /** 205 * Per instance reference to the character map. This can be cloned and modified per instance if needed. It is 206 * initally set to the static value. 207 */ 208 private char[] zzcmap_instance = ZZ_CMAP; 209 210 /* error codes */ 211 private static final int ZZ_UNKNOWN_ERROR = 0; 212 213 private static final int ZZ_NO_MATCH = 1; 214 215 private static final int ZZ_PUSHBACK_2BIG = 2; 216 217 /* error messages for the codes above */ 218 private static final String ZZ_ERROR_MSG[] = 219 { "Unkown internal scanner error", "Error: could not match input", "Error: pushback value was too large" }; 220 221 /** 222 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> 223 */ 224 private static final int[] ZZ_ATTRIBUTE = zzUnpackAttribute(); 225 226 private static final String ZZ_ATTRIBUTE_PACKED_0 = 227 "\1\0\6\1\2\11\4\1\2\11\3\1\1\11\1\1" + "\2\0\1\11\2\0\1\11\1\0"; 228 229 private static int[] zzUnpackAttribute() 230 { 231 int[] result = new int[27]; 232 int offset = 0; 233 offset = zzUnpackAttribute( ZZ_ATTRIBUTE_PACKED_0, offset, result ); 234 return result; 235 } 236 237 private static int zzUnpackAttribute( String packed, int offset, int[] result ) 238 { 239 int i = 0; /* index in packed string */ 240 int j = offset; /* index in unpacked array */ 241 int l = packed.length(); 242 while ( i < l ) 243 { 244 int count = packed.charAt( i++ ); 245 int value = packed.charAt( i++ ); 246 do 247 result[j++] = value; 248 while ( --count > 0 ); 249 } 250 return j; 251 } 252 253 /** the input device */ 254 private java.io.Reader zzReader; 255 256 /** the current state of the DFA */ 257 private int zzState; 258 259 /** the current lexical state */ 260 private int zzLexicalState = YYINITIAL; 261 262 /** 263 * this buffer contains the current text to be matched and is the source of the yytext() string 264 */ 265 private char zzBuffer[] = new char[ZZ_BUFFERSIZE]; 266 267 /** the textposition at the last accepting state */ 268 private int zzMarkedPos; 269 270 /** the textposition at the last state to be included in yytext */ 271 private int zzPushbackPos; 272 273 /** the current text position in the buffer */ 274 private int zzCurrentPos; 275 276 /** startRead marks the beginning of the yytext() string in the buffer */ 277 private int zzStartRead; 278 279 /** 280 * endRead marks the last character in the buffer, that has been read from input 281 */ 282 private int zzEndRead; 283 284 /** number of newlines encountered up to the start of the matched text */ 285 private int yyline; 286 287 /** the number of characters up to the start of the matched text */ 288 private int yychar; 289 290 /** 291 * the number of characters from the last newline up to the start of the matched text 292 */ 293 private int yycolumn; 294 295 /** 296 * zzAtBOL == true <=> the scanner is currently at the beginning of a line 297 */ 298 private boolean zzAtBOL = true; 299 300 /** zzAtEOF == true <=> the scanner is at the EOF */ 301 private boolean zzAtEOF; 302 303 /* user code: */ 304 /** 305 * Prints out tokens and line numbers from a file or System.in. If no arguments are given, System.in will be used 306 * for input. If more arguments are given, the first argument will be used as the name of the file to use as input 307 * 308 * @param args program arguments, of which the first is a filename 309 * @since ostermillerutils 1.00.00 310 */ 311 public static void main( String[] args ) 312 { 313 InputStream in; 314 try 315 { 316 if ( args.length > 0 ) 317 { 318 File f = new File( args[0] ); 319 if ( f.exists() ) 320 { 321 if ( f.canRead() ) 322 { 323 in = new FileInputStream( f ); 324 } 325 else 326 { 327 throw new IOException( "Could not open " + args[0] ); 328 } 329 } 330 else 331 { 332 throw new IOException( "Could not find " + args[0] ); 333 } 334 } 335 else 336 { 337 in = System.in; 338 } 339 CSVLexer shredder = new CSVLexer( in ); 340 shredder.setCommentStart( "#;!" ); 341 shredder.setEscapes( "nrtf", "\n\r\t\f" ); 342 String t; 343 while ( ( t = shredder.getNextToken() ) != null ) 344 { 345 System.out.println( "" + shredder.getLineNumber() + " " + t ); 346 } 347 } 348 catch ( IOException e ) 349 { 350 System.out.println( e.getMessage() ); 351 } 352 } 353 354 private char delimiter = ','; 355 356 private char quote = '\"'; 357 358 /** 359 * Checks that zzcmap_instance is an instance variable (not just a pointer to a static variable). If it is a pointer 360 * to a static variable, it will be cloned. 361 * 362 * @since ostermillerutils 1.00.00 363 */ 364 private void ensureCharacterMapIsInstance() 365 { 366 if ( ZZ_CMAP == zzcmap_instance ) 367 { 368 zzcmap_instance = new char[ZZ_CMAP.length]; 369 System.arraycopy( ZZ_CMAP, 0, zzcmap_instance, 0, ZZ_CMAP.length ); 370 } 371 } 372 373 /** 374 * Ensures that the given character is not used for some special purpose in parsing. This method should be called 375 * before setting some character to be a delimiter so that the parsing doesn't break. Examples of bad characters are 376 * quotes, commas, and whitespace. 377 * 378 * @since ostermillerutils 1.00.00 379 */ 380 private boolean charIsSafe( char c ) 381 { 382 // There are two character classes that one could use as a delimiter. 383 // The first would be the class that most characters are in. These 384 // are normally data. The second is the class that the tab is usually in. 385 return ( zzcmap_instance[c] == ZZ_CMAP['a'] || zzcmap_instance[c] == ZZ_CMAP['\t'] ); 386 } 387 388 /** 389 * Change the character classes of the two given characters. This will make the state machine behave as if the 390 * characters were switched when they are encountered in the input. 391 * 392 * @param old the old character, its value will be returned to initial 393 * @param two second character 394 * @since ostermillerutils 1.00.00 395 */ 396 private void updateCharacterClasses( char oldChar, char newChar ) 397 { 398 // before modifying the character map, make sure it isn't static. 399 ensureCharacterMapIsInstance(); 400 // make the newChar behave like the oldChar 401 zzcmap_instance[newChar] = zzcmap_instance[oldChar]; 402 // make the oldChar behave like it isn't special. 403 switch ( oldChar ) 404 { 405 case ',': 406 case '"': 407 { 408 // These should act as normal character, 409 // not delimiters or quotes right now. 410 zzcmap_instance[oldChar] = ZZ_CMAP['a']; 411 } 412 break; 413 default: 414 { 415 // Set the it back to the way it would act 416 // if not used as a delimiter or quote. 417 zzcmap_instance[oldChar] = ZZ_CMAP[oldChar]; 418 } 419 break; 420 } 421 } 422 423 /** 424 * Change this Lexer so that it uses a new delimiter. 425 * <p> 426 * The initial character is a comma, the delimiter cannot be changed to a quote or other character that has special 427 * meaning in CSV. 428 * 429 * @param newDelim delimiter to which to switch. 430 * @throws BadDelimiterException if the character cannot be used as a delimiter. 431 * @since ostermillerutils 1.00.00 432 */ 433 public void changeDelimiter( char newDelim ) 434 throws BadDelimiterException 435 { 436 if ( newDelim == delimiter ) 437 return; // no need to do anything. 438 if ( !charIsSafe( newDelim ) ) 439 { 440 throw new BadDelimiterException( newDelim + " is not a safe delimiter." ); 441 } 442 updateCharacterClasses( delimiter, newDelim ); 443 // keep a record of the current delimiter. 444 delimiter = newDelim; 445 } 446 447 /** 448 * Change this Lexer so that it uses a new character for quoting. 449 * <p> 450 * The initial character is a double quote ("), the delimiter cannot be changed to a comma or other character that 451 * has special meaning in CSV. 452 * 453 * @param newQuote character to use for quoting. 454 * @throws BadQuoteException if the character cannot be used as a quote. 455 * @since ostermillerutils 1.00.00 456 */ 457 public void changeQuote( char newQuote ) 458 throws BadQuoteException 459 { 460 if ( newQuote == quote ) 461 return; // no need to do anything. 462 if ( !charIsSafe( newQuote ) ) 463 { 464 throw new BadQuoteException( newQuote + " is not a safe quote." ); 465 } 466 updateCharacterClasses( quote, newQuote ); 467 // keep a record of the current quote. 468 quote = newQuote; 469 } 470 471 private String escapes = ""; 472 473 private String replacements = ""; 474 475 /** 476 * Specify escape sequences and their replacements. Escape sequences set here are in addition to \\ and \". \\ and 477 * \" are always valid escape sequences. This method allows standard escape sequenced to be used. For example "\n" 478 * can be set to be a newline rather than an 'n'. A common way to call this method might be:<br> 479 * <code>setEscapes("nrtf", "\n\r\t\f");</code><br> 480 * which would set the escape sequences to be the Java escape sequences. Characters that follow a \ that are not 481 * escape sequences will still be interpreted as that character.<br> 482 * The two arguemnts to this method must be the same length. If they are not, the longer of the two will be 483 * truncated. 484 * 485 * @param escapes a list of characters that will represent escape sequences. 486 * @param replacements the list of repacement characters for those escape sequences. 487 * @since ostermillerutils 1.00.00 488 */ 489 public void setEscapes( String escapes, String replacements ) 490 { 491 int length = escapes.length(); 492 if ( replacements.length() < length ) 493 { 494 length = replacements.length(); 495 } 496 this.escapes = escapes.substring( 0, length ); 497 this.replacements = replacements.substring( 0, length ); 498 } 499 500 private String unescape( String s ) 501 { 502 if ( s.indexOf( '\\' ) == -1 ) 503 { 504 return s.substring( 1, s.length() - 1 ); 505 } 506 StringBuffer sb = new StringBuffer( s.length() ); 507 for ( int i = 1; i < s.length() - 1; i++ ) 508 { 509 char c = s.charAt( i ); 510 if ( c == '\\' ) 511 { 512 char c1 = s.charAt( ++i ); 513 int index; 514 if ( c1 == '\\' || c1 == '\"' ) 515 { 516 sb.append( c1 ); 517 } 518 else if ( ( index = escapes.indexOf( c1 ) ) != -1 ) 519 { 520 sb.append( replacements.charAt( index ) ); 521 } 522 else 523 { 524 sb.append( c1 ); 525 } 526 } 527 else 528 { 529 sb.append( c ); 530 } 531 } 532 return sb.toString(); 533 } 534 535 private String commentDelims = ""; 536 537 /** 538 * Set the characters that indicate a comment at the beginning of the line. For example if the string "#;!" were 539 * passed in, all of the following lines would be comments:<br> 540 * 541 * <pre> 542 * # Comment 543 * ; Another Comment 544 * ! Yet another comment 545 * </pre> 546 * 547 * By default there are no comments in CVS files. Commas and quotes may not be used to indicate comment lines. 548 * 549 * @param commentDelims list of characters a comment line may start with. 550 * @since ostermillerutils 1.00.00 551 */ 552 public void setCommentStart( String commentDelims ) 553 { 554 this.commentDelims = commentDelims; 555 } 556 557 private int addLine = 1; 558 559 private int lines = 0; 560 561 /** 562 * Get the line number that the last token came from. 563 * <p> 564 * New line breaks that occur in the middle of a token are not counted in the line number count. 565 * <p> 566 * If no tokens have been returned, the line number is undefined. 567 * 568 * @return line number of the last token. 569 * @since ostermillerutils 1.00.00 570 */ 571 public int getLineNumber() 572 { 573 return lines; 574 } 575 576 /** 577 * Creates a new scanner There is also a java.io.InputStream version of this constructor. 578 * 579 * @param in the java.io.Reader to read input from. 580 */ 581 public CSVLexer( java.io.Reader in ) 582 { 583 this.zzReader = in; 584 } 585 586 /** 587 * Creates a new scanner. There is also java.io.Reader version of this constructor. 588 * 589 * @param in the java.io.Inputstream to read input from. 590 */ 591 public CSVLexer( java.io.InputStream in ) 592 { 593 this( new java.io.InputStreamReader( in ) ); 594 } 595 596 /** 597 * Unpacks the compressed character translation table. 598 * 599 * @param packed the packed character translation table 600 * @return the unpacked character translation table 601 */ 602 private static char[] zzUnpackCMap( String packed ) 603 { 604 char[] map = new char[0x10000]; 605 int i = 0; /* index in packed string */ 606 int j = 0; /* index in unpacked array */ 607 while ( i < 30 ) 608 { 609 int count = packed.charAt( i++ ); 610 char value = packed.charAt( i++ ); 611 do 612 map[j++] = value; 613 while ( --count > 0 ); 614 } 615 return map; 616 } 617 618 /** 619 * Refills the input buffer. 620 * 621 * @return <code>false</code>, iff there was new input. 622 * @exception java.io.IOException if any I/O-Error occurs 623 */ 624 private boolean zzRefill() 625 throws java.io.IOException 626 { 627 628 /* first: make room (if you can) */ 629 if ( zzStartRead > 0 ) 630 { 631 System.arraycopy( zzBuffer, zzStartRead, zzBuffer, 0, zzEndRead - zzStartRead ); 632 633 /* translate stored positions */ 634 zzEndRead -= zzStartRead; 635 zzCurrentPos -= zzStartRead; 636 zzMarkedPos -= zzStartRead; 637 zzPushbackPos -= zzStartRead; 638 zzStartRead = 0; 639 } 640 641 /* is the buffer big enough? */ 642 if ( zzCurrentPos >= zzBuffer.length ) 643 { 644 /* if not: blow it up */ 645 char newBuffer[] = new char[zzCurrentPos * 2]; 646 System.arraycopy( zzBuffer, 0, newBuffer, 0, zzBuffer.length ); 647 zzBuffer = newBuffer; 648 } 649 650 /* finally: fill the buffer with new input */ 651 int numRead = zzReader.read( zzBuffer, zzEndRead, zzBuffer.length - zzEndRead ); 652 653 if ( numRead < 0 ) 654 { 655 return true; 656 } 657 else 658 { 659 zzEndRead += numRead; 660 return false; 661 } 662 } 663 664 /** 665 * Closes the input stream. 666 */ 667 public final void yyclose() 668 throws java.io.IOException 669 { 670 zzAtEOF = true; /* indicate end of file */ 671 zzEndRead = zzStartRead; /* invalidate buffer */ 672 673 if ( zzReader != null ) 674 zzReader.close(); 675 } 676 677 /** 678 * Resets the scanner to read from a new input stream. Does not close the old reader. All internal variables are 679 * reset, the old input stream <b>cannot</b> be reused (internal buffer is discarded and lost). Lexical state is set 680 * to <tt>ZZ_INITIAL</tt>. 681 * 682 * @param reader the new input stream 683 */ 684 public final void yyreset( java.io.Reader reader ) 685 { 686 zzReader = reader; 687 zzAtBOL = true; 688 zzAtEOF = false; 689 zzEndRead = zzStartRead = 0; 690 zzCurrentPos = zzMarkedPos = zzPushbackPos = 0; 691 yyline = yychar = yycolumn = 0; 692 zzLexicalState = YYINITIAL; 693 } 694 695 /** 696 * Returns the current lexical state. 697 */ 698 public final int yystate() 699 { 700 return zzLexicalState; 701 } 702 703 public final int yychar() 704 { 705 return yychar; 706 } 707 708 /** 709 * Enters a new lexical state 710 * 711 * @param newState the new lexical state 712 */ 713 public final void yybegin( int newState ) 714 { 715 zzLexicalState = newState; 716 } 717 718 /** 719 * Returns the text matched by the current regular expression. 720 */ 721 public final String yytext() 722 { 723 return new String( zzBuffer, zzStartRead, zzMarkedPos - zzStartRead ); 724 } 725 726 /** 727 * Returns the character at position <tt>pos</tt> from the matched text. It is equivalent to yytext().charAt(pos), 728 * but faster 729 * 730 * @param pos the position of the character to fetch. A value from 0 to yylength()-1. 731 * @return the character at position pos 732 */ 733 public final char yycharat( int pos ) 734 { 735 return zzBuffer[zzStartRead + pos]; 736 } 737 738 /** 739 * Returns the length of the matched text region. 740 */ 741 public final int yylength() 742 { 743 return zzMarkedPos - zzStartRead; 744 } 745 746 /** 747 * Reports an error that occured while scanning. In a wellformed scanner (no or only correct usage of 748 * yypushback(int) and a match-all fallback rule) this method will only be called with things that 749 * "Can't Possibly Happen". If this method is called, something is seriously wrong (e.g. a JFlex bug producing a 750 * faulty scanner etc.). Usual syntax/scanner level error handling should be done in error fallback rules. 751 * 752 * @param errorCode the code of the errormessage to display 753 */ 754 private void zzScanError( int errorCode ) 755 { 756 String message; 757 try 758 { 759 message = ZZ_ERROR_MSG[errorCode]; 760 } 761 catch ( ArrayIndexOutOfBoundsException e ) 762 { 763 message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR]; 764 } 765 766 throw new Error( message ); 767 } 768 769 /** 770 * Pushes the specified amount of characters back into the input stream. They will be read again by then next call 771 * of the scanning method 772 * 773 * @param number the number of characters to be read again. This number must not be greater than yylength()! 774 */ 775 public void yypushback( int number ) 776 { 777 if ( number > yylength() ) 778 zzScanError( ZZ_PUSHBACK_2BIG ); 779 780 zzMarkedPos -= number; 781 } 782 783 /** 784 * Resumes scanning until the next regular expression is matched, the end of input is encountered or an I/O-Error 785 * occurs. 786 * 787 * @return the next token 788 * @exception java.io.IOException if any I/O-Error occurs 789 */ 790 public String getNextToken() 791 throws java.io.IOException 792 { 793 int zzInput; 794 int zzAction; 795 796 // cached fields: 797 int zzCurrentPosL; 798 int zzMarkedPosL; 799 int zzEndReadL = zzEndRead; 800 char[] zzBufferL = zzBuffer; 801 char[] zzCMapL = zzcmap_instance; 802 803 int[] zzTransL = ZZ_TRANS; 804 int[] zzRowMapL = ZZ_ROWMAP; 805 int[] zzAttrL = ZZ_ATTRIBUTE; 806 807 while ( true ) 808 { 809 zzMarkedPosL = zzMarkedPos; 810 811 yychar += zzMarkedPosL - zzStartRead; 812 813 boolean zzR = false; 814 for ( zzCurrentPosL = zzStartRead; zzCurrentPosL < zzMarkedPosL; zzCurrentPosL++ ) 815 { 816 switch ( zzBufferL[zzCurrentPosL] ) 817 { 818 case '\u000B': 819 case '\u000C': 820 case '\u0085': 821 case '\u2028': 822 case '\u2029': 823 yyline++; 824 yycolumn = 0; 825 zzR = false; 826 break; 827 case '\r': 828 yyline++; 829 yycolumn = 0; 830 zzR = true; 831 break; 832 case '\n': 833 if ( zzR ) 834 zzR = false; 835 else 836 { 837 yyline++; 838 yycolumn = 0; 839 } 840 break; 841 default: 842 zzR = false; 843 yycolumn++; 844 } 845 } 846 847 if ( zzR ) 848 { 849 // peek one character ahead if it is \n (if we have counted one line too much) 850 boolean zzPeek; 851 if ( zzMarkedPosL < zzEndReadL ) 852 zzPeek = zzBufferL[zzMarkedPosL] == '\n'; 853 else if ( zzAtEOF ) 854 zzPeek = false; 855 else 856 { 857 boolean eof = zzRefill(); 858 zzEndReadL = zzEndRead; 859 zzMarkedPosL = zzMarkedPos; 860 zzBufferL = zzBuffer; 861 if ( eof ) 862 zzPeek = false; 863 else 864 zzPeek = zzBufferL[zzMarkedPosL] == '\n'; 865 } 866 if ( zzPeek ) 867 yyline--; 868 } 869 zzAction = -1; 870 871 zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL; 872 873 zzState = zzLexicalState; 874 875 zzForAction: 876 { 877 while ( true ) 878 { 879 880 if ( zzCurrentPosL < zzEndReadL ) 881 zzInput = zzBufferL[zzCurrentPosL++]; 882 else if ( zzAtEOF ) 883 { 884 zzInput = YYEOF; 885 break zzForAction; 886 } 887 else 888 { 889 // store back cached positions 890 zzCurrentPos = zzCurrentPosL; 891 zzMarkedPos = zzMarkedPosL; 892 boolean eof = zzRefill(); 893 // get translated positions and possibly new buffer 894 zzCurrentPosL = zzCurrentPos; 895 zzMarkedPosL = zzMarkedPos; 896 zzBufferL = zzBuffer; 897 zzEndReadL = zzEndRead; 898 if ( eof ) 899 { 900 zzInput = YYEOF; 901 break zzForAction; 902 } 903 else 904 { 905 zzInput = zzBufferL[zzCurrentPosL++]; 906 } 907 } 908 int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]]; 909 if ( zzNext == -1 ) 910 break zzForAction; 911 zzState = zzNext; 912 913 int zzAttributes = zzAttrL[zzState]; 914 if ( ( zzAttributes & 1 ) == 1 ) 915 { 916 zzAction = zzState; 917 zzMarkedPosL = zzCurrentPosL; 918 if ( ( zzAttributes & 8 ) == 8 ) 919 break zzForAction; 920 } 921 922 } 923 } 924 925 // store back cached position 926 zzMarkedPos = zzMarkedPosL; 927 928 switch ( zzAction < 0 ? zzAction : ZZ_ACTION[zzAction] ) 929 { 930 case 2: 931 { 932 lines += addLine; 933 addLine = 0; 934 String text = yytext(); 935 if ( commentDelims.indexOf( text.charAt( 0 ) ) == -1 ) 936 { 937 yybegin( AFTER ); 938 return ( text ); 939 } 940 else 941 { 942 yybegin( COMMENT ); 943 } 944 } 945 case 14: 946 break; 947 case 8: 948 { 949 addLine++; 950 yybegin( YYINITIAL ); 951 return ( "" ); 952 } 953 case 15: 954 break; 955 case 9: 956 { 957 yybegin( BEFORE ); 958 return ( "" ); 959 } 960 case 16: 961 break; 962 case 4: 963 { 964 addLine++; 965 yybegin( YYINITIAL ); 966 } 967 case 17: 968 break; 969 case 5: 970 { 971 lines += addLine; 972 addLine = 0; 973 yybegin( BEFORE ); 974 return ( "" ); 975 } 976 case 18: 977 break; 978 case 12: 979 { 980 lines += addLine; 981 addLine = 0; 982 yybegin( AFTER ); 983 return ( unescape( yytext() ) ); 984 } 985 case 19: 986 break; 987 case 7: 988 { 989 yybegin( AFTER ); 990 return ( yytext() ); 991 } 992 case 20: 993 break; 994 case 6: 995 { 996 lines += addLine; 997 addLine = 0; 998 yybegin( YYINITIAL ); 999 return ( yytext() ); 1000 } 1001 case 21: 1002 break; 1003 case 11: 1004 { 1005 yybegin( BEFORE ); 1006 } 1007 case 22: 1008 break; 1009 case 13: 1010 { 1011 yybegin( AFTER ); 1012 return ( unescape( yytext() ) ); 1013 } 1014 case 23: 1015 break; 1016 case 10: 1017 { 1018 yybegin( YYINITIAL ); 1019 return ( yytext() ); 1020 } 1021 case 24: 1022 break; 1023 case 1: 1024 { 1025 } 1026 case 25: 1027 break; 1028 case 3: 1029 { 1030 lines += addLine; 1031 addLine = 0; 1032 yybegin( BEFORE ); 1033 } 1034 case 26: 1035 break; 1036 default: 1037 if ( zzInput == YYEOF && zzStartRead == zzCurrentPos ) 1038 { 1039 zzAtEOF = true; 1040 switch ( zzLexicalState ) 1041 { 1042 case BEFORE: 1043 { 1044 yybegin( YYINITIAL ); 1045 addLine++; 1046 return ( "" ); 1047 } 1048 case 28: 1049 break; 1050 default: 1051 return null; 1052 } 1053 } 1054 else 1055 { 1056 zzScanError( ZZ_NO_MATCH ); 1057 } 1058 } 1059 } 1060 } 1061 1062 }