703 lines
18 KiB
ActionScript
703 lines
18 KiB
ActionScript
/*
|
|
Copyright (c) 2008, Adobe Systems Incorporated
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are
|
|
met:
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
* Neither the name of Adobe Systems Incorporated nor the names of its
|
|
contributors may be used to endorse or promote products derived from
|
|
this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
package com.adobe.serialization.json {
|
|
|
|
public class JSONTokenizer {
|
|
|
|
/**
|
|
* Flag indicating if the tokenizer should only recognize
|
|
* standard JSON tokens. Setting to <code>false</code> allows
|
|
* tokens such as NaN and allows numbers to be formatted as
|
|
* hex, etc.
|
|
*/
|
|
private var strict:Boolean;
|
|
|
|
/** The object that will get parsed from the JSON string */
|
|
private var obj:Object;
|
|
|
|
/** The JSON string to be parsed */
|
|
private var jsonString:String;
|
|
|
|
/** The current parsing location in the JSON string */
|
|
private var loc:int;
|
|
|
|
/** The current character in the JSON string during parsing */
|
|
private var ch:String;
|
|
|
|
/**
|
|
* The regular expression used to make sure the string does not
|
|
* contain invalid control characters.
|
|
*/
|
|
private var controlCharsRegExp:RegExp = /[\x00-\x1F]/;
|
|
|
|
/**
|
|
* Constructs a new JSONDecoder to parse a JSON string
|
|
* into a native object.
|
|
*
|
|
* @param s The JSON string to be converted
|
|
* into a native object
|
|
*/
|
|
public function JSONTokenizer( s:String, strict:Boolean )
|
|
{
|
|
jsonString = s;
|
|
this.strict = strict;
|
|
loc = 0;
|
|
|
|
// prime the pump by getting the first character
|
|
nextChar();
|
|
}
|
|
|
|
/**
|
|
* Gets the next token in the input sting and advances
|
|
* the character to the next character after the token
|
|
*/
|
|
public function getNextToken():JSONToken
|
|
{
|
|
var token:JSONToken = new JSONToken();
|
|
|
|
// skip any whitespace / comments since the last
|
|
// token was read
|
|
skipIgnored();
|
|
|
|
// examine the new character and see what we have...
|
|
switch ( ch )
|
|
{
|
|
case '{':
|
|
token.type = JSONTokenType.LEFT_BRACE;
|
|
token.value = '{';
|
|
nextChar();
|
|
break
|
|
|
|
case '}':
|
|
token.type = JSONTokenType.RIGHT_BRACE;
|
|
token.value = '}';
|
|
nextChar();
|
|
break
|
|
|
|
case '[':
|
|
token.type = JSONTokenType.LEFT_BRACKET;
|
|
token.value = '[';
|
|
nextChar();
|
|
break
|
|
|
|
case ']':
|
|
token.type = JSONTokenType.RIGHT_BRACKET;
|
|
token.value = ']';
|
|
nextChar();
|
|
break
|
|
|
|
case ',':
|
|
token.type = JSONTokenType.COMMA;
|
|
token.value = ',';
|
|
nextChar();
|
|
break
|
|
|
|
case ':':
|
|
token.type = JSONTokenType.COLON;
|
|
token.value = ':';
|
|
nextChar();
|
|
break;
|
|
|
|
case 't': // attempt to read true
|
|
var possibleTrue:String = "t" + nextChar() + nextChar() + nextChar();
|
|
|
|
if ( possibleTrue == "true" )
|
|
{
|
|
token.type = JSONTokenType.TRUE;
|
|
token.value = true;
|
|
nextChar();
|
|
}
|
|
else
|
|
{
|
|
parseError( "Expecting 'true' but found " + possibleTrue );
|
|
}
|
|
|
|
break;
|
|
|
|
case 'f': // attempt to read false
|
|
var possibleFalse:String = "f" + nextChar() + nextChar() + nextChar() + nextChar();
|
|
|
|
if ( possibleFalse == "false" )
|
|
{
|
|
token.type = JSONTokenType.FALSE;
|
|
token.value = false;
|
|
nextChar();
|
|
}
|
|
else
|
|
{
|
|
parseError( "Expecting 'false' but found " + possibleFalse );
|
|
}
|
|
|
|
break;
|
|
|
|
case 'n': // attempt to read null
|
|
var possibleNull:String = "n" + nextChar() + nextChar() + nextChar();
|
|
|
|
if ( possibleNull == "null" )
|
|
{
|
|
token.type = JSONTokenType.NULL;
|
|
token.value = null;
|
|
nextChar();
|
|
}
|
|
else
|
|
{
|
|
parseError( "Expecting 'null' but found " + possibleNull );
|
|
}
|
|
|
|
break;
|
|
|
|
case 'N': // attempt to read NaN
|
|
var possibleNaN:String = "N" + nextChar() + nextChar();
|
|
|
|
if ( possibleNaN == "NaN" )
|
|
{
|
|
token.type = JSONTokenType.NAN;
|
|
token.value = NaN;
|
|
nextChar();
|
|
}
|
|
else
|
|
{
|
|
parseError( "Expecting 'NaN' but found " + possibleNaN );
|
|
}
|
|
|
|
break;
|
|
|
|
case '"': // the start of a string
|
|
token = readString();
|
|
break;
|
|
|
|
default:
|
|
// see if we can read a number
|
|
if ( isDigit( ch ) || ch == '-' )
|
|
{
|
|
token = readNumber();
|
|
}
|
|
else if ( ch == '' )
|
|
{
|
|
// check for reading past the end of the string
|
|
return null;
|
|
}
|
|
else
|
|
{
|
|
// not sure what was in the input string - it's not
|
|
// anything we expected
|
|
parseError( "Unexpected " + ch + " encountered" );
|
|
}
|
|
}
|
|
|
|
return token;
|
|
}
|
|
|
|
/**
|
|
* Attempts to read a string from the input string. Places
|
|
* the character location at the first character after the
|
|
* string. It is assumed that ch is " before this method is called.
|
|
*
|
|
* @return the JSONToken with the string value if a string could
|
|
* be read. Throws an error otherwise.
|
|
*/
|
|
private function readString():JSONToken
|
|
{
|
|
// Rather than examine the string character-by-character, it's
|
|
// faster to use indexOf to try to and find the closing quote character
|
|
// and then replace escape sequences after the fact.
|
|
|
|
// Start at the current input stream position
|
|
var quoteIndex:int = loc;
|
|
do
|
|
{
|
|
// Find the next quote in the input stream
|
|
quoteIndex = jsonString.indexOf( "\"", quoteIndex );
|
|
|
|
if ( quoteIndex >= 0 )
|
|
{
|
|
// We found the next double quote character in the string, but we need
|
|
// to make sure it is not part of an escape sequence.
|
|
|
|
// Keep looping backwards while the previous character is a backslash
|
|
var backspaceCount:int = 0;
|
|
var backspaceIndex:int = quoteIndex - 1;
|
|
while ( jsonString.charAt( backspaceIndex ) == "\\" )
|
|
{
|
|
backspaceCount++;
|
|
backspaceIndex--;
|
|
}
|
|
|
|
// If we have an even number of backslashes, that means this is the ending quote
|
|
if ( backspaceCount % 2 == 0 )
|
|
{
|
|
break;
|
|
}
|
|
|
|
// At this point, the quote was determined to be part of an escape sequence
|
|
// so we need to move past the quote index to look for the next one
|
|
quoteIndex++;
|
|
}
|
|
else // There are no more quotes in the string and we haven't found the end yet
|
|
{
|
|
parseError( "Unterminated string literal" );
|
|
}
|
|
} while ( true );
|
|
|
|
// Unescape the string
|
|
// the token for the string we'll try to read
|
|
var token:JSONToken = new JSONToken();
|
|
token.type = JSONTokenType.STRING;
|
|
// Attach resulting string to the token to return it
|
|
token.value = unescapeString( jsonString.substr( loc, quoteIndex - loc ) );
|
|
|
|
// Move past the closing quote in the input string. This updates the next
|
|
// character in the input stream to be the character one after the closing quote
|
|
loc = quoteIndex + 1;
|
|
nextChar();
|
|
|
|
return token;
|
|
}
|
|
|
|
/**
|
|
* Convert all JavaScript escape characters into normal characters
|
|
*
|
|
* @param input The input string to convert
|
|
* @return Original string with escape characters replaced by real characters
|
|
*/
|
|
public function unescapeString( input:String ):String
|
|
{
|
|
// Issue #104 - If the string contains any unescaped control characters, this
|
|
// is an error in strict mode
|
|
if ( strict && controlCharsRegExp.test( input ) )
|
|
{
|
|
parseError( "String contains unescaped control character (0x00-0x1F)" );
|
|
}
|
|
|
|
var result:String = "";
|
|
var backslashIndex:int = 0;
|
|
var nextSubstringStartPosition:int = 0;
|
|
var len:int = input.length;
|
|
do
|
|
{
|
|
// Find the next backslash in the input
|
|
backslashIndex = input.indexOf( '\\', nextSubstringStartPosition );
|
|
|
|
if ( backslashIndex >= 0 )
|
|
{
|
|
result += input.substr( nextSubstringStartPosition, backslashIndex - nextSubstringStartPosition );
|
|
|
|
// Move past the backslash and next character (all escape sequences are
|
|
// two characters, except for \u, which will advance this further)
|
|
nextSubstringStartPosition = backslashIndex + 2;
|
|
|
|
// Check the next character so we know what to escape
|
|
var afterBackslashIndex:int = backslashIndex + 1;
|
|
var escapedChar:String = input.charAt( afterBackslashIndex );
|
|
switch ( escapedChar )
|
|
{
|
|
// Try to list the most common expected cases first to improve performance
|
|
|
|
case '"': result += '"'; break; // quotation mark
|
|
case '\\': result += '\\'; break; // reverse solidus
|
|
case 'n': result += '\n'; break; // newline
|
|
case 'r': result += '\r'; break; // carriage return
|
|
case 't': result += '\t'; break; // horizontal tab
|
|
|
|
// Convert a unicode escape sequence to it's character value
|
|
case 'u':
|
|
|
|
// Save the characters as a string we'll convert to an int
|
|
var hexValue:String = "";
|
|
|
|
// Make sure there are enough characters in the string leftover
|
|
if ( nextSubstringStartPosition + 4 > len )
|
|
{
|
|
parseError( "Unexpected end of input. Expecting 4 hex digits after \\u." );
|
|
}
|
|
|
|
// Try to find 4 hex characters
|
|
for ( var i:int = nextSubstringStartPosition; i < nextSubstringStartPosition + 4; i++ )
|
|
{
|
|
// get the next character and determine
|
|
// if it's a valid hex digit or not
|
|
var possibleHexChar:String = input.charAt( i );
|
|
if ( !isHexDigit( possibleHexChar ) )
|
|
{
|
|
parseError( "Excepted a hex digit, but found: " + possibleHexChar );
|
|
}
|
|
|
|
// Valid hex digit, add it to the value
|
|
hexValue += possibleHexChar;
|
|
}
|
|
|
|
// Convert hexValue to an integer, and use that
|
|
// integer value to create a character to add
|
|
// to our string.
|
|
result += String.fromCharCode( parseInt( hexValue, 16 ) );
|
|
// Move past the 4 hex digits that we just read
|
|
nextSubstringStartPosition += 4;
|
|
break;
|
|
|
|
case 'f': result += '\f'; break; // form feed
|
|
case '/': result += '/'; break; // solidus
|
|
case 'b': result += '\b'; break; // bell
|
|
default: result += '\\' + escapedChar; // Couldn't unescape the sequence, so just pass it through
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// No more backslashes to replace, append the rest of the string
|
|
result += input.substr( nextSubstringStartPosition );
|
|
break;
|
|
}
|
|
|
|
} while ( nextSubstringStartPosition < len );
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Attempts to read a number from the input string. Places
|
|
* the character location at the first character after the
|
|
* number.
|
|
*
|
|
* @return The JSONToken with the number value if a number could
|
|
* be read. Throws an error otherwise.
|
|
*/
|
|
private function readNumber():JSONToken
|
|
{
|
|
// the string to accumulate the number characters
|
|
// into that we'll convert to a number at the end
|
|
var input:String = "";
|
|
|
|
// check for a negative number
|
|
if ( ch == '-' )
|
|
{
|
|
input += '-';
|
|
nextChar();
|
|
}
|
|
|
|
// the number must start with a digit
|
|
if ( !isDigit( ch ) )
|
|
{
|
|
parseError( "Expecting a digit" );
|
|
}
|
|
|
|
// 0 can only be the first digit if it
|
|
// is followed by a decimal point
|
|
if ( ch == '0' )
|
|
{
|
|
input += ch;
|
|
nextChar();
|
|
|
|
// make sure no other digits come after 0
|
|
if ( isDigit( ch ) )
|
|
{
|
|
parseError( "A digit cannot immediately follow 0" );
|
|
}
|
|
// unless we have 0x which starts a hex number, but this
|
|
// doesn't match JSON spec so check for not strict mode.
|
|
else if ( !strict && ch == 'x' )
|
|
{
|
|
// include the x in the input
|
|
input += ch;
|
|
nextChar();
|
|
|
|
// need at least one hex digit after 0x to
|
|
// be valid
|
|
if ( isHexDigit( ch ) )
|
|
{
|
|
input += ch;
|
|
nextChar();
|
|
}
|
|
else
|
|
{
|
|
parseError( "Number in hex format require at least one hex digit after \"0x\"" );
|
|
}
|
|
|
|
// consume all of the hex values
|
|
while ( isHexDigit( ch ) )
|
|
{
|
|
input += ch;
|
|
nextChar();
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// read numbers while we can
|
|
while ( isDigit( ch ) )
|
|
{
|
|
input += ch;
|
|
nextChar();
|
|
}
|
|
}
|
|
|
|
// check for a decimal value
|
|
if ( ch == '.' )
|
|
{
|
|
input += '.';
|
|
nextChar();
|
|
|
|
// after the decimal there has to be a digit
|
|
if ( !isDigit( ch ) )
|
|
{
|
|
parseError( "Expecting a digit" );
|
|
}
|
|
|
|
// read more numbers to get the decimal value
|
|
while ( isDigit( ch ) )
|
|
{
|
|
input += ch;
|
|
nextChar();
|
|
}
|
|
}
|
|
|
|
// check for scientific notation
|
|
if ( ch == 'e' || ch == 'E' )
|
|
{
|
|
input += "e"
|
|
nextChar();
|
|
// check for sign
|
|
if ( ch == '+' || ch == '-' )
|
|
{
|
|
input += ch;
|
|
nextChar();
|
|
}
|
|
|
|
// require at least one number for the exponent
|
|
// in this case
|
|
if ( !isDigit( ch ) )
|
|
{
|
|
parseError( "Scientific notation number needs exponent value" );
|
|
}
|
|
|
|
// read in the exponent
|
|
while ( isDigit( ch ) )
|
|
{
|
|
input += ch;
|
|
nextChar();
|
|
}
|
|
}
|
|
|
|
// convert the string to a number value
|
|
var num:Number = Number( input );
|
|
|
|
if ( isFinite( num ) && !isNaN( num ) )
|
|
{
|
|
// the token for the number that we've read
|
|
var token:JSONToken = new JSONToken();
|
|
token.type = JSONTokenType.NUMBER;
|
|
token.value = num;
|
|
return token;
|
|
}
|
|
else
|
|
{
|
|
parseError( "Number " + num + " is not valid!" );
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* Reads the next character in the input
|
|
* string and advances the character location.
|
|
*
|
|
* @return The next character in the input string, or
|
|
* null if we've read past the end.
|
|
*/
|
|
private function nextChar():String
|
|
{
|
|
return ch = jsonString.charAt( loc++ );
|
|
}
|
|
|
|
/**
|
|
* Advances the character location past any
|
|
* sort of white space and comments
|
|
*/
|
|
private function skipIgnored():void
|
|
{
|
|
var originalLoc:int;
|
|
|
|
// keep trying to skip whitespace and comments as long
|
|
// as we keep advancing past the original location
|
|
do
|
|
{
|
|
originalLoc = loc;
|
|
skipWhite();
|
|
skipComments();
|
|
}
|
|
while ( originalLoc != loc );
|
|
}
|
|
|
|
/**
|
|
* Skips comments in the input string, either
|
|
* single-line or multi-line. Advances the character
|
|
* to the first position after the end of the comment.
|
|
*/
|
|
private function skipComments():void
|
|
{
|
|
if ( ch == '/' )
|
|
{
|
|
// Advance past the first / to find out what type of comment
|
|
nextChar();
|
|
switch ( ch )
|
|
{
|
|
case '/': // single-line comment, read through end of line
|
|
|
|
// Loop over the characters until we find
|
|
// a newline or until there's no more characters left
|
|
do
|
|
{
|
|
nextChar();
|
|
}
|
|
while ( ch != '\n' && ch != '' )
|
|
|
|
// move past the \n
|
|
nextChar();
|
|
|
|
break;
|
|
|
|
case '*': // multi-line comment, read until closing */
|
|
|
|
// move past the opening *
|
|
nextChar();
|
|
|
|
// try to find a trailing */
|
|
while ( true )
|
|
{
|
|
if ( ch == '*' )
|
|
{
|
|
// check to see if we have a closing /
|
|
nextChar();
|
|
if ( ch == '/')
|
|
{
|
|
// move past the end of the closing */
|
|
nextChar();
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// move along, looking if the next character is a *
|
|
nextChar();
|
|
}
|
|
|
|
// when we're here we've read past the end of
|
|
// the string without finding a closing */, so error
|
|
if ( ch == '' )
|
|
{
|
|
parseError( "Multi-line comment not closed" );
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
// Can't match a comment after a /, so it's a parsing error
|
|
default:
|
|
parseError( "Unexpected " + ch + " encountered (expecting '/' or '*' )" );
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
|
|
/**
|
|
* Skip any whitespace in the input string and advances
|
|
* the character to the first character after any possible
|
|
* whitespace.
|
|
*/
|
|
private function skipWhite():void
|
|
{
|
|
// As long as there are spaces in the input
|
|
// stream, advance the current location pointer
|
|
// past them
|
|
while ( isWhiteSpace( ch ) )
|
|
{
|
|
nextChar();
|
|
}
|
|
|
|
}
|
|
|
|
/**
|
|
* Determines if a character is whitespace or not.
|
|
*
|
|
* @return True if the character passed in is a whitespace
|
|
* character
|
|
*/
|
|
private function isWhiteSpace( ch:String ):Boolean
|
|
{
|
|
// Check for the whitespace defined in the spec
|
|
if ( ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' )
|
|
{
|
|
return true;
|
|
}
|
|
// If we're not in strict mode, we also accept non-breaking space
|
|
else if ( !strict && ch.charCodeAt( 0 ) == 160 )
|
|
{
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Determines if a character is a digit [0-9].
|
|
*
|
|
* @return True if the character passed in is a digit
|
|
*/
|
|
private function isDigit( ch:String ):Boolean
|
|
{
|
|
return ( ch >= '0' && ch <= '9' );
|
|
}
|
|
|
|
/**
|
|
* Determines if a character is a hex digit [0-9A-Fa-f].
|
|
*
|
|
* @return True if the character passed in is a hex digit
|
|
*/
|
|
private function isHexDigit( ch:String ):Boolean
|
|
{
|
|
return ( isDigit( ch ) || ( ch >= 'A' && ch <= 'F' ) || ( ch >= 'a' && ch <= 'f' ) );
|
|
}
|
|
|
|
/**
|
|
* Raises a parsing error with a specified message, tacking
|
|
* on the error location and the original string.
|
|
*
|
|
* @param message The message indicating why the error occurred
|
|
*/
|
|
public function parseError( message:String ):void
|
|
{
|
|
throw new JSONParseError( message, loc, jsonString );
|
|
}
|
|
}
|
|
|
|
}
|