self::S_OPENCURLYBRACES, '}' => self::S_CLOSECURLYBRACES, '(' => self::S_OPENPARENTHESIS, ')' => self::S_CLOSEPARENTHESIS, '<' => self::S_LOWERTHAN, '>' => self::S_GREATERTHAN, '[' => self::S_OPENBRACKET, ']' => self::S_CLOSEBRACKET, ':' => self::S_COLON, ';' => self::S_SEMICOLON, '@' => self::S_AT, '\\' => self::S_BACKSLASH, '/' => self::S_SLASH, ',' => self::S_COMMA, '.' => self::S_DOT, "'" => self::S_SQUOTE, "`" => self::S_BACKTICK, '"' => self::S_DQUOTE, '-' => self::S_HYPHEN, '::' => self::S_DOUBLECOLON, ' ' => self::S_SP, "\t" => self::S_HTAB, "\r" => self::S_CR, "\n" => self::S_LF, "\r\n" => self::CRLF, 'IPv6' => self::S_IPV6TAG, '' => self::S_EMPTY, '\0' => self::C_NUL, '*' => self::ASTERISK, '!' => self::EXCLAMATION, '&' => self::AMPERSAND, '^' => self::CARET, '$' => self::DOLLAR, '%' => self::PERCENTAGE, '~' => self::S_TILDE, '|' => self::S_PIPE, '_' => self::S_UNDERSCORE, '=' => self::S_EQUAL, '+' => self::S_PLUS, '¿' => self::INVERT_QUESTIONMARK, '?' => self::QUESTIONMARK, '#' => self::NUMBER_SIGN, '¡' => self::INVERT_EXCLAMATION, ); /** * @var bool */ protected $hasInvalidTokens = false; /** * @var array * * @psalm-var array{value:string, type:null|int, position:int}|array */ protected $previous = []; /** * The last matched/seen token. * * @var array * * @psalm-suppress NonInvariantDocblockPropertyType * @psalm-var array{value:string, type:null|int, position:int} * @psalm-suppress NonInvariantDocblockPropertyType */ public $token; /** * The next token in the input. 
*
     * @var array|null
     */
    public $lookahead;

    /**
     * Placeholder token used whenever no real token is available.
     *
     * @psalm-var array{value:'', type:null, position:0}
     */
    private static $nullToken = [
        'value' => '',
        'type' => null,
        'position' => 0,
    ];

    /**
     * Token values collected by moveNext() while recording is switched on.
     *
     * @var string
     */
    private $accumulator = '';

    /**
     * Whether moveNext() appends token values to the accumulator.
     *
     * @var bool
     */
    private $hasToRecord = false;

    /**
     * Starts with the null token as both current and previous token, and no lookahead.
     */
    public function __construct()
    {
        $this->token = self::$nullToken;
        $this->previous = self::$nullToken;
        $this->lookahead = null;
    }

    /**
     * Clears the invalid-token flag, resets the parent lexer and restores the
     * null token as both current and previous token.
     *
     * @return void
     */
    public function reset()
    {
        $this->hasInvalidTokens = false;
        parent::reset();

        // The null token is re-installed only after the parent reset has run.
        $this->token = self::$nullToken;
        $this->previous = self::$nullToken;
    }

    /**
     * Tells whether getType() has flagged at least one token as invalid.
     *
     * @return bool
     */
    public function hasInvalidTokens()
    {
        return $this->hasInvalidTokens;
    }

    /**
     * Checks that a token of the given type can be reached from the current position.
     *
     * The search runs on a clone, so this lexer's own position is left untouched.
     *
     * @param int $type
     * @throws \UnexpectedValueException when the clone has no lookahead left after skipping
     * @return bool always true when no exception is thrown
     *
     * @psalm-suppress InvalidScalarArgument
     */
    public function find($type)
    {
        $probe = clone $this;
        $probe->skipUntil($type);

        if ($probe->lookahead) {
            return true;
        }

        throw new \UnexpectedValueException($type . ' not found');
    }

    /**
     * Returns the token that was current before the last moveNext() call.
     *
     * @return array
     */
    public function getPrevious()
    {
        return $this->previous;
    }

    /**
     * Advances to the next token, keeping the former current token as "previous".
     *
     * While recording is on, the value of the new current token is appended to
     * the accumulator. If the previous token is still the null token before
     * advancing, the value of the token that is current at that moment is
     * appended first.
     *
     * @return bool the result of the parent's moveNext()
     */
    public function moveNext()
    {
        if ($this->hasToRecord) {
            if ($this->previous === self::$nullToken) {
                $this->accumulator .= $this->token['value'];
            }
        }

        $this->previous = $this->token;
        $advanced = parent::moveNext();

        // Fall back to the null token when the parent left no usable current token.
        if (!$this->token) {
            $this->token = self::$nullToken;
        }

        if ($this->hasToRecord) {
            $this->accumulator .= $this->token['value'];
        }

        return $advanced;
    }

    /**
     * Patterns whose matches are turned into tokens.
     *
     * @return string[]
     */
    protected function getCatchablePatterns()
    {
        return [
            // ASCII and domain literal
            '[a-zA-Z]+[46]?',
            // UTF-8
            '[^\x00-\x7F]',
            '[0-9]+',
            '\r\n',
            '::',
            '\s+?',
            '.',
        ];
    }

    /**
     * Patterns that are matched but not turned into tokens.
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns()
    {
        return ['[\xA0-\xff]+'];
    }

    /**
     * Retrieve token type. Also processes the token value if necessary.
*
     * A value that is not detected as UTF-8 is converted from ISO-8859-1 to
     * UTF-8 before it is classified; the referenced $value itself is not modified.
     *
     * @param string $value
     * @return int the mapped type for a known value, otherwise C_NUL, INVALID or GENERIC
     */
    protected function getType(&$value)
    {
        $encoded = $value;

        if (mb_detect_encoding($value, 'auto', true) !== 'UTF-8') {
            // utf8_encode() is deprecated since PHP 8.2; this performs the same
            // ISO-8859-1 to UTF-8 conversion.
            $encoded = mb_convert_encoding($value, 'UTF-8', 'ISO-8859-1');
        }

        if ($this->isValid($encoded)) {
            return $this->charValue[$encoded];
        }

        if ($this->isNullType($encoded)) {
            return self::C_NUL;
        }

        if ($this->isInvalidChar($encoded)) {
            // Remember that the input contained at least one invalid token.
            $this->hasInvalidTokens = true;

            return self::INVALID;
        }

        return self::GENERIC;
    }

    /**
     * Tells whether the value consists only of characters from the Unicode
     * "Symbol" (S) and "Other" (C) categories.
     *
     * Returns false as soon as one character outside those categories is found.
     * Any other outcome of the match, including a failed match, returns true.
     */
    protected function isInvalidChar(string $value) : bool
    {
        return preg_match("/[^\p{S}\p{C}\p{Cc}]+/iu", $value) !== 1;
    }

    /**
     * Tells whether the value has an entry in the character-to-type map.
     */
    protected function isValid(string $value) : bool
    {
        return isset($this->charValue[$value]);
    }

    /**
     * Tells whether the value is exactly the NUL character.
     *
     * @param string $value
     * @return bool
     */
    protected function isNullType($value)
    {
        return $value === "\0";
    }

    /**
     * Tells whether the value contains at least one control character (\p{Cc}).
     */
    protected function isUTF8Invalid(string $value) : bool
    {
        return preg_match('/\p{Cc}+/u', $value) === 1;
    }

    /**
     * Regex modifiers returned for the lexer patterns: "i" (case-insensitive)
     * and "u" (UTF-8).
     *
     * @return string
     */
    protected function getModifiers()
    {
        return 'iu';
    }

    /**
     * Returns the token values accumulated so far.
     */
    public function getAccumulatedValues() : string
    {
        return $this->accumulator;
    }

    /**
     * Switches recording on.
     */
    public function startRecording() : void
    {
        $this->hasToRecord = true;
    }

    /**
     * Switches recording off; the accumulated value is kept.
     */
    public function stopRecording() : void
    {
        $this->hasToRecord = false;
    }

    /**
     * Empties the accumulated value.
     */
    public function clearRecorded() : void
    {
        $this->accumulator = '';
    }
}