using System.Diagnostics;

namespace dotless.Core.Parser
{
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;
    using Exceptions;
    using Infrastructure.Nodes;
    using Utils;

    [DebuggerDisplay("{Remaining}")]
    public class Tokenizer
    {
        public int Optimization { get; set; }

        private string _input;              // LeSS input string
        private List<Chunk> _chunks;        // chunkified input
        private int _i;                     // current index in `input`
        private int _j;                     // current chunk
        private int _current;               // index of current chunk, in `input`
        private int _lastCommentStart = -1; // the start of the last collection of comments
        private int _lastCommentEnd = -1;   // the end of the last collection of comments
        private int _inputLength;

        private readonly string _commentRegEx = @"(//[^\n]*|(/\*(.|[\r\n])*?\*/))";
        private readonly string _quotedRegEx = @"(""((?:[^""\\\r\n]|\\.)*)""|'((?:[^'\\\r\n]|\\.)*)')";
        private string _fileName;

        // Cache compiled Regex objects to increase throughput
        private IDictionary<string, Regex> regexCache = new Dictionary<string, Regex>();

        public Tokenizer(int optimization)
        {
            Optimization = optimization;
        }

        public void SetupInput(string input, string fileName)
        {
            _fileName = fileName;
            _i = _j = _current = 0;
            _chunks = new List<Chunk>();
            _input = input.Replace("\r\n", "\n");
            _inputLength = _input.Length;

            // Split the input into chunks, depending on the level of optimization:
            // at optimization 0 the whole input is a single chunk; otherwise comments
            // and quoted strings become chunks of their own and a '}' finalizes the
            // current text chunk.
            if (Optimization == 0)
                _chunks.Add(new Chunk(_input));
            else
            {
                var skip = new Regex(@"\G(@\{[a-zA-Z0-9_-]+\}|[^\""'{}/\\\(\)]+)");
                var comment = GetRegex(this._commentRegEx, RegexOptions.None);
                var quotedstring = GetRegex(this._quotedRegEx, RegexOptions.None);
                var level = 0;
                var lastBlock = 0;
                var inParam = false;

                int i = 0;
                while (i < _inputLength)
                {
                    var match = skip.Match(_input, i);
                    if (match.Success)
                    {
                        Chunk.Append(match.Value, _chunks);
                        i += match.Length;
                        continue;
                    }

                    var c = _input[i];

                    if (i < _inputLength - 1 && c == '/')
                    {
                        var cc = _input[i + 1];
                        if ((!inParam && cc == '/') || cc == '*')
                        {
                            match = comment.Match(_input, i);
                            if (match.Success)
                            {
                                i += match.Length;
                                _chunks.Add(new Chunk(match.Value, ChunkType.Comment));
                                continue;
                            }
                            else
                            {
                                throw new ParsingException("Missing closing comment", GetNodeLocation(i));
                            }
                        }
                    }

                    if (c == '"' || c == '\'')
                    {
                        match = quotedstring.Match(_input, i);
                        if (match.Success)
                        {
                            i += match.Length;
                            _chunks.Add(new Chunk(match.Value, ChunkType.QuotedString));
                            continue;
                        }
                        else
                        {
                            throw new ParsingException(string.Format("Missing closing quote ({0})", c), GetNodeLocation(i));
                        }
                    }

                    // we are not in a quoted string or comment - process '{' level
                    if (!inParam && c == '{')
                    {
                        level++;
                        lastBlock = i;
                    }
                    else if (!inParam && c == '}')
                    {
                        level--;

                        if (level < 0)
                            throw new ParsingException("Unexpected '}'", GetNodeLocation(i));

                        Chunk.Append(c, _chunks, true);
                        i++;
                        continue;
                    }

                    if (c == '(')
                    {
                        inParam = true;
                    }
                    else if (c == ')')
                    {
                        inParam = false;
                    }

                    Chunk.Append(c, _chunks);
                    i++;
                }

                if (level > 0)
                    throw new ParsingException("Missing closing '}'", GetNodeLocation(lastBlock));

                _input = Chunk.CommitAll(_chunks);
                _inputLength = _input.Length;
            }

            Advance(0); // skip any whitespace characters at the start.
        }

        public string GetComment()
        {
            // if we've hit the end we might still be looking at a valid chunk, so return early
            if (_i == _inputLength)
            {
                return null;
            }

            string val;
            int startI = _i;
            int endI = 0;

            if (Optimization == 0)
            {
                if (this.CurrentChar != '/')
                    return null;

                var comment = this.Match(this._commentRegEx);
                if (comment == null)
                {
                    return null;
                }

                val = comment.Value;
                endI = startI + comment.Value.Length;
            }
            else
            {
                if (_chunks[_j].Type == ChunkType.Comment)
                {
                    val = _chunks[_j].Value;
                    endI = _i + _chunks[_j].Value.Length;
                    Advance(_chunks[_j].Value.Length);
                }
                else
                {
                    return null;
                }
            }

            if (_lastCommentEnd != startI)
            {
                _lastCommentStart = startI;
            }

            _lastCommentEnd = endI;

            return val;
        }

        public string GetQuotedString()
        {
            // if we've hit the end we might still be looking at a valid chunk, so return early
            if (_i == _inputLength)
            {
                return null;
            }

            if (Optimization == 0)
            {
                if (this.CurrentChar != '"' && this.CurrentChar != '\'')
                    return null;

                // guard against an unterminated string, which leaves Match with no result
                var quotedstring = this.Match(this._quotedRegEx);
                return quotedstring == null ? null : quotedstring.Value;
            }
            else
            {
                if (_chunks[_j].Type == ChunkType.QuotedString)
                {
                    string val = _chunks[_j].Value;
                    Advance(_chunks[_j].Value.Length);
                    return val;
                }
            }

            return null;
        }

        public string MatchString(char tok)
        {
            var c = Match(tok);
            return c == null ? null : c.Value;
        }

        public string MatchString(string tok)
        {
            var match = Match(tok);
            return match == null ? null : match.Value;
        }

        //
        // Parse from a token, regexp or string, and move forward if match
        //
        public CharMatchResult Match(char tok)
        {
            if (_i == _inputLength || _chunks[_j].Type != ChunkType.Text)
            {
                return null;
            }

            if (_input[_i] == tok)
            {
                var index = _i;
                Advance(1);
                return new CharMatchResult(tok) { Location = GetNodeLocation(index) };
            }

            return null;
        }

        public RegexMatchResult Match(string tok)
        {
            return Match(tok, false);
        }

        public RegexMatchResult Match(string tok, bool caseInsensitive)
        {
            if (_i == _inputLength || _chunks[_j].Type != ChunkType.Text)
            {
                return null;
            }

            var options = RegexOptions.None;
            if (caseInsensitive)
                options |= RegexOptions.IgnoreCase;

            var regex = GetRegex(tok, options);
            var match = regex.Match(_chunks[_j].Value, _i - _current);

            if (!match.Success)
                return null;

            var index = _i;
            Advance(match.Length);

            return new RegexMatchResult(match) { Location = GetNodeLocation(index) };
        }

        // Match a string, but include the possibility of matching quoted and comments
        public RegexMatchResult MatchAny(string tok)
        {
            if (_i == _inputLength)
            {
                return null;
            }

            var regex = GetRegex(tok, RegexOptions.None);
            var match = regex.Match(_input, _i);

            if (!match.Success)
                return null;

            Advance(match.Length);

            if (_i > _current && _i < _current + _chunks[_j].Value.Length)
            {
                // If we absorbed the start of an inline comment then turn it into text so the rest can be absorbed
                if (_chunks[_j].Type == ChunkType.Comment && _chunks[_j].Value.StartsWith("//"))
                {
                    _chunks[_j].Type = ChunkType.Text;
                }
            }

            return new RegexMatchResult(match);
        }

        public void Advance(int length)
        {
            if (_i == _inputLength) // only for empty cases as there may not be any chunks
                return;

            // The match is confirmed, add the match length to `i`,
            // and consume any extra white-space characters (' ' || '\n')
            // which come after that. The reason for this is that LeSS's
            // grammar is mostly white-space insensitive.
            _i += length;

            var endIndex = _current + _chunks[_j].Value.Length;

            while (true)
            {
                if (_i == _inputLength)
                    break;

                if (_i >= endIndex)
                {
                    if (_j < _chunks.Count - 1)
                    {
                        _current = endIndex;
                        endIndex += _chunks[++_j].Value.Length;
                        continue; // allow skipping multiple chunks
                    }
                    else
                        break;
                }

                if (!char.IsWhiteSpace(_input[_i]))
                    break;

                _i++;
            }
        }

        // Same as Match, but don't change the state of the parser,
        // just return the match.
        public bool Peek(char tok)
        {
            if (_i == _inputLength)
                return false;

            return _input[_i] == tok;
        }

        public bool Peek(string tok)
        {
            var regex = GetRegex(tok, RegexOptions.None);
            var match = regex.Match(_input, _i);

            return match.Success;
        }

        public bool PeekAfterComments(char tok)
        {
            var memo = this.Location;

            // skip over any leading comments before peeking
            while (GetComment() != null)
            {
            }

            var peekSuccess = Peek(tok);

            this.Location = memo;

            return peekSuccess;
        }

        private Regex GetRegex(string pattern, RegexOptions options)
        {
            // Key the cache on pattern and options so case-sensitive and
            // case-insensitive requests for the same pattern don't collide.
            var key = pattern + "|" + options;
            if (!regexCache.ContainsKey(key))
                regexCache.Add(key, new Regex(@"\G" + pattern, options));

            return regexCache[key];
        }

        public char GetPreviousCharIgnoringComments()
        {
            if (_i == 0)
            {
                return '\0';
            }

            if (_i != _lastCommentEnd)
            {
                return PreviousChar;
            }

            int i = _lastCommentStart - 1;

            if (i < 0)
            {
                return '\0';
            }

            return _input[i];
        }

        public char PreviousChar
        {
            get { return _i == 0 ? '\0' : _input[_i - 1]; }
        }

        public char CurrentChar
        {
            get { return _i == _inputLength ? '\0' : _input[_i]; }
        }

        public char NextChar
        {
            get { return _i + 1 == _inputLength ? '\0' : _input[_i + 1]; }
        }

        public bool HasCompletedParsing()
        {
            return _i == _inputLength;
        }

        public Location Location
        {
            get
            {
                return new Location
                {
                    Index = _i,
                    CurrentChunk = _j,
                    CurrentChunkIndex = _current
                };
            }
            set
            {
                _i = value.Index;
                _j = value.CurrentChunk;
                _current = value.CurrentChunkIndex;
            }
        }

        public NodeLocation GetNodeLocation(int index)
        {
            return new NodeLocation(index, this._input, this._fileName);
        }

        public NodeLocation GetNodeLocation()
        {
            return GetNodeLocation(this.Location.Index);
        }

        private enum ChunkType
        {
            Text,
            Comment,
            QuotedString
        }

        private class Chunk
        {
            private StringBuilder _builder;

            public Chunk(string val)
            {
                Value = val;
                Type = ChunkType.Text;
            }

            public Chunk(string val, ChunkType type)
            {
                Value = val;
                Type = type;
            }

            public Chunk()
            {
                _builder = new StringBuilder();
                Type = ChunkType.Text;
            }

            public ChunkType Type { get; set; }

            public string Value { get; set; }

            private bool _final;

            public void Append(string str)
            {
                _builder.Append(str);
            }

            public void Append(char c)
            {
                _builder.Append(c);
            }

            private static Chunk ReadyForText(List<Chunk> chunks)
            {
                Chunk last = chunks.LastOrDefault();
                if (last == null || last.Type != ChunkType.Text || last._final == true)
                {
                    last = new Chunk();
                    chunks.Add(last);
                }
                return last;
            }

            public static void Append(char c, List<Chunk> chunks, bool final)
            {
                Chunk chunk = ReadyForText(chunks);
                chunk.Append(c);
                chunk._final = final;
            }

            public static void Append(char c, List<Chunk> chunks)
            {
                Chunk chunk = ReadyForText(chunks);
                chunk.Append(c);
            }

            public static void Append(string s, List<Chunk> chunks)
            {
                Chunk chunk = ReadyForText(chunks);
                chunk.Append(s);
            }

            public static string CommitAll(List<Chunk> chunks)
            {
                StringBuilder all = new StringBuilder();
                foreach (Chunk chunk in chunks)
                {
                    if (chunk._builder != null)
                    {
                        string val = chunk._builder.ToString();
                        chunk._builder = null;
                        chunk.Value = val;
                    }
                    all.Append(chunk.Value);
                }
                return all.ToString();
            }
        }

        private string Remaining
        {
            get { return _input.Substring(_i); }
        }
    }

    public class Location
    {
        public int Index { get; set; }
        public int CurrentChunk { get; set; }
        public int CurrentChunkIndex { get; set; }
    }
}