Commit 893652e2 authored by Rene Saarsoo
Browse files

Simplify comments and tokens merging.

Replaced the previous clumsy algorithm with a way simpler and faster one.
parent a701a688
Loading
Loading
Loading
Loading
+12 −23
Original line number Diff line number Diff line
@@ -22,33 +22,22 @@ module JsDuck
      comment["type"] == "Block" && !!(comment["value"] =~ /^\*/)
    end

    # Combines tokens and comments arrays into one array
    # while keeping them in correct order.
    def merge_tokens(tokens, comments)
      comments.each do |comment|
        position = index_of(comment["range"], tokens)
        tokens.insert(position, comment)
      end
      tokens
    end

    # Returns the index at which a token covering the given range
    # should be inserted into tokens, keeping the array ordered by
    # source position.
    #
    # range  - two-element array [start, end] of character offsets.
    # tokens - array of token hashes, each with a "range" key,
    #          already sorted by position; assumed not to overlap
    #          the given range.
    #
    # NOTE(review): this span was garbled by interleaved diff lines;
    # the coherent binary-search definition is reconstructed from the
    # original lines visible in the hunk.
    def index_of(range, tokens)
      # No tokens at all, or the range begins after the last token:
      # it goes at the very end.
      if tokens.length == 0 || tokens.last["range"][1] < range[0]
        return tokens.length
      end

      left = 0
      right = tokens.length - 1

      # Binary search for the gap between tokens containing the range.
      while left < right
        middle = (left + right) / 2

        if right - left == 1 && tokens[left]["range"][1] < range[0] && range[1] < tokens[right]["range"][0]
          # Range sits exactly between two adjacent tokens:
          # insert before the right one.
          break
        elsif range[1] < tokens[middle]["range"][0]
          right = middle
        else
          left = middle + 1
        end
      end

      right
    end

  end
+18 −43
Original line number Diff line number Diff line
@@ -2,70 +2,45 @@ require "jsduck/esprima_lexer"

describe JsDuck::EsprimaLexer do

  before do
  # Runs the lexer over the given JavaScript source and returns
  # just the "type" field of each resulting token.
  def tokenize(js)
    @lexer = JsDuck::EsprimaLexer.new
    tokens = @lexer.tokenize(js)
    tokens.map { |token| token["type"] }
  end

  describe "comment injection" do
  describe "tokenize" do
    it "works with comment in the middle" do
      tokens = [
        {"range" => [0, 11]},     # "use strict"
        {"range" => [12, 12]},    # ;
        # {"range" => [13, 26]},  # /** comment */
        {"range" => [27, 29]},    # var
        {"range" => [31, 33]},    # Foo
        {"range" => [34, 34]},    # ;
      tokenize("foo = /** */ 3;").should == [
        "Identifier", "Punctuator", "Block", "Numeric", "Punctuator"
      ]
      @lexer.index_of([13, 26], tokens).should == 2
    end

    it "works with comment at the beginning" do
      tokens = [
        # {"range" => [0, 14]},   # /** comment */
        {"range" => [16, 18]},    # var
        {"range" => [20, 22]},    # Foo
        {"range" => [23, 23]},    # ;
      tokenize("/** */ var Foo;").should == [
        "Block", "Keyword", "Identifier", "Punctuator"
      ]
      @lexer.index_of([0, 14], tokens).should == 0
    end

    it "works with comment at the end" do
      # Source shape: '"use strict"; /** comment */' — the doc-comment
      # at [13, 26] follows every real token, so it belongs at the
      # very end (index 2).
      toks = [
        {"range" => [0, 11]},   # "use strict"
        {"range" => [12, 12]},  # ;
      ]
      @lexer.index_of([13, 26], toks).should == 2
    end

    it "works when no tokens at all" do
      tokens = [
        # {"range" => [13, 26]},  # /** comment */
      tokenize("'use strict'; /** */").should == [
        "String", "Punctuator", "Block"
      ]
      @lexer.index_of([13, 26], tokens).should == 0
    end

    it "works when just one token before" do
      tokens = [
        {"range" => [0, 11]},     # "use strict"
        # {"range" => [13, 26]},  # /** comment */
    it "works when only comment" do
      tokenize(" /** I am comment*/ ").should == [
        "Block"
      ]
      @lexer.index_of([13, 26], tokens).should == 1
    end

    it "works when just one token after" do
      tokens = [
        # {"range" => [13, 26]},  # /** comment */
        {"range" => [30, 22]},     # "use strict"
    it "works when just one token before comment" do
      tokenize(" ; /** I am comment*/ ").should == [
        "Punctuator", "Block"
      ]
      @lexer.index_of([13, 26], tokens).should == 0
    end
    end

  describe "tokenize" do
    it "places doc-comments to correct spot" do
      @lexer.tokenize("foo = /** */ 3; /** */").map {|t| t["type"] }.should == [
        "Identifier", "Punctuator", "Block", "Numeric", "Punctuator", "Block"
    it "works when just one token after comment" do
      tokenize(" /** I am comment*/ z").should == [
        "Block", "Identifier"
      ]
    end
  end