Commit a701a688 authored by Rene Saarsoo's avatar Rene Saarsoo

New lexer that uses Esprima.js internally.

Unlike our old lexer, Esprima gives us comment nodes and normal tokens
as separate lists, which we have to merge together into a continuous
stream.
parent 1b26df57
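
For illustration, a rough sketch of the merge step (the hashes below are simplified stand-ins, not actual Esprima output):

tokens   = [{"type" => "Identifier", "range" => [19, 21]},
            {"type" => "Punctuator", "range" => [23, 23]}]
comments = [{"type" => "Block", "value" => "* Doc ", "range" => [0, 17]}]
# Inserting each comment at the index implied by its range yields one
# stream ordered by source position: Block, Identifier, Punctuator.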
+55 −0
require 'v8'
require 'json'

module JsDuck

  # New experimental lexer that uses Esprima.js through V8.
  class EsprimaLexer
    def initialize
      @v8 = V8::Context.new
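      # Load esprima.js from an esprima/ directory expected to sit
      # alongside the jsduck checkout.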
      @v8.load(File.expand_path("../../../../esprima/esprima.js", __FILE__))
    end

    # Input must be a String.
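    # Returns an array of token hashes (Esprima tokens converted to
    # plain Ruby data) with doc-comments merged in at their positions.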
    def tokenize(input)
      @v8['js'] = input
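      # Run Esprima inside V8; a JSON round-trip brings the result back
      # to Ruby as plain hashes and arrays.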
      program = JSON.parse(@v8.eval('JSON.stringify(esprima.parse(js, {tokens: true, comment: true}))'), :max_nesting => false)
      merge_tokens(program["tokens"], program["comments"].find_all {|c| doc_comment?(c) })
    end

    # True if the comment is a /** doc-comment */
    def doc_comment?(comment)
      comment["type"] == "Block" && !!(comment["value"] =~ /^\*/)
    end

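    # Inserts each comment into the token list at the index implied by
    # its source range, producing one continuous stream.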
    def merge_tokens(tokens, comments)
      comments.each {|c| tokens.insert(index_of(c["range"], tokens), c) }
      tokens
    end

    # Returns the index at which a token with the given range should be inserted.
    def index_of(range, tokens)
      if tokens.length == 0 || tokens.last["range"][1] < range[0]
        return tokens.length
      end

      left = 0
      right = tokens.length - 1

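      # Binary search for the first token that begins after the given
      # range ends.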
      while left < right
        middle = (left + right) / 2

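        # tokens[left] and tokens[right] are adjacent and the range fits
        # in the gap between them, so right is the insertion point.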
        if right - left == 1 && tokens[left]["range"][1] < range[0] && range[1] < tokens[right]["range"][0]
          break
        elsif range[1] < tokens[middle]["range"][0]
          right = middle
        else
          left = middle + 1
        end
      end

      right
    end

  end
end
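
A minimal usage sketch of the class above (the input string is made up; assumes jsduck's lib/ directory is on the load path and esprima.js is in place as described in the initializer):

require "jsduck/esprima_lexer"

lexer = JsDuck::EsprimaLexer.new
tokens = lexer.tokenize("/** Doc-comment */ var foo = 5;")
# Doc-comments show up as "Block" entries among the regular tokens.
tokens.each {|t| puts "#{t['type']}: #{t['value'].inspect}" }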
+74 −0
require "jsduck/esprima_lexer"

describe JsDuck::EsprimaLexer do

  before do
    @lexer = JsDuck::EsprimaLexer.new
  end

  describe "comment injection" do
    it "works with comment in the middle" do
      tokens = [
        {"range" => [0, 11]},     # "use strict"
        {"range" => [12, 12]},    # ;
        # {"range" => [13, 26]},  # /** comment */
        {"range" => [27, 29]},    # var
        {"range" => [31, 33]},    # Foo
        {"range" => [34, 34]},    # ;
      ]
      @lexer.index_of([13, 26], tokens).should == 2
    end

    it "works with comment at the beginning" do
      tokens = [
        # {"range" => [0, 14]},   # /** comment */
        {"range" => [16, 18]},    # var
        {"range" => [20, 22]},    # Foo
        {"range" => [23, 23]},    # ;
      ]
      @lexer.index_of([0, 14], tokens).should == 0
    end

    it "works with comment at the end" do
      tokens = [
        {"range" => [0, 11]},     # "use strict"
        {"range" => [12, 12]},    # ;
        # {"range" => [13, 26]},  # /** comment */
      ]
      @lexer.index_of([13, 26], tokens).should == 2
    end

    it "works when no tokens at all" do
      tokens = [
        # {"range" => [13, 26]},  # /** comment */
      ]
      @lexer.index_of([13, 26], tokens).should == 0
    end

    it "works when just one token before" do
      tokens = [
        {"range" => [0, 11]},     # "use strict"
        # {"range" => [13, 26]},  # /** comment */
      ]
      @lexer.index_of([13, 26], tokens).should == 1
    end

    it "works when just one token after" do
      tokens = [
        # {"range" => [13, 26]},  # /** comment */
        {"range" => [30, 22]},     # "use strict"
      ]
      @lexer.index_of([13, 26], tokens).should == 0
    end
  end

  describe "tokenize" do
    it "places doc-comments to correct spot" do
      @lexer.tokenize("foo = /** */ 3; /** */").map {|t| t["type"] }.should == [
        "Identifier", "Punctuator", "Block", "Numeric", "Punctuator", "Block"
      ]
    end
  end

end