Commit 7463b8ef authored by Rene Saarsoo's avatar Rene Saarsoo
Browse files

Separate EsprimaLexer and EsprimaTokenizer.

The former now implements the same API as old Lexer, while the
Tokenizer is a singleton class that loads esprima.js just once
and provides the tokenizing service.  Tokenizer also converts
all Esprima tokens to JsDuck::Lexer kind of tokens.
parent b34e0a40
Loading
Loading
Loading
Loading
+23 −31
Original line number Diff line number Diff line
require 'v8'
require 'json'
require 'jsduck/esprima_tokenizer'

module JsDuck

  # New experimental lexer that uses Esprima.js through V8.
  class EsprimaLexer
    def initialize
      @v8 = V8::Context.new
      @v8.load(File.dirname(File.dirname(File.dirname(File.dirname(__FILE__))))+"/esprima/esprima.js")
    # Tokenizes the whole input string up-front via the shared
    # EsprimaTokenizer singleton and positions this lexer at the
    # first token.
    #
    # Input must be a String of JavaScript source.
    def initialize(input)
      @tokens = EsprimaTokenizer.instance.tokenize(input)
      @position = 0
    end

    # Input must be a String.
    def tokenize(input)
      @v8['js'] = input
      program = JSON.parse(@v8.eval('JSON.stringify(esprima.parse(js, {tokens: true, comment: true}))'), :max_nesting => false)
      merge_tokens(program["tokens"], program["comments"].find_all {|c| doc_comment?(c) })
    def look(*tokens)
      i = @position
      tokens.all? do |t|
        tok = @tokens[i]
        i += 1
        if !tok
          false
        elsif t.instance_of?(Symbol)
          tok[:type] == t
        else
          tok[:value] == t
        end

    private

    # True if comment is a /** doc-comment */
    def doc_comment?(comment)
      comment["type"] == "Block" && !!(comment["value"] =~ /^\*/)
      end

    # Combines tokens and comments arrays into one array
    # while keeping them in correct order.
    def merge_tokens(tokens, comments)
      result = []
      com = comments.shift
      tok = tokens.shift
      while com || tok
        if !com || tok && (tok["range"][0] < com["range"][0])
          result << tok
          tok = tokens.shift
        else
          result << com
          com = comments.shift
    end

    # Consumes one token, advancing the position.
    #
    # By default returns just the token's value; pass full=true to get
    # the whole token hash instead.
    def next(full=false)
      token = @tokens[@position]
      @position += 1
      if full
        token
      else
        token[:value]
      end
    end
      result

    # True when every token has been consumed.
    def empty?
      @tokens[@position].nil?
    end

  end
+77 −0
Original line number Diff line number Diff line
require 'v8'
require 'json'
require 'singleton'

module JsDuck

  # Uses Esprima.js engine through V8 to tokenize JavaScript string.
  #
  # A Singleton, so the esprima.js source is loaded into a V8 context
  # just once per process, no matter how many lexers are created.
  class EsprimaTokenizer
    include Singleton

    def initialize
      @v8 = V8::Context.new
      # esprima.js lives four directory levels above this file;
      # File.expand_path is clearer than four nested File.dirname calls.
      @v8.load(File.expand_path("../../../../esprima/esprima.js", __FILE__))
    end

    # Tokenizes JavaScript source code.
    #
    # Input must be a String.  Returns an array of JsDuck-style token
    # hashes {:type => ..., :value => ...}; doc-comment tokens also
    # carry a 1-based :linenr.
    def tokenize(input)
      @v8['js'] = @input = input
      # :max_nesting => false, so deeply nested programs don't trip
      # JSON.parse's default nesting limit.
      program = JSON.parse(@v8.eval('JSON.stringify(esprima.parse(js, {tokens: true, comment: true}))'), :max_nesting => false)
      doc_comments = program["comments"].select {|c| doc_comment?(c) }
      merge_tokens(program["tokens"], doc_comments).map {|tok| to_jsduck_token(tok) }
    end

    private

    # True if comment is a /** doc-comment */
    #
    # Anchored with \A (not ^) so that a "*" at the start of a later
    # line of a plain /* ... */ comment doesn't falsely match.
    def doc_comment?(comment)
      comment["type"] == "Block" && !!(comment["value"] =~ /\A\*/)
    end

    # Combines tokens and comments arrays into one array while keeping
    # them ordered by source position.  Both input arrays are consumed
    # destructively (shifted empty).
    def merge_tokens(tokens, comments)
      result = []
      com = comments.shift
      tok = tokens.shift
      while com || tok
        if !com || tok && (tok["range"][0] < com["range"][0])
          result << tok
          tok = tokens.shift
        else
          result << com
          com = comments.shift
        end
      end
      result
    end

    # Converts an Esprima token to a JsDuck::Lexer-style token hash.
    def to_jsduck_token(tok)
      case tok["type"]
      when "Numeric"
        {:type => :number, :value => tok["value"]}
      when "String"
        {:type => :string, :value => tok["value"]}
      when "Identifier"
        {:type => :ident, :value => tok["value"]}
      when "RegularExpression"
        {:type => :regex, :value => tok["value"]}
      when "Punctuator"
        {:type => :operator, :value => tok["value"]}
      when "Keyword"
        kw = tok["value"].to_sym
        {:type => kw, :value => kw}
      when "Block"
        {
          :type => :doc_comment,
          # Esprima strips the /* */ delimiters; add them back.
          :value => "/*#{tok['value']}*/",
          # Line number = newlines before the comment start + 1.
          :linenr => @input[0...tok["range"][0]].count("\n") + 1,
        }
      else
        # raise, not throw: Ruby's throw is catch/throw control flow and
        # would abort with UncaughtThrowError instead of a RuntimeError.
        raise "Unknown Esprima token type #{tok['type']}"
      end
    end

  end
end
+32 −28
Original line number Diff line number Diff line
@@ -2,46 +2,50 @@ require "jsduck/esprima_lexer"

describe JsDuck::EsprimaLexer do

  def tokenize(js)
    @lexer = JsDuck::EsprimaLexer.new
    @lexer.tokenize(js).map {|t| t["type"] }
  def lexer(input)
    JsDuck::EsprimaLexer.new(input)
  end

  describe "tokenize" do
    it "works with comment in the middle" do
      tokenize("foo = /** */ 3;").should == [
        "Identifier", "Punctuator", "Block", "Numeric", "Punctuator"
      ]
  describe "empty?" do
    it "is true when no tokens" do
      lexer("").empty?.should == true
    end

    it "works with comment at the beginning" do
      tokenize("/** */ var Foo;").should == [
        "Block", "Keyword", "Identifier", "Punctuator"
      ]
    it "is false when there are tokens" do
      lexer(";").empty?.should == false
    end
  end

  describe "next()" do
    it "gives value of next token" do
      lexer("var x;").next.should == :var
    end

    it "works with comment at the end" do
      tokenize("'use strict'; /** */").should == [
        "String", "Punctuator", "Block"
      ]
    it "gives value of the n-th token when called n times" do
      lex = lexer("var x;")
      lex.next
      lex.next
      lex.next.should == ";"
    end
  end

  describe "next(true)" do
    it "gives full next token" do
      lexer(";").next(true).should == {:type => :operator, :value => ";"}
    end
  end

    it "works when only comment" do
      tokenize(" /** I am comment*/ ").should == [
        "Block"
      ]
  describe "look()" do
    it "is true when all params match" do
      lexer("var x = 10;").look(:var, "x", "=").should == true
    end

    it "works when just one token before comment" do
      tokenize(" ; /** I am comment*/ ").should == [
        "Punctuator", "Block"
      ]
    it "is false when at least one param doesn't match" do
      lexer("var x = 10;").look(:var, "y", "=").should == false
    end

    it "works when just one token after comment" do
      tokenize(" /** I am comment*/ z").should == [
        "Block", "Identifier"
      ]
    it "is false when not enough tokens" do
      lexer(";").look(";", :var).should == false
    end
  end

+55 −0
Original line number Diff line number Diff line
require "jsduck/esprima_tokenizer"

describe JsDuck::EsprimaTokenizer do

  # Helper: tokenizes JS source and returns just the token :type
  # symbols, which is all most examples below assert on.
  def tokenize(js)
    JsDuck::EsprimaTokenizer.instance.tokenize(js).map {|t| t[:type] }
  end

  describe "tokenize" do
    it "works with comment in the middle" do
      tokenize("foo = /** */ 3;").should == [
        :ident, :operator, :doc_comment, :number, :operator
      ]
    end

    it "works with comment at the beginning" do
      tokenize("/** */ var Foo;").should == [
        :doc_comment, :var, :ident, :operator
      ]
    end

    it "works with comment at the end" do
      tokenize("'use strict'; /** */").should == [
        :string, :operator, :doc_comment
      ]
    end

    it "works when only comment" do
      tokenize(" /** I am comment*/ ").should == [
        :doc_comment
      ]
    end

    it "works when just one token before comment" do
      tokenize(" ; /** I am comment*/ ").should == [
        :operator, :doc_comment
      ]
    end

    it "works when just one token after comment" do
      tokenize(" /** I am comment*/ z").should == [
        :doc_comment, :ident
      ]
    end

    # :linenr is 1-based: the three comments start on source lines 3-5.
    it "augments :doc_comment token with line number" do
      tokens = JsDuck::EsprimaTokenizer.instance.tokenize("\n \n /**Com1*/ \n /**Com2\n*/ /**Com3*/")
      tokens[0][:linenr].should == 3
      tokens[1][:linenr].should == 4
      tokens[2][:linenr].should == 5
    end
  end

end