Commit 7463b8ef authored by Rene Saarsoo's avatar Rene Saarsoo
Browse files

Separate EsprimaLexer and EsprimaTokenizer.

The former now implements the same API as old Lexer, while the
Tokenizer is a singleton class that loads esprima.js just once
and provides the tokenizing service.  Tokenizer also converts
all Esprima tokens to JsDuck::Lexer kind of tokens.
parent b34e0a40
Loading
Loading
Loading
Loading
+23 −31
Original line number Diff line number Diff line
require 'v8'
require 'json'
require 'jsduck/esprima_tokenizer'

module JsDuck

  # New experimental lexer that uses Esprima.js through V8.
  class EsprimaLexer
    def initialize
      @v8 = V8::Context.new
      @v8.load(File.dirname(File.dirname(File.dirname(File.dirname(__FILE__))))+"/esprima/esprima.js")
    # Tokenizes the whole input string up-front via the shared
    # EsprimaTokenizer singleton and positions this lexer at the
    # first token.
    #
    # Input must be a String of JavaScript source.
    def initialize(input)
      @tokens = EsprimaTokenizer.instance.tokenize(input)
      @position = 0
    end

    # Input must be a String.
    def tokenize(input)
      @v8['js'] = input
      program = JSON.parse(@v8.eval('JSON.stringify(esprima.parse(js, {tokens: true, comment: true}))'), :max_nesting => false)
      merge_tokens(program["tokens"], program["comments"].find_all {|c| doc_comment?(c) })
    def look(*tokens)
      i = @position
      tokens.all? do |t|
        tok = @tokens[i]
        i += 1
        if !tok
          false
        elsif t.instance_of?(Symbol)
          tok[:type] == t
        else
          tok[:value] == t
        end

    private

    # True if comment is a /** doc-comment */
    def doc_comment?(comment)
      comment["type"] == "Block" && !!(comment["value"] =~ /^\*/)
      end

    # Combines tokens and comments arrays into one array
    # while keeping them in correct order.
    def merge_tokens(tokens, comments)
      result = []
      com = comments.shift
      tok = tokens.shift
      while com || tok
        if !com || tok && (tok["range"][0] < com["range"][0])
          result << tok
          tok = tokens.shift
        else
          result << com
          com = comments.shift
    end

    # Consumes one token, advancing the position.
    #
    # By default returns just the token's value; pass full=true to get
    # the whole token hash instead.
    def next(full=false)
      token = @tokens[@position]
      @position += 1
      if full
        token
      else
        token[:value]
      end
    end
      result

    # True when every token has been consumed.
    def empty?
      @tokens[@position].nil?
    end

  end
+77 −0
Original line number Diff line number Diff line
require 'v8'
require 'json'
require 'singleton'

module JsDuck

  # Uses Esprima.js engine through V8 to tokenize JavaScript string.
  #
  # A Singleton, so the esprima.js source is loaded into a V8 context
  # just once per process, no matter how many lexers are created.
  class EsprimaTokenizer
    include Singleton

    def initialize
      @v8 = V8::Context.new
      # esprima.js lives four directory levels above this file;
      # File.expand_path is clearer than four nested File.dirname calls.
      @v8.load(File.expand_path("../../../../esprima/esprima.js", __FILE__))
    end

    # Tokenizes JavaScript source code.
    #
    # Input must be a String.  Returns an array of JsDuck-style token
    # hashes {:type => ..., :value => ...}; doc-comment tokens also
    # carry a 1-based :linenr.
    def tokenize(input)
      @v8['js'] = @input = input
      # :max_nesting => false, so deeply nested programs don't trip
      # JSON.parse's default nesting limit.
      program = JSON.parse(@v8.eval('JSON.stringify(esprima.parse(js, {tokens: true, comment: true}))'), :max_nesting => false)
      doc_comments = program["comments"].select {|c| doc_comment?(c) }
      merge_tokens(program["tokens"], doc_comments).map {|tok| to_jsduck_token(tok) }
    end

    private

    # True if comment is a /** doc-comment */
    #
    # Anchored with \A (not ^) so that a "*" at the start of a later
    # line of a plain /* ... */ comment doesn't falsely match.
    def doc_comment?(comment)
      comment["type"] == "Block" && !!(comment["value"] =~ /\A\*/)
    end

    # Combines tokens and comments arrays into one array while keeping
    # them ordered by source position.  Both input arrays are consumed
    # destructively (shifted empty).
    def merge_tokens(tokens, comments)
      result = []
      com = comments.shift
      tok = tokens.shift
      while com || tok
        if !com || tok && (tok["range"][0] < com["range"][0])
          result << tok
          tok = tokens.shift
        else
          result << com
          com = comments.shift
        end
      end
      result
    end

    # Converts an Esprima token to a JsDuck::Lexer-style token hash.
    def to_jsduck_token(tok)
      case tok["type"]
      when "Numeric"
        {:type => :number, :value => tok["value"]}
      when "String"
        {:type => :string, :value => tok["value"]}
      when "Identifier"
        {:type => :ident, :value => tok["value"]}
      when "RegularExpression"
        {:type => :regex, :value => tok["value"]}
      when "Punctuator"
        {:type => :operator, :value => tok["value"]}
      when "Keyword"
        kw = tok["value"].to_sym
        {:type => kw, :value => kw}
      when "Block"
        {
          :type => :doc_comment,
          # Esprima strips the /* */ delimiters; add them back.
          :value => "/*#{tok['value']}*/",
          # Line number = newlines before the comment start + 1.
          :linenr => @input[0...tok["range"][0]].count("\n") + 1,
        }
      else
        # raise, not throw: Ruby's throw is catch/throw control flow and
        # would abort with UncaughtThrowError instead of a RuntimeError.
        raise "Unknown Esprima token type #{tok['type']}"
      end
    end

  end
end
+32 −28
Original line number Diff line number Diff line
@@ -2,46 +2,50 @@ require "jsduck/esprima_lexer"

describe JsDuck::EsprimaLexer do

  def tokenize(js)
    @lexer = JsDuck::EsprimaLexer.new
    @lexer.tokenize(js).map {|t| t["type"] }
  def lexer(input)
    JsDuck::EsprimaLexer.new(input)
  end

  describe "tokenize" do
    it "works with comment in the middle" do
      tokenize("foo = /** */ 3;").should == [
        "Identifier", "Punctuator", "Block", "Numeric", "Punctuator"
      ]
  describe "empty?" do
    it "is true when no tokens" do
      lexer("").empty?.should == true
    end

    it "works with comment at the beginning" do
      tokenize("/** */ var Foo;").should == [
        "Block", "Keyword", "Identifier", "Punctuator"
      ]
    it "is false when there are tokens" do
      lexer(";").empty?.should == false
    end
  end

  describe "next()" do
    it "gives value of next token" do
      lexer("var x;").next.should == :var
    end

    it "works with comment at the end" do
      tokenize("'use strict'; /** */").should == [
        "String", "Punctuator", "Block"
      ]
    it "gives value of the n-th token when called n times" do
      lex = lexer("var x;")
      lex.next
      lex.next
      lex.next.should == ";"
    end
  end

  describe "next(true)" do
    it "gives full next token" do
      lexer(";").next(true).should == {:type => :operator, :value => ";"}
    end
  end

    it "works when only comment" do
      tokenize(" /** I am comment*/ ").should == [
        "Block"
      ]
  describe "look()" do
    it "is true when all params match" do
      lexer("var x = 10;").look(:var, "x", "=").should == true
    end

    it "works when just one token before comment" do
      tokenize(" ; /** I am comment*/ ").should == [
        "Punctuator", "Block"
      ]
    it "is false when at least one param doesn't match" do
      lexer("var x = 10;").look(:var, "y", "=").should == false
    end

    it "works when just one token after comment" do
      tokenize(" /** I am comment*/ z").should == [
        "Block", "Identifier"
      ]
    it "is false when not enough tokens" do
      lexer(";").look(";", :var).should == false
    end
  end

+55 −0
Original line number Diff line number Diff line
require "jsduck/esprima_tokenizer"

describe JsDuck::EsprimaTokenizer do

  # Helper: tokenizes JS source and returns just the token :type
  # symbols, which is all most examples below assert on.
  def tokenize(js)
    JsDuck::EsprimaTokenizer.instance.tokenize(js).map {|t| t[:type] }
  end

  describe "tokenize" do
    it "works with comment in the middle" do
      tokenize("foo = /** */ 3;").should == [
        :ident, :operator, :doc_comment, :number, :operator
      ]
    end

    it "works with comment at the beginning" do
      tokenize("/** */ var Foo;").should == [
        :doc_comment, :var, :ident, :operator
      ]
    end

    it "works with comment at the end" do
      tokenize("'use strict'; /** */").should == [
        :string, :operator, :doc_comment
      ]
    end

    it "works when only comment" do
      tokenize(" /** I am comment*/ ").should == [
        :doc_comment
      ]
    end

    it "works when just one token before comment" do
      tokenize(" ; /** I am comment*/ ").should == [
        :operator, :doc_comment
      ]
    end

    it "works when just one token after comment" do
      tokenize(" /** I am comment*/ z").should == [
        :doc_comment, :ident
      ]
    end

    # :linenr is 1-based: the three comments start on source lines 3-5.
    it "augments :doc_comment token with line number" do
      tokens = JsDuck::EsprimaTokenizer.instance.tokenize("\n \n /**Com1*/ \n /**Com2\n*/ /**Com3*/")
      tokens[0][:linenr].should == 3
      tokens[1][:linenr].should == 4
      tokens[2][:linenr].should == 5
    end
  end

end