Optimize use of V8 engine. (16cd5271) · Commits · extjs / jsduck

lib/jsduck/esprima_tokenizer.rb

+43 −53

Original line number	Diff line number	Diff line
		@@ -2,6 +2,10 @@ require 'v8'
		require 'json'
		require 'singleton'

		# Reopens V8::Object to add a public reader for its @native instance
		# variable. The tokenizer calls `.native` on the arrays it gets back
		# from `@v8.eval` so it can work with the underlying handles directly.
		class V8::Object
		attr_reader :native
		end

		module JsDuck

		# Uses Esprima.js engine through V8 to tokenize JavaScript string.
		@@ -10,71 +14,57 @@ module JsDuck

		# Creates the V8 context and loads the two scripts the tokenizer needs
		# into it: the Esprima parser and the EsprimaWrapper helper.
		# Each script is loaded exactly once.
		def initialize
		  @v8 = V8::Context.new

		  # esprima.js lives in an "esprima" directory four levels above this file.
		  esprima = File.expand_path("../../../../esprima/esprima.js", __FILE__)
		  @v8.load(esprima)

		  # esprima_wrapper.js sits in the same directory as this file.
		  wrapper = File.expand_path("../esprima_wrapper.js", __FILE__)
		  @v8.load(wrapper)
		end

		# Input must be a String.
		def tokenize(input)
		@v8['js'] = @input = input
		program = JSON.parse(@v8.eval(<<-EOS), :max_nesting => false)
		var out = esprima.parse(js, {tokens: true, comment: true});
		JSON.stringify({tokens: out.tokens, comments: out.comments});
		EOS
		doc_comments = program["comments"].find_all {\|c\| doc_comment?(c) }
		return merge_tokens(program["tokens"], doc_comments).map {\|tok\| to_jsduck_token(tok) }
		end

		private
		out = @v8.eval("EsprimaWrapper.parse(js)")

		len = out["type"].length
		out_type = out["type"].native
		out_value = out["value"].native
		out_linenr = out["linenr"].native
		out_value_array = out["valueArray"].native

		lock = V8::C::Locker.new

		# True if comment is a /** doc-comment */
		def doc_comment?(comment)
		comment["type"] == "Block" && !!(comment["value"] =~ /^\*/)
		type_array = [
		:number,
		:string,
		:ident,
		:regex,
		:operator,
		:keyword,
		:doc_comment,
		]

		value_array = []
		for i in (0..(out_value_array.Length()-1))
		value_array << out_value_array.Get(i).AsciiValue();
		end

		# Combines tokens and comments arrays into one array
		# while keeping them in correct order.
		def merge_tokens(tokens, comments)
		result = []
		com = comments.shift
		tok = tokens.shift
		while com \|\| tok
		if !com \|\| tok && (tok["range"][0] < com["range"][0])
		result << tok
		tok = tokens.shift
		tokens = []
		for i in (0..(len-1))
		t = type_array[out_type.Get(i)]
		if t == :doc_comment
		tokens << { :type => t, :value => out_value.Get(i).AsciiValue(), :linenr => out_linenr.Get(i) }
		elsif t == :keyword
		kw = value_array[out_value.Get(i)].to_sym
		tokens << { :type => kw, :value => kw }
		else
		result << com
		com = comments.shift
		end
		tokens << { :type => t, :value => value_array[out_value.Get(i)] }
		end
		result
		end

		# Converts Esprima token to JSDuck token
		def to_jsduck_token(tok)
		case tok["type"]
		when "Numeric"
		{:type => :number, :value => tok["value"]}
		when "String"
		{:type => :string, :value => tok["value"].gsub(/\A['"]\|['"]\Z/m, "")}
		when "Identifier", "Boolean", "Null"
		{:type => :ident, :value => tok["value"]}
		when "RegularExpression"
		{:type => :regex, :value => tok["value"]}
		when "Punctuator"
		{:type => :operator, :value => tok["value"]}
		when "Keyword"
		kw = tok["value"].to_sym
		{:type => kw, :value => kw}
		when "Block"
		{
		:type => :doc_comment,
		:value => "/#{tok['value']}/",
		:linenr => @input[0...tok["range"][0]].count("\n") + 1,
		}
		else
		throw "Unknown Esprima token type #{tok['type']}"
		end
		lock.delete

		tokens
		end

		end

lib/jsduck/esprima_wrapper.js

0 → 100644

+100 −0

Original line number	Diff line number	Diff line
		var EsprimaWrapper = (function() {

		// Maps each Esprima token/comment type name to the numeric code that
		// is written into the exported `type` array. Identifier, Boolean and
		// Null intentionally share one code.
		var typeMap = {
		    "Numeric": 0,
		    "String": 1,
		    "Identifier": 2,
		    "Boolean": 2,
		    "Null": 2,
		    "RegularExpression": 3,
		    "Punctuator": 4,
		    "Keyword": 5,
		    "Block": 6
		};

		// Flattens a list of tokens and Block comments into parallel arrays.
		//
		// tokens     - array of objects with `type` and `value`; Block entries
		//              must also carry `range` (start offset at range[0]).
		// full_input - the source text that the ranges index into.
		//
		// Returns {type, value, linenr, valueArray} where, for each index i:
		//   type[i]    - code from typeMap (undefined when the type is unmapped)
		//   value[i]   - the comment text for a Block entry, otherwise an index
		//                into valueArray
		//   linenr[i]  - 1-based line on which a Block entry starts, 0 otherwise
		//   valueArray - the distinct non-comment values in first-seen order
		function exportTokens(tokens, full_input) {
		    var valueMap = {};
		    var valueArray = [];
		    var type = [], value = [], linenr = [];
		    var lastComIndex = 0;
		    var lastComLineNr = 1;

		    for (var i = 0, len = tokens.length; i < len; i++) {
		        var t = tokens[i];

		        type[i] = typeMap[t.type];

		        var v = t.value;
		        if (t.type === "Block") {
		            // Re-wrap the comment body in its delimiters so the exported
		            // value is the complete comment text (assumes Esprima's
		            // `value` omits the /* and */ -- TODO confirm against the
		            // bundled esprima.js).
		            value[i] = "/*" + v + "*/";
		            // Count only the newlines since the previous comment and add
		            // them to that comment's line number, instead of rescanning
		            // the input from the start each time.
		            var newlines = full_input.slice(lastComIndex, t.range[0]).match(/\n/g) || [];
		            linenr[i] = newlines.length + lastComLineNr;
		            lastComIndex = t.range[0];
		            lastComLineNr = linenr[i];
		        }
		        else {
		            if (t.type === "String") {
		                // Drop one leading and one trailing quote character.
		                v = v.replace(/^['"]|['"]$/g, "");
		            }
		            // Prefix the key so that values like "__proto__" are stored
		            // as ordinary own properties rather than hitting special
		            // object members.
		            var key = "$" + v;
		            if (!Object.prototype.hasOwnProperty.call(valueMap, key)) {
		                valueMap[key] = valueArray.length;
		                valueArray.push(v);
		            }
		            value[i] = valueMap[key];
		            linenr[i] = 0;
		        }
		    }

		    return {type: type, value: value, linenr: linenr, valueArray: valueArray};
		}

		// Returns a new array holding only the doc-comments from `comments`:
		// entries of type "Block" whose value begins with "*". The input
		// array is left untouched and relative order is preserved.
		function filterDocComments(comments) {
		    return comments.filter(function(comment) {
		        return comment.type === "Block" && /^\*/.test(comment.value);
		    });
		}

		// Combines tokens and comments arrays into one array
		// while keeping them in correct order.
		//
		// Entries are compared by their start offset (range[0]). Only the
		// current head of each list is compared, so the result is ordered
		// provided each input already is. Neither input array is modified.
		// When a token and a comment start at the same offset the comment
		// is emitted first.
		function mergeTokens(tokens, comments) {
		    var result = [];
		    var c = 0, com = comments[c];
		    var t = 0, tok = tokens[t];
		    while (com || tok) {
		        // Take the token when comments are exhausted, or when the
		        // token starts strictly before the pending comment.
		        if (!com || (tok && tok.range[0] < com.range[0])) {
		            result.push(tok);
		            tok = tokens[++t];
		        }
		        else {
		            result.push(com);
		            com = comments[++c];
		        }
		    }
		    return result;
		}

		// Entry point of the wrapper: parses `js` with Esprima (collecting
		// tokens and comments), keeps only the doc-comments, merges them
		// with the tokens in order, and returns the result of exportTokens
		// for that merged list.
		function parse(js) {
		    var ast = esprima.parse(js, {tokens: true, comment: true});
		    var docComments = filterDocComments(ast.comments);
		    var merged = mergeTokens(ast.tokens, docComments);

		    return exportTokens(merged, js);
		}

		return {parse: parse};

		})();