Commit acc1da97 authored by Rene Saarsoo's avatar Rene Saarsoo
Browse files

Correctly shorten unicode text.

Previously, multi-byte characters could get cut in the middle when
text was shortened, resulting in invalid characters. Now we use the
Unicode support in regexes to count characters correctly.

A great resource for UTF-8 support in Ruby 1.8.7 is found here:
http://blog.grayproductions.net/articles/bytes_and_characters_in_ruby_18
parent 09761cab
Loading
Loading
Loading
Loading
+7 −3
Original line number Diff line number Diff line
@@ -345,8 +345,10 @@ module JsDuck
    #
    def shorten(input)
      sent = first_sentence(strip_tags(input))
      if sent.length > @max_length
        sent[0..(@max_length-4)] + "..."
      # Use u-modifier to correctly count multi-byte characters
      chars = sent.scan(/./mu)
      if chars.length > @max_length
        chars[0..(@max_length-4)].join + "..."
      else
        sent + " ..."
      end
@@ -359,7 +361,9 @@ module JsDuck
    # Returns true when input should get shortened: either the tag-stripped
    # text continues past its first sentence, or the tag-stripped text has
    # more than @max_length characters.
    def too_long?(input)
      stripped = strip_tags(input)
      # NOTE(review): the value of this expression is discarded; it appears to
      # be the pre-change line kept by the diff view. Only the last expression
      # below is returned.
      first_sentence(stripped).length < stripped.length || stripped.length > @max_length
      # First sentence vs. full text: both sides use String#length, so the
      # comparison is consistent whichever unit it counts (bytes on Ruby 1.8).
      # Full text vs. @max_length: count characters by scanning with the
      # u-modifier, so a multi-byte character is counted once.
      first_sentence(stripped).length < stripped.length || stripped.scan(/./mu).length > @max_length
    end

    def strip_tags(str)
+14 −0
Original line number Diff line number Diff line
@@ -469,6 +469,16 @@ describe JsDuck::DocFormatter do
      @formatter.shorten("12345678901").should == "1234567..."
    end

    it "counts multi-byte characters correctly when measuring text length" do
      # "\303\244" is the two-byte UTF-8 encoding of a-umlaut, so the input is
      # 10 characters but 11 bytes. The text is expected back in full with
      # " ..." appended, i.e. not cut.
      # NOTE(review): presumably the formatter's max_length is 10 in this
      # spec's setup (outside this view) - confirm.
      @formatter.shorten("123456789\303\244").should == "123456789\303\244 ..."
    end

    it "shortens text with multi-byte characters correctly" do
      # "\303\244" is the two-byte UTF-8 encoding of a-umlaut; the input is
      # 11 characters long. The expected result keeps the first 7 characters,
      # with the a-umlaut left intact as the 7th, followed by "...".
      @formatter.shorten("123456\303\2448901").should == "123456\303\244..."
    end

    it "strips HTML tags when shortening" do
      # The anchor markup must not appear in the result: only the
      # 11-character link text is expected to be measured and shortened.
      @formatter.shorten("<a href='some-long-link'>12345678901</a>").should == "1234567..."
    end
@@ -500,6 +510,10 @@ describe JsDuck::DocFormatter do
      @formatter.too_long?("<a href='some-long-link'>Foo</a>").should == false
    end

    it "counts multi-byte characters correctly" do
      # "\303\244" is the two-byte UTF-8 encoding of a-umlaut, so the input is
      # 10 characters but 11 bytes; it is expected not to be reported as too
      # long.
      # NOTE(review): presumably the formatter's max_length is 10 in this
      # spec's setup (outside this view) - confirm.
      @formatter.too_long?("123456789\303\244").should == false
    end
  end