C0 code coverage information

Generated on Fri Jul 11 15:55:29 -0700 2008 with rcov 0.7.0


Code reported as executed by Ruby looks like this...
and this: this line is also marked as covered.
Lines considered as run by rcov, but not reported by Ruby, look like this,
and this: these lines were inferred by rcov (using simple heuristics).
Finally, here's a line marked as not executed.
Name Total lines Lines of code Total coverage Code coverage
lib/openid/yadis/htmltokenizer.rb 305 175
84.6% 
75.4% 
  1 # = HTMLTokenizer
  2 #
  3 # Author::    Ben Giddings  (mailto:bg-rubyforge@infofiend.com)
  4 # Copyright:: Copyright (c) 2004 Ben Giddings
  5 # License::   Distributes under the same terms as Ruby
  6 #
  7 #
  8 # This is a partial port of the functionality behind Perl's TokeParser
  9 # Provided a page it progressively returns tokens from that page
 10 #
 11 # $Id: htmltokenizer.rb,v 1.7 2005/06/07 21:05:53 merc Exp $
 12 
 13 #
 14 # A class to tokenize HTML.
 15 #
 16 # Example:
 17 #
 18 #   page = "<HTML>
 19 #   <HEAD>
 20 #   <TITLE>This is the title</TITLE>
 21 #   </HEAD>
 22 #    <!-- Here comes the <a href=\"missing.link\">blah</a>
 23 #    comment body
 24 #     -->
 25 #    <BODY>
 26 #      <H1>This is the header</H1>
 27 #      <P>
 28 #        This is the paragraph, it contains
 29 #        <a href=\"link.html\">links</a>,
 30 #        <img src=\"blah.gif\" optional alt='images
 31 #        are
 32 #        really cool'>.  Ok, here is some more text and
 33 #        <A href=\"http://another.link.com/\" target=\"_blank\">another link</A>.
 34 #      </P>
 35 #    </body>
 36 #    </HTML>
 37 #    "
 38 #    toke = HTMLTokenizer.new(page)
 39 #
 40 #    assert("<h1>" == toke.getTag("h1", "h2", "h3").to_s.downcase)
 41 #    assert(HTMLTag.new("<a href=\"link.html\">") == toke.getTag("IMG", "A"))
 42 #    assert("links" == toke.getTrimmedText)
 43 #    assert(toke.getTag("IMG", "A").attr_hash['optional'])
 44 #    assert("_blank" == toke.getTag("IMG", "A").attr_hash['target'])
 45 #
 46 class HTMLTokenizer
 47   @@version = 1.0
 48 
 49   # Get version of HTMLTokenizer lib
 50   def self.version
 51     @@version
 52   end
 53 
 54   attr_reader :page
 55 
 56   # Create a new tokenizer, based on the content, used as a string.
 57   def initialize(content)
 58     @page = content.to_s
 59     @cur_pos = 0
 60   end
 61 
 62   # Reset the parser, setting the current position back at the stop
 63   def reset
 64     @cur_pos = 0
 65   end
 66 
 67   # Look at the next token, but don't actually grab it
 68   def peekNextToken
 69     if @cur_pos == @page.length then return nil end
 70 
 71     if ?< == @page[@cur_pos]
 72       # Next token is a tag of some kind
 73       if '!--' == @page[(@cur_pos + 1), 3]
 74         # Token is a comment
 75         tag_end = @page.index('-->', (@cur_pos + 1))
 76         if tag_end.nil?
 77           raise HTMLTokenizerError, "No end found to started comment:\n#{@page[@cur_pos,80]}"
 78         end
 79         # p @page[@cur_pos .. (tag_end+2)]
 80         HTMLComment.new(@page[@cur_pos .. (tag_end + 2)])
 81       else
 82         # Token is a html tag
 83         tag_end = @page.index('>', (@cur_pos + 1))
 84         if tag_end.nil?
 85           raise HTMLTokenizerError, "No end found to started tag:\n#{@page[@cur_pos,80]}"
 86         end
 87         # p @page[@cur_pos .. tag_end]
 88         HTMLTag.new(@page[@cur_pos .. tag_end])
 89       end
 90     else
 91       # Next token is text
 92       text_end = @page.index('<', @cur_pos)
 93       text_end = text_end.nil? ? -1 : (text_end - 1)
 94       # p @page[@cur_pos .. text_end]
 95       HTMLText.new(@page[@cur_pos .. text_end])
 96     end
 97   end
 98 
 99   # Get the next token, returns an instance of
100   # * HTMLText
101   # * HTMLToken
102   # * HTMLTag
103   def getNextToken
104     token = peekNextToken
105     if token
106       # @page = @page[token.raw.length .. -1]
107       # @page.slice!(0, token.raw.length)
108       @cur_pos += token.raw.length
109     end
110     #p token
111     #print token.raw
112     return token
113   end
114 
115   # Get a tag from the specified set of desired tags.
116   # For example:
117   # <tt>foo =  toke.getTag("h1", "h2", "h3")</tt>
118   # Will return the next header tag encountered.
119   def getTag(*sought_tags)
120     sought_tags.collect! {|elm| elm.downcase}
121 
122     while (tag = getNextToken)
123       if tag.kind_of?(HTMLTag) and
124           (0 == sought_tags.length or sought_tags.include?(tag.tag_name))
125         break
126       end
127     end
128     tag
129   end
130 
131   # Get all the text between the current position and the next tag
132   # (if specified) or a specific later tag
133   def getText(until_tag = nil)
134     if until_tag.nil?
135       if ?< == @page[@cur_pos]
136         # Next token is a tag, not text
137         ""
138       else
139         # Next token is text
140         getNextToken.text
141       end
142     else
143       ret_str = ""
144 
145       while (tag = peekNextToken)
146         if tag.kind_of?(HTMLTag) and tag.tag_name == until_tag
147           break
148         end
149 
150         if ("" != tag.text)
151           ret_str << (tag.text + " ")
152         end
153         getNextToken
154       end
155 
156       ret_str
157     end
158   end
159 
160   # Like getText, but squeeze all whitespace, getting rid of
161   # leading and trailing whitespace, and squeezing multiple
162   # spaces into a single space.
163   def getTrimmedText(until_tag = nil)
164     getText(until_tag).strip.gsub(/\s+/m, " ")
165   end
166 
167 end
168 
# Error raised by the tokenizer and the token classes when the markup they
# are given is malformed (unterminated tag/comment, text that is not a tag,
# ...). Derives from StandardError so that an ordinary +rescue+ clause
# catches it; explicit +rescue HTMLTokenizerError+ and +rescue Exception+
# clauses keep working as before.
class HTMLTokenizerError < StandardError
end
171 
# Common base for every kind of token produced from an HTML page.
# A token is a thin wrapper around the exact source text it was built from.
class HTMLToken
  # The exact source text of the token.
  attr_accessor :raw

  # Build a token that wraps +text+ unchanged.
  def initialize(text)
    @raw = text
  end

  # The string form of a token is its raw source.
  def to_s
    raw
  end

  # Text content of the token; the base class has none, so this is
  # always the empty string. Subclasses override it.
  def text
    ""
  end

  # The token's text with surrounding whitespace removed and every inner
  # run of whitespace collapsed to one space.
  def trimmed_text
    stripped = text.strip
    stripped.gsub(/\s+/m, " ")
  end

  # Tokens are equal to anything whose to_s matches their raw source,
  # which includes plain strings as well as other tokens.
  def ==(other)
    other_source = other.to_s
    raw == other_source
  end
end
200 
# Token for a run of plain text found outside of any tag.
class HTMLText < HTMLToken
  # For a text token the text content is the raw source itself.
  def text
    self.raw
  end
end
207 
# Class representing an HTML comment
class HTMLComment < HTMLToken
  # The comment body, without the <!-- --> delimiters and without the
  # whitespace directly inside them.
  attr_accessor :contents

  # Build a comment token from +text+, which must be one complete comment:
  # it has to start with "<!--" and end with "-->".
  #
  # Raises HTMLTokenizerError for anything else.
  def initialize(text)
    super(text)
    # \A and \z anchor to the whole string. The line anchors ^ and $ would
    # accept any multi-line text that merely contains a comment on a line.
    match = /\A<!--\s*(.*?)\s*-->\z/m.match(text)
    if match.nil?
      raise HTMLTokenizerError, "Text passed to HTMLComment.initialize is not a comment"
    end

    @contents = match[1]
  end
end
221 
# Class representing an HTML tag
class HTMLTag < HTMLToken
  # end_tag:: true when the tag is a closing tag such as </a>
  # tag_name:: lower-cased tag name; closing tags keep a leading "/"
  attr_reader :end_tag, :tag_name

  # Build a tag token from +text+, which must start with "<" and end
  # with ">".
  #
  # Raises HTMLTokenizerError when +text+ is not delimited that way or
  # contains no tag name.
  def initialize(text)
    super(text)
    # One-character slices compare as Strings on every Ruby version
    if '<' != text[0, 1] || '>' != text[-1, 1]
      raise HTMLTokenizerError, "Text passed to HTMLTag.initialize is not a tag"
    end

    @attr_hash = {}

    # The first run of name characters is the tag name
    name = text[/[\w:-]+/]
    if name.nil?
      raise HTMLTokenizerError, "No tag name found in tag: #{text}"
    end

    @end_tag = ('/' == text[1, 1])
    @tag_name = @end_tag ? '/' + name.downcase : name.downcase

    @hashed = false
  end

  # Retrieve a hash of all the tag's attributes.
  # Lazily done, so that if you don't look at a tag's attributes
  # things go quicker
  #
  # Keys are lower-cased attribute names. Values are the attribute values
  # with enclosing quotes removed; an attribute given without a value maps
  # to its own name as written in the tag. End tags always yield an empty
  # hash.
  def attr_hash
    # Lazy initialize == don't build the hash until it's needed
    unless @hashed
      parse_attributes unless @end_tag
      @hashed = true
    end

    @attr_hash
  end

  # Get the 'alt' text for a tag, if it exists, or an empty string otherwise.
  # Only start tags named img or applet are consulted.
  def text
    return '' if end_tag

    case tag_name
    when 'img', 'applet'
      attr_hash['alt'] || ''
    else
      ''
    end
  end

  private

  # Fill @attr_hash from the attribute section of the raw tag text.
  def parse_attributes
    # Everything between the tag name and the closing ">" (or "/>")
    attr_src = @raw[/<[\w:-]+\s+(.*?)\/?>/m, 1]
    return if attr_src.nil?

    pairs = attr_src.scan(/\s*([\w:-]+)(?:\s*=\s*("[^"]*"|'[^']*'|([^"'>][^\s>]*)))?/m)
    pairs.each do |name, value, _bare|
      val = if value.nil?
              # No "=value" part: the attribute name stands in as the value
              name
            elsif '"' == value[0, 1] || "'" == value[0, 1]
              # Drop the enclosing quotes
              value[1 .. -2]
            else
              value
            end
      @attr_hash[name.downcase] = val
    end
  end
end
305 

Generated using the rcov code coverage analysis tool for Ruby version 0.7.0.

Valid XHTML 1.0! Valid CSS!