Mini parser for detecting character encoding from meta elements
Returns a [name, value] pair for the next attribute in the stream if one is found, or nil otherwise
# File lib/feed_tools/vendor/html5/lib/html5/inputstream.rb, line 514
#
# Reads the next attribute from @data, starting at the current position.
#
# Returns a two-element array [name, value], or nil when the first byte
# after skipping whitespace and '/' is '<' or '>'. ASCII uppercase bytes
# in both name and value are lowercased. The value is '' when the
# attribute has no value.
#
# An '=' only ends the attribute name once at least one name byte has been
# collected; a leading '=' is kept as part of the name. (An Array is always
# truthy in Ruby, so the emptiness check has to be explicit.)
#
# The loops are written as `while true`: Kernel#loop would additionally
# rescue StopIteration, which could alter how errors raised by @data
# propagate to the caller.
def get_attribute
  @data.skip(SPACE_CHARACTERS + ['/'])

  if @data.current_byte == '<'
    # Rewind one byte before reporting that there is no attribute.
    @data.position -= 1
    return nil
  elsif @data.current_byte == '>'
    return nil
  end

  attr_name = []
  attr_value = []
  space_found = false

  # Step 5: attribute name
  while true
    byte = @data.current_byte
    if byte == '=' && !attr_name.empty?
      break
    elsif SPACE_CHARACTERS.include?(byte)
      space_found = true
      break
    elsif ['/', '<', '>'].include?(byte)
      return [attr_name.join(''), '']
    elsif ASCII_UPPERCASE.include?(byte)
      attr_name.push(byte.downcase)
    else
      attr_name.push(byte)
    end
    # Step 6
    @data.position += 1
  end

  # Step 7
  if space_found
    @data.skip
    # Step 8: no '=' after the whitespace means the attribute has no value.
    unless @data.current_byte == '='
      @data.position -= 1
      return [attr_name.join(''), '']
    end
  end

  # XXX need to advance position in both spaces and value case
  # Step 9: move past the '='
  @data.position += 1
  # Step 10
  @data.skip

  # Step 11: first byte of the value
  byte = @data.current_byte
  if ["'", '"'].include?(byte)
    # 11.1: quoted value, read up to the matching quote character
    quote_char = byte
    while true
      @data.position += 1
      byte = @data.current_byte
      if byte == quote_char
        # 11.3: leave the position just after the closing quote
        @data.position += 1
        return [attr_name.join(''), attr_value.join('')]
      elsif ASCII_UPPERCASE.include?(byte)
        # 11.4
        attr_value.push(byte.downcase)
      else
        # 11.5
        attr_value.push(byte)
      end
    end
  elsif ['>', '<'].include?(byte)
    return [attr_name.join(''), '']
  elsif ASCII_UPPERCASE.include?(byte)
    attr_value.push(byte.downcase)
  else
    attr_value.push(byte)
  end

  # Unquoted value: read until whitespace or an angle bracket.
  terminators = SPACE_CHARACTERS + ['>', '<']
  while true
    @data.position += 1
    byte = @data.current_byte
    if terminators.include?(byte)
      return [attr_name.join(''), attr_value.join('')]
    elsif ASCII_UPPERCASE.include?(byte)
      attr_value.push(byte.downcase)
    else
      attr_value.push(byte)
    end
  end
end
# File lib/feed_tools/vendor/html5/lib/html5/inputstream.rb, line 426
#
# Walks @data looking for an encoding declaration.
#
# At each step the entries of @@method_dispatch are tried in order; the
# handler of the first entry whose key matches (via @data.match_bytes with
# its second argument set to true) is invoked with send. Iteration stops as
# soon as a handler returns a falsy value.
#
# Returns @encoding, stripped when it is not nil.
def get_encoding
  @data.each do |_byte|
    entry = @@method_dispatch.find { |(prefix, _handler)| @data.match_bytes(prefix, true) }
    next if entry.nil?

    break unless send(entry[1])
  end

  unless @encoding.nil?
    @encoding = @encoding.strip
  end
  @encoding
end
Skip over comments
# File lib/feed_tools/vendor/html5/lib/html5/inputstream.rb, line 442
#
# Handler for comments: asks @data to jump to the '-->' terminator and
# returns whatever jump_to returns.
def handle_comment
  @data.jump_to('-->')
end
# File lib/feed_tools/vendor/html5/lib/html5/inputstream.rb, line 446
#
# Handler for a possible <meta element.
#
# Reads attributes one at a time with get_attribute. A 'charset' value, or
# the result of running ContentAttrParser over a 'content' value, is checked
# with HTML5.is_valid_encoding; the first one that passes is stored in
# @encoding.
#
# Returns false once an encoding has been stored, and true when the byte
# after "<meta" is not a space character or when the attributes run out.
def handle_meta
  # "<meta" not followed by a space is not a meta element; keep going.
  return true unless SPACE_CHARACTERS.include?(@data.current_byte)

  # A valid meta element: scan its attributes for an encoding.
  while true
    # Try to find the next attribute after the current position.
    pair = get_attribute
    return true if pair.nil?

    name = pair[0]
    if name == 'charset'
      candidate = pair[1]
    elsif name == 'content'
      candidate = ContentAttrParser.new(EncodingBytes.new(pair[1])).parse
    else
      next
    end

    next unless HTML5.is_valid_encoding(candidate)

    @encoding = candidate
    return false
  end
end
# File lib/feed_tools/vendor/html5/lib/html5/inputstream.rb, line 508
#
# Fallback handler: asks @data to jump to the next '>' and returns
# whatever jump_to returns.
def handle_other
  @data.jump_to('>')
end
# File lib/feed_tools/vendor/html5/lib/html5/inputstream.rb, line 478
#
# Handler for a possible end tag: advances the stream position by one byte,
# then delegates to handle_possible_tag in end-tag mode and returns its
# result.
def handle_possible_end_tag
  @data.position = @data.position + 1
  handle_possible_tag(true)
end
# File lib/feed_tools/vendor/html5/lib/html5/inputstream.rb, line 474
#
# Handler for a possible start tag: delegates to handle_possible_tag in
# start-tag mode and returns its result.
def handle_possible_start_tag
  handle_possible_tag(false)
end
# File lib/feed_tools/vendor/html5/lib/html5/inputstream.rb, line 483
#
# Shared handler for possible start and end tags.
#
# end_tag - truthy when called for an end tag.
#
# Always returns true.
def handle_possible_tag(end_tag)
  if ASCII_LETTERS.include?(@data.current_byte)
    @data.find_next(SPACE_CHARACTERS + ['<', '>'])

    if @data.current_byte == '<'
      # Return to the first step of the overall "two step" algorithm,
      # reprocessing the '<' byte.
      @data.position -= 1
    else
      # Read (and discard) all attributes.
      until get_attribute.nil?
      end
    end
  elsif end_tag
    # The next byte is not an ASCII letter: for an end tag, step back one
    # byte and treat the fragment according to handle_other.
    @data.position -= 1
    handle_other
  end
  # For a possible start tag whose next byte is not an ASCII letter,
  # the fragment is simply ignored.

  true
end
Generated with the Darkfish Rdoc Generator 2.