240
by Jacques Distler
Rough In New Sanitizer
1
require 'strscan'
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
4
240
by Jacques Distler
Rough In New Sanitizer
5
class Conditions < Hash #:nodoc:
8
hash = { :content => hash } unless Hash === hash
9
hash = keys_to_symbols(hash)
12
when :tag, :content then
13
# keys are valid, and require no further processing
15
hash[k] = keys_to_strings(v)
16
when :parent, :child, :ancestor, :descendant, :sibling, :before,
18
hash[k] = Conditions.new(v)
20
hash[k] = v = keys_to_symbols(v)
23
when :count, :greater_than, :less_than
24
# keys are valid, and require no further processing
26
v[k] = Conditions.new(v2)
28
raise "illegal key #{k.inspect} => #{v2.inspect}"
32
raise "illegal key #{k.inspect} => #{v.inspect}"
40
def keys_to_strings(hash)
41
hash.keys.inject({}) do |h,k|
47
def keys_to_symbols(hash)
48
hash.keys.inject({}) do |h,k|
49
raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym)
56
# The base class of all nodes, textual and otherwise, in an HTML document.
58
# The array of children of this node. Not all nodes have children.
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
60
240
by Jacques Distler
Rough In New Sanitizer
61
# The parent node of this node. All nodes have a parent, except for the
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
64
240
by Jacques Distler
Rough In New Sanitizer
65
# The line number of the input where this node was begun
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
67
240
by Jacques Distler
Rough In New Sanitizer
68
# The byte position in the input where this node was begun
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
70
240
by Jacques Distler
Rough In New Sanitizer
71
# Create a new node as a child of the given parent.
72
def initialize(parent, line=0, pos=0)
75
@line, @position = line, pos
78
# Return a textual representation of the node.
450
by Jacques Distler
Update lib/node.rb
80
s = []
240
by Jacques Distler
Rough In New Sanitizer
81
@children.each { |child| s << child.to_s }
450
by Jacques Distler
Update lib/node.rb
82
s.join
240
by Jacques Distler
Rough In New Sanitizer
83
end
85
# Return false (subclasses must override this to provide specific matching
86
# behavior.) +conditions+ may be of any type.
91
# Search the children of this node for the first node for which #find
92
# returns non-+nil+. Returns the result of the #find call that succeeded.
94
conditions = validate_conditions(conditions)
95
@children.each do |child|
96
node = child.find(conditions)
102
# Search for all nodes that match the given conditions, and return them
104
def find_all(conditions)
105
conditions = validate_conditions(conditions)
108
matches << self if match(conditions)
109
@children.each do |child|
110
matches.concat child.find_all(conditions)
115
# Returns +false+. Subclasses may override this if they define a kind of
121
def validate_conditions(conditions)
122
Conditions === conditions ? conditions : Conditions.new(conditions)
126
return false unless self.class == node.class && children.size == node.children.size
130
children.size.times do |i|
131
equivalent &&= children[i] == node.children[i]
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
136
240
by Jacques Distler
Rough In New Sanitizer
137
class <<self
138
def parse(parent, line, pos, content, strict=true)
140
Text.new(parent, line, pos, content)
142
scanner = StringScanner.new(content)
144
unless scanner.skip(/</)
148
return Text.new(parent, line, pos, content)
152
if scanner.skip(/!\[CDATA\[/)
450
by Jacques Distler
Update lib/node.rb
153
unless scanner.skip_until(/\]\]>/)
155
raise "expected ]]> (got #{scanner.rest.inspect} for #{content})"
157
scanner.skip_until(/\Z/)
240
by Jacques Distler
Rough In New Sanitizer
161
return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, ''))
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
163
240
by Jacques Distler
Rough In New Sanitizer
164
closing = ( scanner.scan(/\//) ? :close : nil )
165
return Text.new(parent, line, pos, content) unless name = scanner.scan(/[\w:-]+/)
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
166
240
by Jacques Distler
Rough In New Sanitizer
167
unless closing
170
while attr = scanner.scan(/[-\w:]+/)
172
if scanner.scan(/\s*=\s*/)
173
if delim = scanner.scan(/['"]/)
450
by Jacques Distler
Update lib/node.rb
174
v = []
240
by Jacques Distler
Rough In New Sanitizer
175
while text = scanner.scan(/[^#{delim}\\]+|./)
450
by Jacques Distler
Update lib/node.rb
178
v << text
240
by Jacques Distler
Rough In New Sanitizer
180
when delim
450
by Jacques Distler
Update lib/node.rb
182
else v << text
240
by Jacques Distler
Rough In New Sanitizer
183
end
450
by Jacques Distler
Update lib/node.rb
185
value = v.join
240
by Jacques Distler
Rough In New Sanitizer
186
else
187
value = scanner.scan(/[^\s>\/]+/)
190
attributes[attr] = value
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
193
194
closing = ( scanner.scan(/\/>/) ? :self : nil )
240
by Jacques Distler
Rough In New Sanitizer
195
end
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
196
240
by Jacques Distler
Rough In New Sanitizer
197
unless scanner.scan(/\s*>/)
199
raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
201
# throw away all text until we find what we're looking for
202
scanner.skip_until(/>/) or scanner.terminate
206
Tag.new(parent, line, pos, name, attributes, closing)
212
# A node that represents text, rather than markup.
213
class Text < Node #:nodoc:
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
214
240
by Jacques Distler
Rough In New Sanitizer
215
attr_reader :content
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
216
240
by Jacques Distler
Rough In New Sanitizer
217
# Creates a new text node as a child of the given parent, with the given
219
def initialize(parent, line, pos, content)
220
super(parent, line, pos)
224
# Returns the content of this node.
229
# Returns +self+ if this node meets the given conditions. Text nodes support
230
# conditions of the following kinds:
232
# * if +conditions+ is a string, it must be a substring of the node's
234
# * if +conditions+ is a regular expression, it must match the node's
236
# * if +conditions+ is a hash, it must contain a <tt>:content</tt> key that
237
# is either a string or a regexp, and which is interpreted as described
240
match(conditions) && self
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
242
240
by Jacques Distler
Rough In New Sanitizer
243
# Returns non-+nil+ if this node meets the given conditions, or +nil+
244
# otherwise. See the discussion of #find for the valid conditions.
245
def match(conditions)
248
@content == conditions
250
@content =~ conditions
252
conditions = validate_conditions(conditions)
254
# Text nodes only have :content, :parent, :ancestor
255
unless (conditions.keys - [:content, :parent, :ancestor]).empty?
259
match(conditions[:content])
266
return false unless super
267
content == node.content
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
270
240
by Jacques Distler
Rough In New Sanitizer
271
# A CDATA node is simply a text node with a specialized way of displaying
273
class CDATA < Text #:nodoc:
450
by Jacques Distler
Update lib/node.rb
275
"<![CDATA[#{super}]]>"
240
by Jacques Distler
Rough In New Sanitizer
276
end
279
# A Tag is any node that represents markup. It may be an opening tag, a
280
# closing tag, or a self-closing tag. It has a name, and may have a hash of
282
class Tag < Node #:nodoc:
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
283
240
by Jacques Distler
Rough In New Sanitizer
284
# Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
286
240
by Jacques Distler
Rough In New Sanitizer
287
# Either +nil+, or a hash of attributes for this node.
288
attr_reader :attributes
290
# The name of this tag.
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
292
240
by Jacques Distler
Rough In New Sanitizer
293
# Create a new node as a child of the given parent, from an already-parsed
294
# tag: the node name, attributes and closing status are passed in directly
295
# by the caller rather than being extracted from raw content here.
296
def initialize(parent, line, pos, name, attributes, closing)
297
super(parent, line, pos)
299
@attributes = attributes
303
# A convenience for obtaining an attribute of the node. Returns +nil+ if
304
# the node has no attributes.
306
@attributes ? @attributes[attr] : nil
309
# Returns non-+nil+ if this tag cannot contain child nodes (e.g. +img+, +br+).
310
def childless?(xml = false)
452
by Jacques Distler
Also fix S5 slides.js
311
return false if xml && @closing.nil?
240
by Jacques Distler
Rough In New Sanitizer
312
# !@closing.nil? ||
313
@name =~ /^(img|br|hr|link|meta|area|base|basefont|
314
col|frame|input|isindex|param)$/ox
317
# Returns a textual representation of the node
319
if @closing == :close
450
by Jacques Distler
Update lib/node.rb
320
"</#{@name}>" unless self.childless?
240
by Jacques Distler
Rough In New Sanitizer
321
else
450
by Jacques Distler
Update lib/node.rb
322
s = ["<#{@name}"]
323
@attributes.sort.each do |k,v|
325
s << "='#{v}'" if String === v
240
by Jacques Distler
Rough In New Sanitizer
326
end
327
s << "/" if (@children.empty? && @closing == :self) or self.childless?
329
@children.each { |child| s << child.to_s }
451
by Jacques Distler
Use AssetTagHelpers in S5 Template
330
s << "</#{@name}>" unless @closing == :self or self.childless? or @children.empty?
450
by Jacques Distler
Update lib/node.rb
331
s.join
240
by Jacques Distler
Rough In New Sanitizer
332
end
335
# If either the node or any of its children meet the given conditions, the
336
# matching node is returned. Otherwise, +nil+ is returned. (See the
337
# description of the valid conditions in the +match+ method.)
339
match(conditions) && self || super
342
# Returns +true+, indicating that this node represents an HTML tag.
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
346
240
by Jacques Distler
Rough In New Sanitizer
347
# Returns +true+ if the node meets all of the given conditions. The
348
# +conditions+ parameter must be a hash of any of the following keys
349
# (all are optional):
351
# * <tt>:tag</tt>: the node name must match the corresponding value
352
# * <tt>:attributes</tt>: a hash. The node's values must match the
353
# corresponding values in the hash.
354
# * <tt>:parent</tt>: a hash. The node's parent must match the
355
# corresponding hash.
356
# * <tt>:child</tt>: a hash. At least one of the node's immediate children
357
# must meet the criteria described by the hash.
358
# * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must
359
# meet the criteria described by the hash.
360
# * <tt>:descendant</tt>: a hash. At least one of the node's descendants
361
# must meet the criteria described by the hash.
362
# * <tt>:sibling</tt>: a hash. At least one of the node's siblings must
363
# meet the criteria described by the hash.
364
# * <tt>:after</tt>: a hash. The node must be after any sibling meeting
365
# the criteria described by the hash, and at least one sibling must match.
366
# * <tt>:before</tt>: a hash. The node must be before any sibling meeting
367
# the criteria described by the hash, and at least one sibling must match.
368
# * <tt>:children</tt>: a hash, for counting children of a node. Accepts the
370
# ** <tt>:count</tt>: either a number or a range which must equal (or
371
# include) the number of children that match.
372
# ** <tt>:less_than</tt>: the number of matching children must be less than
374
# ** <tt>:greater_than</tt>: the number of matching children must be
375
# greater than this number.
376
# ** <tt>:only</tt>: another hash consisting of the keys to use
377
# to match on the children, and only matching children will be
380
# Conditions are matched using the following algorithm:
382
# * if the condition is a string, it must be a substring of the value.
383
# * if the condition is a regexp, it must match the value.
384
# * if the condition is a number, the value must match number.to_s.
385
# * if the condition is +true+, the value must not be +nil+.
386
# * if the condition is +false+ or +nil+, the value must be +nil+.
390
# # test if the node is a "span" tag
391
# node.match :tag => "span"
393
# # test if the node's parent is a "div"
394
# node.match :parent => { :tag => "div" }
396
# # test if any of the node's ancestors are "table" tags
397
# node.match :ancestor => { :tag => "table" }
399
# # test if any of the node's immediate children are "em" tags
400
# node.match :child => { :tag => "em" }
402
# # test if any of the node's descendants are "strong" tags
403
# node.match :descendant => { :tag => "strong" }
405
# # test if the node has between 2 and 4 span tags as immediate children
406
# node.match :children => { :count => 2..4, :only => { :tag => "span" } }
408
# # get funky: test to see if the node is a "div", has a "ul" ancestor
409
# # and an "li" parent (with "class" = "enum"), and whether or not it has
410
# # a "span" descendant that contains text matching /hello world/:
411
# node.match :tag => "div",
412
# :ancestor => { :tag => "ul" },
413
# :parent => { :tag => "li",
414
# :attributes => { :class => "enum" } },
415
# :descendant => { :tag => "span",
416
# :child => /hello world/ }
417
def match(conditions)
418
conditions = validate_conditions(conditions)
419
# check content of child nodes
420
if conditions[:content]
422
return false unless match_condition("", conditions[:content])
424
return false unless children.find { |child| child.match(conditions[:content]) }
429
return false unless match_condition(@name, conditions[:tag]) if conditions[:tag]
432
(conditions[:attributes] || {}).each do |key, value|
433
return false unless match_condition(self[key], value)
437
return false unless parent.match(conditions[:parent]) if conditions[:parent]
440
return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child]
443
if conditions[:ancestor]
444
return false unless catch :found do
446
throw :found, true if p.match(conditions[:ancestor]) while p = p.parent
451
if conditions[:descendant]
452
return false unless children.find do |child|
454
child.match(conditions[:descendant]) ||
455
# test the child's descendants
456
child.match(:descendant => conditions[:descendant])
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
459
240
by Jacques Distler
Rough In New Sanitizer
460
# count children
461
if opts = conditions[:children]
462
matches = children.select do |c|
463
(c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?))
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
465
240
by Jacques Distler
Rough In New Sanitizer
466
matches = matches.select { |c| c.match(opts[:only]) } if opts[:only]
467
opts.each do |key, value|
472
return false if matches.length != value
474
return false unless value.include?(matches.length)
477
return false unless matches.length < value
479
return false unless matches.length > value
480
else raise "unknown count condition #{key}"
486
if conditions[:sibling] || conditions[:before] || conditions[:after]
487
siblings = parent ? parent.children : []
488
self_index = siblings.index(self)
490
if conditions[:sibling]
491
return false unless siblings.detect do |s|
492
s != self && s.match(conditions[:sibling])
496
if conditions[:before]
497
return false unless siblings[self_index+1..-1].detect do |s|
498
s != self && s.match(conditions[:before])
502
if conditions[:after]
503
return false unless siblings[0,self_index].detect do |s|
504
s != self && s.match(conditions[:after])
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
508
240
by Jacques Distler
Rough In New Sanitizer
509
true
513
return false unless super
514
return false unless closing == node.closing && self.name == node.name
515
attributes == node.attributes
946
by Jacques Distler
Fix a Sanitizer test and adjust some whitespace
517
240
by Jacques Distler
Rough In New Sanitizer
518
private
519
# Match the given value to the given condition.
520
def match_condition(value, condition)
523
value && value == condition
525
value && value.match(condition)
527
value == condition.to_s