Skip to content

Commit 4325835

Browse files
authored
Read quoted attributes in chunks (#126)
1 parent e77365e commit 4325835

File tree

4 files changed

+46
-15
lines changed

4 files changed

+46
-15
lines changed

Gemfile

+1
Original file line number | Diff line number | Diff line change
@@ -10,4 +10,5 @@ group :development do
1010
gem "bundler"
1111
gem "rake"
1212
gem "test-unit"
13+
gem "test-unit-ruby-core"
1314
end

lib/rexml/parsers/baseparser.rb

+10-10
Original file line number | Diff line number | Diff line change
@@ -628,17 +628,17 @@ def parse_attributes(prefixes, curr_ns)
628628
message = "Missing attribute equal: <#{name}>"
629629
raise REXML::ParseException.new(message, @source)
630630
end
631-
unless match = @source.match(/(['"])(.*?)\1\s*/um, true)
632-
if match = @source.match(/(['"])/, true)
633-
message =
634-
"Missing attribute value end quote: <#{name}>: <#{match[1]}>"
635-
raise REXML::ParseException.new(message, @source)
636-
else
637-
message = "Missing attribute value start quote: <#{name}>"
638-
raise REXML::ParseException.new(message, @source)
639-
end
631+
unless match = @source.match(/(['"])/, true)
632+
message = "Missing attribute value start quote: <#{name}>"
633+
raise REXML::ParseException.new(message, @source)
634+
end
635+
quote = match[1]
636+
value = @source.read_until(quote)
637+
unless value.chomp!(quote)
638+
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
639+
raise REXML::ParseException.new(message, @source)
640640
end
641-
value = match[2]
641+
@source.match(/\s*/um, true)
642642
if prefix == "xmlns"
643643
if local_part == "xml"
644644
if value != "http://www.w3.org/XML/1998/namespace"

lib/rexml/source.rb

+24-5
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,11 @@ def encoding=(enc)
6565
encoding_updated
6666
end
6767

68-
def read
68+
def read(term = nil)
69+
end
70+
71+
def read_until(term)
72+
@scanner.scan_until(Regexp.union(term)) or @scanner.rest
6973
end
7074

7175
def ensure_buffer
@@ -158,16 +162,31 @@ def initialize(arg, block_size=500, encoding=nil)
158162
end
159163
end
160164

161-
def read
165+
def read(term = nil)
162166
begin
163-
@scanner << readline
167+
@scanner << readline(term)
164168
true
165169
rescue Exception, NameError
166170
@source = nil
167171
false
168172
end
169173
end
170174

175+
def read_until(term)
176+
pattern = Regexp.union(term)
177+
data = []
178+
begin
179+
until str = @scanner.scan_until(pattern)
180+
@scanner << readline(term)
181+
end
182+
rescue EOFError
183+
@scanner.rest
184+
else
185+
read if @scanner.eos? and !@source.eof?
186+
str
187+
end
188+
end
189+
171190
def ensure_buffer
172191
read if @scanner.eos? && @source
173192
end
@@ -218,8 +237,8 @@ def current_line
218237
end
219238

220239
private
221-
def readline
222-
str = @source.readline(@line_break)
240+
def readline(term = nil)
241+
str = @source.readline(term || @line_break)
223242
if @pending_buffer
224243
if str.nil?
225244
str = @pending_buffer

test/test_document.rb

+11
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,12 @@
11
# -*- coding: utf-8 -*-
22
# frozen_string_literal: false
33

4+
require 'core_assertions'
5+
46
module REXMLTests
57
class TestDocument < Test::Unit::TestCase
8+
include Test::Unit::CoreAssertions
9+
610
def test_version_attributes_to_s
711
doc = REXML::Document.new(<<~eoxml)
812
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
@@ -198,6 +202,13 @@ def test_xml_declaration_standalone
198202
assert_equal('no', doc.stand_alone?, bug2539)
199203
end
200204

205+
def test_gt_linear_performance
206+
seq = [10000, 50000, 100000, 150000, 200000]
207+
assert_linear_performance(seq) do |n|
208+
REXML::Document.new('<test testing="' + ">" * n + '"></test>')
209+
end
210+
end
211+
201212
class WriteTest < Test::Unit::TestCase
202213
def setup
203214
@document = REXML::Document.new(<<-EOX)

0 commit comments

Comments
 (0)