Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions ext/strscan/strscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1852,6 +1852,80 @@ strscan_values_at(int argc, VALUE *argv, VALUE self)
return new_ary;
}

/* rb_int_parse_cstr() is declared in internal/bignum.h, which is not
 * available to extensions, so it is declared here; the symbol itself
 * is exported from libruby.
 *
 * NOTE(review): RB_INT_PARSE_SIGN is presumably a copy of the flag
 * value from internal/bignum.h and has to stay in sync with it --
 * confirm against that header.
 *
 * These declarations are kept above the call-seq comment below so that
 * the comment directly precedes strscan_integer_at() for RDoc. */
VALUE rb_int_parse_cstr(const char *str, ssize_t len, char **endp,
                        size_t *ndigits, int base, int flags);
#define RB_INT_PARSE_SIGN 0x01

/*
 * call-seq:
 *   integer_at(index) -> integer or nil
 *
 * Returns the captured substring at the given +index+ as an Integer,
 * without creating an intermediate String object.
 *
 * A negative +index+ counts backward from the last capture group;
 * index 0 refers to the entire match.
 *
 * Returns +nil+ if the most recent match failed, if +index+ is out of
 * range, or if the capture at +index+ did not participate in the match.
 *
 * The capture is parsed as a base-10 integer with an optional leading
 * sign.  Unlike <tt>self[index].to_i</tt>, this method raises
 * ArgumentError if the capture is empty or is not consumed entirely
 * as an integer (for example <tt>"1.5"</tt> or <tt>"foo"</tt>).
 *
 *   scanner = StringScanner.new("2024-06-15")
 *   scanner.scan(/(\d{4})-(\d{2})-(\d{2})/)
 *   scanner.integer_at(1)  # => 2024
 *   scanner.integer_at(2)  # => 6
 *   scanner.integer_at(3)  # => 15
 *   scanner.integer_at(-1) # => 15
 *   scanner.integer_at(0)  # raises ArgumentError ("2024-06-15" is not an integer)
 *
 */

Comment on lines +1877 to +1883
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you move this from between the call-seq comment and strscan_integer_at() definition for RDoc?

static VALUE
strscan_integer_at(VALUE self, VALUE idx)
{
    struct strscanner *p;
    long i;
    long beg, end, len;
    const char *ptr;
    char *endp;
    VALUE integer;

    GET_SCANNER(self, p);
    /* Without a successful match there are no captures to convert. */
    if (! MATCHED_P(p)) return Qnil;

    i = NUM2LONG(idx);

    /* A negative index counts back from the last register. */
    if (i < 0)
        i += p->regs.num_regs;
    if (i < 0) return Qnil;
    if (i >= p->regs.num_regs) return Qnil;
    /* A begin offset of -1 means the group took no part in the match. */
    if (p->regs.beg[i] == -1) return Qnil;

    beg = adjust_register_position(p, p->regs.beg[i]);
    end = adjust_register_position(p, p->regs.end[i]);
    len = end - beg;

    if (len <= 0) {
        rb_raise(rb_eArgError, "empty capture for integer conversion");
    }

    ptr = S_PBEG(p) + beg;

    /* Parse directly from the source bytes: rb_int_parse_cstr() takes an
     * explicit length, so no NUL-terminated copy is needed.  endp reports
     * where parsing stopped and is used to verify that the whole capture
     * was consumed. */
    integer = rb_int_parse_cstr(ptr, len, &endp, NULL, 10, RB_INT_PARSE_SIGN);

    if (endp != ptr + len) {
        /* Build the excerpt as a length-counted String instead of using
         * "%.*s" with (int)len: the cast narrows a long, and "%s" stops
         * at an embedded NUL byte.  The allocation only happens on this
         * error path. */
        rb_raise(rb_eArgError,
                 "non-digit character in capture: %"PRIsVALUE,
                 rb_str_new(ptr, len));
    }
    return integer;
}

/*
* :markup: markdown
* :include: strscan/link_refs.txt
Expand Down Expand Up @@ -2290,6 +2364,7 @@ Init_strscan(void)
rb_define_method(StringScanner, "size", strscan_size, 0);
rb_define_method(StringScanner, "captures", strscan_captures, 0);
rb_define_method(StringScanner, "values_at", strscan_values_at, -1);
rb_define_method(StringScanner, "integer_at", strscan_integer_at, 1);

rb_define_method(StringScanner, "rest", strscan_rest, 0);
rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0);
Expand Down
106 changes: 106 additions & 0 deletions test/strscan/test_stringscanner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,112 @@ def test_named_captures_same_name_union
assert_equal({"number" => "1"}, scan.named_captures)
end

def test_integer_at
  # Every capture group of a date match converts to its Integer value.
  scanner = create_string_scanner("2024-06-15")
  scanner.scan(/(\d{4})-(\d{2})-(\d{2})/)
  { 1 => 2024, 2 => 6, 3 => 15 }.each do |group, expected|
    assert_equal(expected, scanner.integer_at(group))
  end
end

def test_integer_at_index_zero
  # Index 0 is the whole match, which here consists of digits only.
  scanner = create_string_scanner("42 abc")
  scanner.scan(/(\d+)/)
  whole_match = scanner.integer_at(0)
  assert_equal(42, whole_match)
end

def test_integer_at_negative_index
  # Negative indexes count backward from the last capture group.
  scanner = create_string_scanner("2024-06-15")
  scanner.scan(/(\d{4})-(\d{2})-(\d{2})/)
  { -1 => 15, -2 => 6, -3 => 2024 }.each do |group, expected|
    assert_equal(expected, scanner.integer_at(group))
  end
end

def test_integer_at_no_match
  # After a failed scan there is no match data, so the result is nil.
  scanner = create_string_scanner("abc")
  scanner.scan(/\d+/)
  result = scanner.integer_at(0)
  assert_nil(result)
end

def test_integer_at_before_match
  # A fresh scanner that has not attempted any match yields nil.
  scanner = create_string_scanner("abc")
  result = scanner.integer_at(0)
  assert_nil(result)
end

def test_integer_at_index_out_of_range
  # Indexes beyond either end of the capture list yield nil.
  scanner = create_string_scanner("42")
  scanner.scan(/(\d+)/)
  [2, 100, -3].each do |group|
    assert_nil(scanner.integer_at(group))
  end
end

def test_integer_at_optional_group_not_matched
  # The optional day group does not participate in this match, so its
  # capture converts to nil while the matched groups still convert.
  scanner = create_string_scanner("2024-06")
  scanner.scan(/(\d{4})-(\d{2})(-(\d{2}))?/)
  { 1 => 2024, 2 => 6 }.each do |group, expected|
    assert_equal(expected, scanner.integer_at(group))
  end
  assert_nil(scanner.integer_at(4))
end

def test_integer_at_large_number
  # A 100-digit capture must agree with String#to_i on the same digits.
  huge = '9' * 100
  scanner = create_string_scanner(huge)
  scanner.scan(/(#{huge})/)
  expected = huge.to_i
  assert_equal(expected, scanner.integer_at(1))
end

def test_integer_at_non_digit
  # A decimal point inside the capture is rejected.
  scanner = create_string_scanner("1.5")
  scanner.scan(/([\d.]+)/)
  assert_raise(ArgumentError) do
    scanner.integer_at(1)
  end
end

def test_integer_at_non_digit_alpha
  # A purely alphabetic capture is rejected.
  scanner = create_string_scanner("foo bar")
  scanner.scan(/(\w+)/)
  assert_raise(ArgumentError) do
    scanner.integer_at(1)
  end
end

def test_integer_at_empty_capture
  # A group that matched the empty string is rejected.
  scanner = create_string_scanner("abc")
  scanner.scan(/()abc/)
  assert_raise(ArgumentError) do
    scanner.integer_at(1)
  end
end

def test_integer_at_sign_only
  # A lone sign character with no digits after it is rejected.
  ["+", "-"].each do |sign|
    scanner = create_string_scanner(sign)
    scanner.scan(/([+\-])/)
    assert_raise(ArgumentError) do
      scanner.integer_at(1)
    end
  end
end

def test_integer_at_signed_number
  # A leading minus or plus sign is honored during conversion.
  { "-42" => -42, "+42" => 42 }.each do |text, expected|
    scanner = create_string_scanner(text)
    scanner.scan(/([+\-]?\d+)/)
    assert_equal(expected, scanner.integer_at(1))
  end
end

def test_integer_at_leading_zeros
  # Leading zeros do not change the value and are read as decimal.
  scanner = create_string_scanner("007")
  scanner.scan(/(\d+)/)
  value = scanner.integer_at(1)
  assert_equal(7, value)
end

def test_integer_at_full_match_with_non_digits
  # The whole match contains "-" separators, so index 0 is rejected.
  scanner = create_string_scanner("2024-06-15")
  scanner.scan(/(\d{4})-(\d{2})-(\d{2})/)
  assert_raise(ArgumentError) do
    scanner.integer_at(0)
  end
end

def test_scan_integer
s = create_string_scanner('abc')
assert_equal(3, s.match?(/(?<a>abc)/)) # set named_captures
Expand Down