1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
|
From libc-alpha-return-36620-listarch-libc-alpha=sources dot redhat dot com at sourceware dot org Tue Jan 29 16:13:50 2013
Return-Path: <libc-alpha-return-36620-listarch-libc-alpha=sources dot redhat dot com at sourceware dot org>
Delivered-To: listarch-libc-alpha at sources dot redhat dot com
Received: (qmail 32082 invoked by alias); 29 Jan 2013 16:13:48 -0000
Received: (qmail 32049 invoked by uid 22791); 29 Jan 2013 16:13:43 -0000
X-SWARE-Spam-Status: No, hits=-5.2 required=5.0
tests=AWL,BAYES_00,KHOP_RCVD_UNTRUST,RCVD_IN_DNSWL_HI,RP_MATCHES_RCVD,TW_BK
X-Spam-Check-By: sourceware.org
From: Andreas Schwab <schwab at suse dot de>
To: libc-alpha at sourceware dot org
Subject: [PATCH] Fix buffer overrun in regexp matcher
X-Yow: Are you selling NYLON OIL WELLS?? If so, we can use TWO DOZEN!!
Date: Tue, 29 Jan 2013 17:13:35 +0100
Message-ID: <mvma9rsq85s.fsf@hawking.suse.de>
User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/24.2.92 (gnu/linux)
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
Mailing-List: contact libc-alpha-help at sourceware dot org; run by ezmlm
Precedence: bulk
List-Id: <libc-alpha.sourceware.org>
List-Subscribe: <mailto:libc-alpha-subscribe at sourceware dot org>
List-Archive: <http://sourceware.org/ml/libc-alpha/>
List-Post: <mailto:libc-alpha at sourceware dot org>
List-Help: <mailto:libc-alpha-help at sourceware dot org>, <http://sourceware dot org/ml/#faqs>
Sender: libc-alpha-owner at sourceware dot org
Delivered-To: mailing list libc-alpha at sourceware dot org
When extending regex buffers, make sure we allocate enough room for the
state log. Merely doubling the space may not be enough if the current
node has accepted a long run of characters. This part of the code only
triggers with multibyte characters.
Andreas.
[BZ #15078]
* posix/regexec.c (extend_buffers): Add parameter min_len.
(check_matching): Pass minimum needed length.
(clean_state_log_if_needed): Likewise.
(get_subexp): Likewise.
* posix/Makefile (tests): Add bug-regex34.
(bug-regex34-ENV): Define.
* posix/bug-regex34.c: New file.
diff --git a/posix/Makefile b/posix/Makefile
index 57672d8..6ceb440 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -86,7 +86,7 @@ tests := tstgetopt testfnm runtests runptests \
tst-rfc3484-3 \
tst-getaddrinfo3 tst-fnmatch2 tst-cpucount tst-cpuset \
bug-getopt1 bug-getopt2 bug-getopt3 bug-getopt4 \
- bug-getopt5 tst-getopt_long1
+ bug-getopt5 tst-getopt_long1 bug-regex34
xtests := bug-ga2
ifeq (yes,$(build-shared))
test-srcs := globtest
@@ -199,6 +199,7 @@ bug-regex26-ENV = LOCPATH=$(common-objpfx)localedata
bug-regex30-ENV = LOCPATH=$(common-objpfx)localedata
bug-regex32-ENV = LOCPATH=$(common-objpfx)localedata
bug-regex33-ENV = LOCPATH=$(common-objpfx)localedata
+bug-regex34-ENV = LOCPATH=$(common-objpfx)localedata
tst-rxspencer-ARGS = --utf8 rxspencer/tests
tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata
tst-pcre-ARGS = PCRE.tests
diff --git a/posix/bug-regex34.c b/posix/bug-regex34.c
new file mode 100644
index 0000000..bb3b613
--- /dev/null
+++ b/posix/bug-regex34.c
@@ -0,0 +1,46 @@
+/* Test re_search with multi-byte characters in UTF-8.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <string.h>
+#include <locale.h>
+#include <regex.h>
+
+static int
+do_test (void)
+{
+ struct re_pattern_buffer r;
+ /* ????????x */
+ const char *s = "\xe1\x80\x80\xe1\x80\xbb\xe1\x80\xbd\xe1\x80\x94\xe1\x80\xba\xe1\x80\xaf\xe1\x80\x95\xe1\x80\xbax";
+
+ if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+ {
+ puts ("setlocale failed");
+ return 1;
+ }
+ memset (&r, 0, sizeof (r));
+
+ re_compile_pattern ("[^x]x", 5, &r);
+ /* This was triggering a buffer overflow. */
+ re_search (&r, s, strlen (s), 0, strlen (s), 0);
+ return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/posix/regexec.c b/posix/regexec.c
index 7f2de85..5ca2bf6 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -197,7 +197,7 @@ static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
static int check_node_accept (const re_match_context_t *mctx,
const re_token_t *node, int idx)
internal_function;
-static reg_errcode_t extend_buffers (re_match_context_t *mctx)
+static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
internal_function;
/* Entry point for POSIX code. */
@@ -1160,7 +1160,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match,
|| (BE (next_char_idx >= mctx->input.valid_len, 0)
&& mctx->input.valid_len < mctx->input.len))
{
- err = extend_buffers (mctx);
+ err = extend_buffers (mctx, next_char_idx + 1);
if (BE (err != REG_NOERROR, 0))
{
assert (err == REG_ESPACE);
@@ -1738,7 +1738,7 @@ clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
&& mctx->input.valid_len < mctx->input.len))
{
reg_errcode_t err;
- err = extend_buffers (mctx);
+ err = extend_buffers (mctx, next_state_log_idx + 1);
if (BE (err != REG_NOERROR, 0))
return err;
}
@@ -2792,7 +2792,7 @@ get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
if (bkref_str_off >= mctx->input.len)
break;
- err = extend_buffers (mctx);
+ err = extend_buffers (mctx, bkref_str_off + 1);
if (BE (err != REG_NOERROR, 0))
return err;
@@ -4102,7 +4102,7 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
static reg_errcode_t
internal_function __attribute_warn_unused_result__
-extend_buffers (re_match_context_t *mctx)
+extend_buffers (re_match_context_t *mctx, int min_len)
{
reg_errcode_t ret;
re_string_t *pstr = &mctx->input;
@@ -4111,8 +4111,10 @@ extend_buffers (re_match_context_t *mctx)
if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
return REG_ESPACE;
- /* Double the lengthes of the buffers. */
- ret = re_string_realloc_buffers (pstr, MIN (pstr->len, pstr->bufs_len * 2));
+ /* Double the lengthes of the buffers, but allocate at least MIN_LEN. */
+ ret = re_string_realloc_buffers (pstr,
+ MAX (min_len,
+ MIN (pstr->len, pstr->bufs_len * 2)));
if (BE (ret != REG_NOERROR, 0))
return ret;
--
1.8.1.2
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
|