← Index
Performance Profile   « block view • line view • sub view »
For /wise/base/deliv/dev/bin/getfix
  Run on Thu May 20 15:30:03 2010
Reported on Thu May 20 16:25:31 2010

File/opt/wise/lib/perl5/5.10.0/Text/ParseWords.pm
Statements Executed18
Total Time0.001052 seconds

Subroutines — ordered by exclusive time
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
00000Text::ParseWords::BEGIN
00000Text::ParseWords::nested_quotewords
00000Text::ParseWords::old_shellwords
00000Text::ParseWords::parse_line
00000Text::ParseWords::quotewords
00000Text::ParseWords::shellwords

LineStmts.Exclusive
Time
Avg.Code
1package Text::ParseWords;
2
334.0e-51.3e-5use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE);
# spent 68µs making 1 call to vars::import
411.0e-61.0e-6$VERSION = "3.26";
5
612.6e-52.6e-5require 5.000;
7
830.000340.00011use Exporter;
# spent 28µs making 1 call to Exporter::import
916.0e-66.0e-6@ISA = qw(Exporter);
1012.0e-62.0e-6@EXPORT = qw(shellwords quotewords nested_quotewords parse_line);
1111.0e-61.0e-6@EXPORT_OK = qw(old_shellwords);
12
13
14sub shellwords {
15 my (@lines) = @_;
16 my @allwords;
17
18 foreach my $line (@lines) {
19 $line =~ s/^\s+//;
20 my @words = parse_line('\s+', 0, $line);
21 pop @words if (@words and !defined $words[-1]);
22 return() unless (@words || !length($line));
23 push(@allwords, @words);
24 }
25 return(@allwords);
26}
27
28
29
30sub quotewords {
31 my($delim, $keep, @lines) = @_;
32 my($line, @words, @allwords);
33
34 foreach $line (@lines) {
35 @words = parse_line($delim, $keep, $line);
36 return() unless (@words || !length($line));
37 push(@allwords, @words);
38 }
39 return(@allwords);
40}
41
42
43
44sub nested_quotewords {
45 my($delim, $keep, @lines) = @_;
46 my($i, @allwords);
47
48 for ($i = 0; $i < @lines; $i++) {
49 @{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]);
50 return() unless (@{$allwords[$i]} || !length($lines[$i]));
51 }
52 return(@allwords);
53}
54
55
56
57sub parse_line {
58 my($delimiter, $keep, $line) = @_;
59 my($word, @pieces);
60
6130.000299.5e-5 no warnings 'uninitialized'; # we will be testing undef strings
# spent 74µs making 1 call to warnings::unimport
62
63 while (length($line)) {
64 # This pattern is optimised to be stack conservative on older perls.
65 # Do not refactor without being careful and testing it on very long strings.
66 # See Perl bug #42980 for an example of a stack busting input.
67 $line =~ s/^
68 (?:
69 # double quoted string
70 (") # $quote
71 ((?>[^\\"]*(?:\\.[^\\"]*)*))" # $quoted
72 | # --OR--
73 # singe quoted string
74 (') # $quote
75 ((?>[^\\']*(?:\\.[^\\']*)*))' # $quoted
76 | # --OR--
77 # unquoted string
78 ( # $unquoted
79 (?:\\.|[^\\"'])*?
80 )
81 # followed by
82 ( # $delim
83 \Z(?!\n) # EOL
84 | # --OR--
85 (?-x:$delimiter) # delimiter
86 | # --OR--
87 (?!^)(?=["']) # a quote
88 )
89 )//xs or return; # extended layout
90 my ($quote, $quoted, $unquoted, $delim) = (($1 ? ($1,$2) : ($3,$4)), $5, $6);
91
92
93 return() unless( defined($quote) || length($unquoted) || length($delim));
94
95 if ($keep) {
96 $quoted = "$quote$quoted$quote";
97 }
98 else {
99 $unquoted =~ s/\\(.)/$1/sg;
100 if (defined $quote) {
101 $quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
102 $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
103 }
104 }
105 $word .= substr($line, 0, 0); # leave results tainted
106 $word .= defined $quote ? $quoted : $unquoted;
107
108 if (length($delim)) {
109 push(@pieces, $word);
110 push(@pieces, $delim) if ($keep eq 'delimiters');
111 undef $word;
112 }
113 if (!length($line)) {
114 push(@pieces, $word);
115 }
116 }
117 return(@pieces);
118}
119
120
121
122sub old_shellwords {
123
124 # Usage:
125 # use ParseWords;
126 # @words = old_shellwords($line);
127 # or
128 # @words = old_shellwords(@lines);
129 # or
130 # @words = old_shellwords(); # defaults to $_ (and clobbers it)
131
13230.000340.00011 no warnings 'uninitialized'; # we will be testing undef strings
# spent 17µs making 1 call to warnings::unimport
133 local *_ = \join('', @_) if @_;
134 my (@words, $snippet);
135
136 s/\A\s+//;
137 while ($_ ne '') {
138 my $field = substr($_, 0, 0); # leave results tainted
139 for (;;) {
140 if (s/\A"(([^"\\]|\\.)*)"//s) {
141 ($snippet = $1) =~ s#\\(.)#$1#sg;
142 }
143 elsif (/\A"/) {
144 require Carp;
145 Carp::carp("Unmatched double quote: $_");
146 return();
147 }
148 elsif (s/\A'(([^'\\]|\\.)*)'//s) {
149 ($snippet = $1) =~ s#\\(.)#$1#sg;
150 }
151 elsif (/\A'/) {
152 require Carp;
153 Carp::carp("Unmatched single quote: $_");
154 return();
155 }
156 elsif (s/\A\\(.?)//s) {
157 $snippet = $1;
158 }
159 elsif (s/\A([^\s\\'"]+)//) {
160 $snippet = $1;
161 }
162 else {
163 s/\A\s+//;
164 last;
165 }
166 $field .= $snippet;
167 }
168 push(@words, $field);
169 }
170 return @words;
171}
172
17317.0e-67.0e-61;
174
175__END__
176
177=head1 NAME
178
179Text::ParseWords - parse text into an array of tokens or array of arrays
180
181=head1 SYNOPSIS
182
183 use Text::ParseWords;
184 @lists = &nested_quotewords($delim, $keep, @lines);
185 @words = &quotewords($delim, $keep, @lines);
186 @words = &shellwords(@lines);
187 @words = &parse_line($delim, $keep, $line);
188 @words = &old_shellwords(@lines); # DEPRECATED!
189
190=head1 DESCRIPTION
191
192The &nested_quotewords() and &quotewords() functions accept a delimiter
193(which can be a regular expression)
194and a list of lines and then breaks those lines up into a list of
195words ignoring delimiters that appear inside quotes. &quotewords()
196returns all of the tokens in a single long list, while &nested_quotewords()
197returns a list of token lists corresponding to the elements of @lines.
198&parse_line() does tokenizing on a single string. The &*quotewords()
199functions simply call &parse_line(), so if you're only splitting
200one line you can call &parse_line() directly and save a function
201call.
202
203The $keep argument is a boolean flag. If true, then the tokens are
204split on the specified delimiter, but all other characters (quotes,
205backslashes, etc.) are kept in the tokens. If $keep is false then the
206&*quotewords() functions remove all quotes and backslashes that are
207not themselves backslash-escaped or inside of single quotes (i.e.,
208&quotewords() tries to interpret these characters just like the Bourne
209shell). NB: these semantics are significantly different from the
210original version of this module shipped with Perl 5.000 through 5.004.
211As an additional feature, $keep may be the keyword "delimiters" which
212causes the functions to preserve the delimiters in each string as
213tokens in the token lists, in addition to preserving quote and
214backslash characters.
215
216&shellwords() is written as a special case of &quotewords(), and it
217does token parsing with whitespace as a delimiter-- similar to most
218Unix shells.
219
220=head1 EXAMPLES
221
222The sample program:
223
224 use Text::ParseWords;
225 @words = &quotewords('\s+', 0, q{this is "a test" of\ quotewords \"for you});
226 $i = 0;
227 foreach (@words) {
228 print "$i: <$_>\n";
229 $i++;
230 }
231
232produces:
233
234 0: <this>
235 1: <is>
236 2: <a test>
237 3: <of quotewords>
238 4: <"for>
239 5: <you>
240
241demonstrating:
242
243=over 4
244
245=item 0
246
247a simple word
248
249=item 1
250
251multiple spaces are skipped because of our $delim
252
253=item 2
254
255use of quotes to include a space in a word
256
257=item 3
258
259use of a backslash to include a space in a word
260
261=item 4
262
263use of a backslash to remove the special meaning of a double-quote
264
265=item 5
266
267another simple word (note the lack of effect of the
268backslashed double-quote)
269
270=back
271
272Replacing C<&quotewords('\s+', 0, q{this is...})>
273with C<&shellwords(q{this is...})>
274is a simpler way to accomplish the same thing.
275
276=head1 AUTHORS
277
278Maintainer is Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original
279author unknown). Much of the code for &parse_line() (including the
280primary regexp) from Joerk Behrends <jbehrends@multimediaproduzenten.de>.
281
282Examples section another documentation provided by John Heidemann
283<johnh@ISI.EDU>
284
285Bug reports, patches, and nagging provided by lots of folks-- thanks
286everybody! Special thanks to Michael Schwern <schwern@envirolink.org>
287for assuring me that a &nested_quotewords() would be useful, and to
288Jeff Friedl <jfriedl@yahoo-inc.com> for telling me not to worry about
289error-checking (sort of-- you had to be there).
290
291=cut