Profile: /opt/wise/lib/perl5/5.10.0/Text/ParseWords.pm

File	/opt/wise/lib/perl5/5.10.0/Text/ParseWords.pm
Statements Executed	18
Total Time	0.001052 seconds

Subroutines — ordered by exclusive time
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
0	0	0	0	0	Text::ParseWords::	BEGIN
0	0	0	0	0	Text::ParseWords::	nested_quotewords
0	0	0	0	0	Text::ParseWords::	old_shellwords
0	0	0	0	0	Text::ParseWords::	parse_line
0	0	0	0	0	Text::ParseWords::	quotewords
0	0	0	0	0	Text::ParseWords::	shellwords

Line	Stmts.	Exclusive Time	Avg.	Code
1				package Text::ParseWords;
2
3	3	4.0e-5	1.3e-5	use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE); # spent 68µs making 1 call to vars::import
4	1	1.0e-6	1.0e-6	$VERSION = "3.26";
5
6	1	2.6e-5	2.6e-5	require 5.000;
7
8	3	0.00034	0.00011	use Exporter; # spent 28µs making 1 call to Exporter::import
9	1	6.0e-6	6.0e-6	@ISA = qw(Exporter);
10	1	2.0e-6	2.0e-6	@EXPORT = qw(shellwords quotewords nested_quotewords parse_line);
11	1	1.0e-6	1.0e-6	@EXPORT_OK = qw(old_shellwords);
12
13
14				sub shellwords {
15				my (@lines) = @_;
16				my @allwords;
17
18				foreach my $line (@lines) {
19				$line =~ s/^\s+//;
20				my @words = parse_line('\s+', 0, $line);
21				pop @words if (@words and !defined $words[-1]);
22				return() unless (@words \|\| !length($line));
23				push(@allwords, @words);
24				}
25				return(@allwords);
26				}
27
28
29
30				sub quotewords {
31				my($delim, $keep, @lines) = @_;
32				my($line, @words, @allwords);
33
34				foreach $line (@lines) {
35				@words = parse_line($delim, $keep, $line);
36				return() unless (@words \|\| !length($line));
37				push(@allwords, @words);
38				}
39				return(@allwords);
40				}
41
42
43
44				sub nested_quotewords {
45				my($delim, $keep, @lines) = @_;
46				my($i, @allwords);
47
48				for ($i = 0; $i < @lines; $i++) {
49				@{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]);
50				return() unless (@{$allwords[$i]} \|\| !length($lines[$i]));
51				}
52				return(@allwords);
53				}
54
55
56
57				sub parse_line {
58				my($delimiter, $keep, $line) = @_;
59				my($word, @pieces);
60
61	3	0.00029	9.5e-5	no warnings 'uninitialized'; # we will be testing undef strings # spent 74µs making 1 call to warnings::unimport
62
63				while (length($line)) {
64				# This pattern is optimised to be stack conservative on older perls.
65				# Do not refactor without being careful and testing it on very long strings.
66				# See Perl bug #42980 for an example of a stack busting input.
67				$line =~ s/^
68				(?:
69				# double quoted string
70				(") # $quote
71				((?>[^\\"](?:\\.[^\\"])*))" # $quoted
72				\| # --OR--
73				# singe quoted string
74				(') # $quote
75				((?>[^\\'](?:\\.[^\\'])*))' # $quoted
76				\| # --OR--
77				# unquoted string
78				( # $unquoted
79				(?:\\.\|[^\\"'])*?
80				)
81				# followed by
82				( # $delim
83				\Z(?!\n) # EOL
84				\| # --OR--
85				(?-x:$delimiter) # delimiter
86				\| # --OR--
87				(?!^)(?=["']) # a quote
88				)
89				)//xs or return; # extended layout
90				my ($quote, $quoted, $unquoted, $delim) = (($1 ? ($1,$2) : ($3,$4)), $5, $6);
91
92
93				return() unless( defined($quote) \|\| length($unquoted) \|\| length($delim));
94
95				if ($keep) {
96				$quoted = "$quote$quoted$quote";
97				}
98				else {
99				$unquoted =~ s/\\(.)/$1/sg;
100				if (defined $quote) {
101				$quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
102				$quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
103				}
104				}
105				$word .= substr($line, 0, 0); # leave results tainted
106				$word .= defined $quote ? $quoted : $unquoted;
107
108				if (length($delim)) {
109				push(@pieces, $word);
110				push(@pieces, $delim) if ($keep eq 'delimiters');
111				undef $word;
112				}
113				if (!length($line)) {
114				push(@pieces, $word);
115				}
116				}
117				return(@pieces);
118				}
119
120
121
122				sub old_shellwords {
123
124				# Usage:
125				# use ParseWords;
126				# @words = old_shellwords($line);
127				# or
128				# @words = old_shellwords(@lines);
129				# or
130				# @words = old_shellwords(); # defaults to $_ (and clobbers it)
131
132	3	0.00034	0.00011	no warnings 'uninitialized'; # we will be testing undef strings # spent 17µs making 1 call to warnings::unimport
133				local *_ = \join('', @_) if @_;
134				my (@words, $snippet);
135
136				s/\A\s+//;
137				while ($_ ne '') {
138				my $field = substr($_, 0, 0); # leave results tainted
139				for (;;) {
140				if (s/\A"(([^"\\]\|\\.)*)"//s) {
141				($snippet = $1) =~ s#\\(.)#$1#sg;
142				}
143				elsif (/\A"/) {
144				require Carp;
145				Carp::carp("Unmatched double quote: $_");
146				return();
147				}
148				elsif (s/\A'(([^'\\]\|\\.)*)'//s) {
149				($snippet = $1) =~ s#\\(.)#$1#sg;
150				}
151				elsif (/\A'/) {
152				require Carp;
153				Carp::carp("Unmatched single quote: $_");
154				return();
155				}
156				elsif (s/\A\\(.?)//s) {
157				$snippet = $1;
158				}
159				elsif (s/\A([^\s\\'"]+)//) {
160				$snippet = $1;
161				}
162				else {
163				s/\A\s+//;
164				last;
165				}
166				$field .= $snippet;
167				}
168				push(@words, $field);
169				}
170				return @words;
171				}
172
173	1	7.0e-6	7.0e-6	1;
174
175				__END__
176
177				=head1 NAME
178
179				Text::ParseWords - parse text into an array of tokens or array of arrays
180
181				=head1 SYNOPSIS
182
183				use Text::ParseWords;
184				@lists = &nested_quotewords($delim, $keep, @lines);
185				@words = &quotewords($delim, $keep, @lines);
186				@words = &shellwords(@lines);
187				@words = &parse_line($delim, $keep, $line);
188				@words = &old_shellwords(@lines); # DEPRECATED!
189
190				=head1 DESCRIPTION
191
192				The &nested_quotewords() and &quotewords() functions accept a delimiter
193				(which can be a regular expression)
194				and a list of lines and then breaks those lines up into a list of
195				words ignoring delimiters that appear inside quotes. &quotewords()
196				returns all of the tokens in a single long list, while &nested_quotewords()
197				returns a list of token lists corresponding to the elements of @lines.
198				&parse_line() does tokenizing on a single string. The &*quotewords()
199				functions simply call &parse_line(), so if you're only splitting
200				one line you can call &parse_line() directly and save a function
201				call.
202
203				The $keep argument is a boolean flag. If true, then the tokens are
204				split on the specified delimiter, but all other characters (quotes,
205				backslashes, etc.) are kept in the tokens. If $keep is false then the
206				&*quotewords() functions remove all quotes and backslashes that are
207				not themselves backslash-escaped or inside of single quotes (i.e.,
208				&quotewords() tries to interpret these characters just like the Bourne
209				shell). NB: these semantics are significantly different from the
210				original version of this module shipped with Perl 5.000 through 5.004.
211				As an additional feature, $keep may be the keyword "delimiters" which
212				causes the functions to preserve the delimiters in each string as
213				tokens in the token lists, in addition to preserving quote and
214				backslash characters.
215
216				&shellwords() is written as a special case of &quotewords(), and it
217				does token parsing with whitespace as a delimiter-- similar to most
218				Unix shells.
219
220				=head1 EXAMPLES
221
222				The sample program:
223
224				use Text::ParseWords;
225				@words = &quotewords('\s+', 0, q{this is "a test" of\ quotewords \"for you});
226				$i = 0;
227				foreach (@words) {
228				print "$i: <$_>\n";
229				$i++;
230				}
231
232				produces:
233
234				0: <this>
235				1: <is>
236				2: <a test>
237				3: <of quotewords>
238				4: <"for>
239				5: <you>
240
241				demonstrating:
242
243				=over 4
244
245				=item 0
246
247				a simple word
248
249				=item 1
250
251				multiple spaces are skipped because of our $delim
252
253				=item 2
254
255				use of quotes to include a space in a word
256
257				=item 3
258
259				use of a backslash to include a space in a word
260
261				=item 4
262
263				use of a backslash to remove the special meaning of a double-quote
264
265				=item 5
266
267				another simple word (note the lack of effect of the
268				backslashed double-quote)
269
270				=back
271
272				Replacing C<&quotewords('\s+', 0, q{this is...})>
273				with C<&shellwords(q{this is...})>
274				is a simpler way to accomplish the same thing.
275
276				=head1 AUTHORS
277
278				Maintainer is Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original
279				author unknown). Much of the code for &parse_line() (including the
280				primary regexp) from Joerk Behrends <jbehrends@multimediaproduzenten.de>.
281
282				Examples section another documentation provided by John Heidemann
283				<johnh@ISI.EDU>
284
285				Bug reports, patches, and nagging provided by lots of folks-- thanks
286				everybody! Special thanks to Michael Schwern <schwern@envirolink.org>
287				for assuring me that a &nested_quotewords() would be useful, and to
288				Jeff Friedl <jfriedl@yahoo-inc.com> for telling me not to worry about
289				error-checking (sort of-- you had to be there).
290
291				=cut