File | /opt/wise/lib/perl5/5.10.0/Text/ParseWords.pm | Statements Executed | 18 | Total Time | 0.001052 seconds |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine | |
---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | Text::ParseWords:: | BEGIN |
0 | 0 | 0 | 0 | 0 | Text::ParseWords:: | nested_quotewords |
0 | 0 | 0 | 0 | 0 | Text::ParseWords:: | old_shellwords |
0 | 0 | 0 | 0 | 0 | Text::ParseWords:: | parse_line |
0 | 0 | 0 | 0 | 0 | Text::ParseWords:: | quotewords |
0 | 0 | 0 | 0 | 0 | Text::ParseWords:: | shellwords |
Line | Stmts. | Exclusive Time | Avg. | Code |
---|---|---|---|---|
1 | package Text::ParseWords; | |||
2 | ||||
3 | 3 | 4.0e-5 | 1.3e-5 | use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE); # spent 68µs making 1 call to vars::import |
4 | 1 | 1.0e-6 | 1.0e-6 | $VERSION = "3.26"; |
5 | ||||
6 | 1 | 2.6e-5 | 2.6e-5 | require 5.000; |
7 | ||||
8 | 3 | 0.00034 | 0.00011 | use Exporter; # spent 28µs making 1 call to Exporter::import |
9 | 1 | 6.0e-6 | 6.0e-6 | @ISA = qw(Exporter); |
10 | 1 | 2.0e-6 | 2.0e-6 | @EXPORT = qw(shellwords quotewords nested_quotewords parse_line); |
11 | 1 | 1.0e-6 | 1.0e-6 | @EXPORT_OK = qw(old_shellwords); |
12 | ||||
13 | ||||
14 | sub shellwords { | |||
15 | my (@lines) = @_; | |||
16 | my @allwords; | |||
17 | ||||
18 | foreach my $line (@lines) { | |||
19 | $line =~ s/^\s+//; | |||
20 | my @words = parse_line('\s+', 0, $line); | |||
21 | pop @words if (@words and !defined $words[-1]); | |||
22 | return() unless (@words || !length($line)); | |||
23 | push(@allwords, @words); | |||
24 | } | |||
25 | return(@allwords); | |||
26 | } | |||
27 | ||||
28 | ||||
29 | ||||
30 | sub quotewords { | |||
31 | my($delim, $keep, @lines) = @_; | |||
32 | my($line, @words, @allwords); | |||
33 | ||||
34 | foreach $line (@lines) { | |||
35 | @words = parse_line($delim, $keep, $line); | |||
36 | return() unless (@words || !length($line)); | |||
37 | push(@allwords, @words); | |||
38 | } | |||
39 | return(@allwords); | |||
40 | } | |||
41 | ||||
42 | ||||
43 | ||||
44 | sub nested_quotewords { | |||
45 | my($delim, $keep, @lines) = @_; | |||
46 | my($i, @allwords); | |||
47 | ||||
48 | for ($i = 0; $i < @lines; $i++) { | |||
49 | @{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]); | |||
50 | return() unless (@{$allwords[$i]} || !length($lines[$i])); | |||
51 | } | |||
52 | return(@allwords); | |||
53 | } | |||
54 | ||||
55 | ||||
56 | ||||
57 | sub parse_line { | |||
58 | my($delimiter, $keep, $line) = @_; | |||
59 | my($word, @pieces); | |||
60 | ||||
61 | 3 | 0.00029 | 9.5e-5 | no warnings 'uninitialized'; # we will be testing undef strings # spent 74µs making 1 call to warnings::unimport |
62 | ||||
63 | while (length($line)) { | |||
64 | # This pattern is optimised to be stack conservative on older perls. | |||
65 | # Do not refactor without being careful and testing it on very long strings. | |||
66 | # See Perl bug #42980 for an example of a stack busting input. | |||
67 | $line =~ s/^ | |||
68 | (?: | |||
69 | # double quoted string | |||
70 | (") # $quote | |||
71 | ((?>[^\\"]*(?:\\.[^\\"]*)*))" # $quoted | |||
72 | | # --OR-- | |||
73 | # singe quoted string | |||
74 | (') # $quote | |||
75 | ((?>[^\\']*(?:\\.[^\\']*)*))' # $quoted | |||
76 | | # --OR-- | |||
77 | # unquoted string | |||
78 | ( # $unquoted | |||
79 | (?:\\.|[^\\"'])*? | |||
80 | ) | |||
81 | # followed by | |||
82 | ( # $delim | |||
83 | \Z(?!\n) # EOL | |||
84 | | # --OR-- | |||
85 | (?-x:$delimiter) # delimiter | |||
86 | | # --OR-- | |||
87 | (?!^)(?=["']) # a quote | |||
88 | ) | |||
89 | )//xs or return; # extended layout | |||
90 | my ($quote, $quoted, $unquoted, $delim) = (($1 ? ($1,$2) : ($3,$4)), $5, $6); | |||
91 | ||||
92 | ||||
93 | return() unless( defined($quote) || length($unquoted) || length($delim)); | |||
94 | ||||
95 | if ($keep) { | |||
96 | $quoted = "$quote$quoted$quote"; | |||
97 | } | |||
98 | else { | |||
99 | $unquoted =~ s/\\(.)/$1/sg; | |||
100 | if (defined $quote) { | |||
101 | $quoted =~ s/\\(.)/$1/sg if ($quote eq '"'); | |||
102 | $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'"); | |||
103 | } | |||
104 | } | |||
105 | $word .= substr($line, 0, 0); # leave results tainted | |||
106 | $word .= defined $quote ? $quoted : $unquoted; | |||
107 | ||||
108 | if (length($delim)) { | |||
109 | push(@pieces, $word); | |||
110 | push(@pieces, $delim) if ($keep eq 'delimiters'); | |||
111 | undef $word; | |||
112 | } | |||
113 | if (!length($line)) { | |||
114 | push(@pieces, $word); | |||
115 | } | |||
116 | } | |||
117 | return(@pieces); | |||
118 | } | |||
119 | ||||
120 | ||||
121 | ||||
122 | sub old_shellwords { | |||
123 | ||||
124 | # Usage: | |||
125 | # use ParseWords; | |||
126 | # @words = old_shellwords($line); | |||
127 | # or | |||
128 | # @words = old_shellwords(@lines); | |||
129 | # or | |||
130 | # @words = old_shellwords(); # defaults to $_ (and clobbers it) | |||
131 | ||||
132 | 3 | 0.00034 | 0.00011 | no warnings 'uninitialized'; # we will be testing undef strings # spent 17µs making 1 call to warnings::unimport |
133 | local *_ = \join('', @_) if @_; | |||
134 | my (@words, $snippet); | |||
135 | ||||
136 | s/\A\s+//; | |||
137 | while ($_ ne '') { | |||
138 | my $field = substr($_, 0, 0); # leave results tainted | |||
139 | for (;;) { | |||
140 | if (s/\A"(([^"\\]|\\.)*)"//s) { | |||
141 | ($snippet = $1) =~ s#\\(.)#$1#sg; | |||
142 | } | |||
143 | elsif (/\A"/) { | |||
144 | require Carp; | |||
145 | Carp::carp("Unmatched double quote: $_"); | |||
146 | return(); | |||
147 | } | |||
148 | elsif (s/\A'(([^'\\]|\\.)*)'//s) { | |||
149 | ($snippet = $1) =~ s#\\(.)#$1#sg; | |||
150 | } | |||
151 | elsif (/\A'/) { | |||
152 | require Carp; | |||
153 | Carp::carp("Unmatched single quote: $_"); | |||
154 | return(); | |||
155 | } | |||
156 | elsif (s/\A\\(.?)//s) { | |||
157 | $snippet = $1; | |||
158 | } | |||
159 | elsif (s/\A([^\s\\'"]+)//) { | |||
160 | $snippet = $1; | |||
161 | } | |||
162 | else { | |||
163 | s/\A\s+//; | |||
164 | last; | |||
165 | } | |||
166 | $field .= $snippet; | |||
167 | } | |||
168 | push(@words, $field); | |||
169 | } | |||
170 | return @words; | |||
171 | } | |||
172 | ||||
173 | 1 | 7.0e-6 | 7.0e-6 | 1; |
174 | ||||
175 | __END__ | |||
176 | ||||
177 | =head1 NAME | |||
178 | ||||
179 | Text::ParseWords - parse text into an array of tokens or array of arrays | |||
180 | ||||
181 | =head1 SYNOPSIS | |||
182 | ||||
183 | use Text::ParseWords; | |||
184 | @lists = &nested_quotewords($delim, $keep, @lines); | |||
185 | @words = "ewords($delim, $keep, @lines); | |||
186 | @words = &shellwords(@lines); | |||
187 | @words = &parse_line($delim, $keep, $line); | |||
188 | @words = &old_shellwords(@lines); # DEPRECATED! | |||
189 | ||||
190 | =head1 DESCRIPTION | |||
191 | ||||
192 | The &nested_quotewords() and "ewords() functions accept a delimiter | |||
193 | (which can be a regular expression) | |||
194 | and a list of lines and then breaks those lines up into a list of | |||
195 | words ignoring delimiters that appear inside quotes. "ewords() | |||
196 | returns all of the tokens in a single long list, while &nested_quotewords() | |||
197 | returns a list of token lists corresponding to the elements of @lines. | |||
198 | &parse_line() does tokenizing on a single string. The &*quotewords() | |||
199 | functions simply call &parse_line(), so if you're only splitting | |||
200 | one line you can call &parse_line() directly and save a function | |||
201 | call. | |||
202 | ||||
203 | The $keep argument is a boolean flag. If true, then the tokens are | |||
204 | split on the specified delimiter, but all other characters (quotes, | |||
205 | backslashes, etc.) are kept in the tokens. If $keep is false then the | |||
206 | &*quotewords() functions remove all quotes and backslashes that are | |||
207 | not themselves backslash-escaped or inside of single quotes (i.e., | |||
208 | "ewords() tries to interpret these characters just like the Bourne | |||
209 | shell). NB: these semantics are significantly different from the | |||
210 | original version of this module shipped with Perl 5.000 through 5.004. | |||
211 | As an additional feature, $keep may be the keyword "delimiters" which | |||
212 | causes the functions to preserve the delimiters in each string as | |||
213 | tokens in the token lists, in addition to preserving quote and | |||
214 | backslash characters. | |||
215 | ||||
216 | &shellwords() is written as a special case of "ewords(), and it | |||
217 | does token parsing with whitespace as a delimiter-- similar to most | |||
218 | Unix shells. | |||
219 | ||||
220 | =head1 EXAMPLES | |||
221 | ||||
222 | The sample program: | |||
223 | ||||
224 | use Text::ParseWords; | |||
225 | @words = "ewords('\s+', 0, q{this is "a test" of\ quotewords \"for you}); | |||
226 | $i = 0; | |||
227 | foreach (@words) { | |||
228 | print "$i: <$_>\n"; | |||
229 | $i++; | |||
230 | } | |||
231 | ||||
232 | produces: | |||
233 | ||||
234 | 0: <this> | |||
235 | 1: <is> | |||
236 | 2: <a test> | |||
237 | 3: <of quotewords> | |||
238 | 4: <"for> | |||
239 | 5: <you> | |||
240 | ||||
241 | demonstrating: | |||
242 | ||||
243 | =over 4 | |||
244 | ||||
245 | =item 0 | |||
246 | ||||
247 | a simple word | |||
248 | ||||
249 | =item 1 | |||
250 | ||||
251 | multiple spaces are skipped because of our $delim | |||
252 | ||||
253 | =item 2 | |||
254 | ||||
255 | use of quotes to include a space in a word | |||
256 | ||||
257 | =item 3 | |||
258 | ||||
259 | use of a backslash to include a space in a word | |||
260 | ||||
261 | =item 4 | |||
262 | ||||
263 | use of a backslash to remove the special meaning of a double-quote | |||
264 | ||||
265 | =item 5 | |||
266 | ||||
267 | another simple word (note the lack of effect of the | |||
268 | backslashed double-quote) | |||
269 | ||||
270 | =back | |||
271 | ||||
272 | Replacing C<"ewords('\s+', 0, q{this is...})> | |||
273 | with C<&shellwords(q{this is...})> | |||
274 | is a simpler way to accomplish the same thing. | |||
275 | ||||
276 | =head1 AUTHORS | |||
277 | ||||
278 | Maintainer is Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original | |||
279 | author unknown). Much of the code for &parse_line() (including the | |||
280 | primary regexp) from Joerk Behrends <jbehrends@multimediaproduzenten.de>. | |||
281 | ||||
282 | Examples section another documentation provided by John Heidemann | |||
283 | <johnh@ISI.EDU> | |||
284 | ||||
285 | Bug reports, patches, and nagging provided by lots of folks-- thanks | |||
286 | everybody! Special thanks to Michael Schwern <schwern@envirolink.org> | |||
287 | for assuring me that a &nested_quotewords() would be useful, and to | |||
288 | Jeff Friedl <jfriedl@yahoo-inc.com> for telling me not to worry about | |||
289 | error-checking (sort of-- you had to be there). | |||
290 | ||||
291 | =cut |