File | /opt/wise/lib/perl5/5.10.0/x86_64-linux-thread-multi/re.pm | Statements Executed | 30 | Total Time | 0.001181 seconds |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine | |
---|---|---|---|---|---|---|
1 | 1 | 1 | 2.1e-5 | 2.1e-5 | re:: | bits |
1 | 1 | 1 | 2.1e-5 | 4.2e-5 | re:: | import |
0 | 0 | 0 | 0 | 0 | re:: | BEGIN |
0 | 0 | 0 | 0 | 0 | re:: | _do_install |
0 | 0 | 0 | 0 | 0 | re:: | _load_unload |
0 | 0 | 0 | 0 | 0 | re:: | setcolor |
0 | 0 | 0 | 0 | 0 | re:: | unimport |
Line | Stmts. | Exclusive Time | Avg. | Code |
---|---|---|---|---|
1 | package re; | |||
2 | ||||
3 | # pragma for controlling the regex engine | |||
4 | 3 | 3.8e-5 | 1.3e-5 | use strict; # spent 22µs making 1 call to strict::import |
5 | 3 | 0.00104 | 0.00035 | use warnings; # spent 51µs making 1 call to warnings::import |
6 | ||||
7 | 1 | 1.0e-6 | 1.0e-6 | our $VERSION = "0.08"; |
8 | 1 | 1.1e-5 | 1.1e-5 | our @ISA = qw(Exporter); |
9 | 1 | 2.0e-6 | 2.0e-6 | our @EXPORT_OK = qw(is_regexp regexp_pattern regmust |
10 | regname regnames regnames_count); | |||
11 | 1 | 1.0e-5 | 1.0e-5 | our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK; |
12 | ||||
13 | # *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING *** | |||
14 | # | |||
15 | # If you modify these values see comment below! | |||
16 | ||||
17 | 1 | 4.0e-6 | 4.0e-6 | my %bitmask = ( |
18 | taint => 0x00100000, # HINT_RE_TAINT | |||
19 | eval => 0x00200000, # HINT_RE_EVAL | |||
20 | ); | |||
21 | ||||
22 | # - File::Basename contains a literal for 'taint' as a fallback. If | |||
23 | # taint is changed here, File::Basename must be updated as well. | |||
24 | # | |||
25 | # - ExtUtils::ParseXS uses a hardcoded | |||
26 | # BEGIN { $^H |= 0x00200000 } | |||
27 | # in it to allow re.xs to be built. So if 'eval' is changed here then | |||
28 | # ExtUtils::ParseXS must be changed as well. | |||
29 | # | |||
30 | # *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING *** | |||
31 | ||||
32 | sub setcolor { | |||
33 | eval { # Ignore errors | |||
34 | require Term::Cap; | |||
35 | ||||
36 | my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning. | |||
37 | my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue'; | |||
38 | my @props = split /,/, $props; | |||
39 | my $colors = join "\t", map {$terminal->Tputs($_,1)} @props; | |||
40 | ||||
41 | $colors =~ s/\0//g; | |||
42 | $ENV{PERL_RE_COLORS} = $colors; | |||
43 | }; | |||
44 | if ($@) { | |||
45 | $ENV{PERL_RE_COLORS} ||= qq'\t\t> <\t> <\t\t'; | |||
46 | } | |||
47 | ||||
48 | } | |||
49 | ||||
50 | 1 | 1.0e-5 | 1.0e-5 | my %flags = ( |
51 | COMPILE => 0x0000FF, | |||
52 | PARSE => 0x000001, | |||
53 | OPTIMISE => 0x000002, | |||
54 | TRIEC => 0x000004, | |||
55 | DUMP => 0x000008, | |||
56 | FLAGS => 0x000010, | |||
57 | ||||
58 | EXECUTE => 0x00FF00, | |||
59 | INTUIT => 0x000100, | |||
60 | MATCH => 0x000200, | |||
61 | TRIEE => 0x000400, | |||
62 | ||||
63 | EXTRA => 0xFF0000, | |||
64 | TRIEM => 0x010000, | |||
65 | OFFSETS => 0x020000, | |||
66 | OFFSETSDBG => 0x040000, | |||
67 | STATE => 0x080000, | |||
68 | OPTIMISEM => 0x100000, | |||
69 | STACK => 0x280000, | |||
70 | BUFFERS => 0x400000, | |||
71 | ); | |||
72 | 1 | 3.0e-6 | 3.0e-6 | $flags{ALL} = -1 & ~($flags{OFFSETS}|$flags{OFFSETSDBG}|$flags{BUFFERS}); |
73 | 1 | 2.0e-6 | 2.0e-6 | $flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE}; |
74 | 1 | 1.0e-6 | 1.0e-6 | $flags{Extra} = $flags{EXECUTE} | $flags{COMPILE}; |
75 | 1 | 2.0e-6 | 2.0e-6 | $flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE}; |
76 | 1 | 2.0e-6 | 2.0e-6 | $flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE}; |
77 | 1 | 1.0e-6 | 1.0e-6 | $flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC}; |
78 | ||||
79 | 1 | 0 | 0 | my $installed; |
80 | 1 | 1.0e-6 | 1.0e-6 | my $installed_error; |
81 | ||||
82 | sub _do_install { | |||
83 | if ( ! defined($installed) ) { | |||
84 | require XSLoader; | |||
85 | $installed = eval { XSLoader::load('re', $VERSION) } || 0; | |||
86 | $installed_error = $@; | |||
87 | } | |||
88 | } | |||
89 | ||||
90 | sub _load_unload { | |||
91 | my ($on)= @_; | |||
92 | if ($on) { | |||
93 | _do_install(); | |||
94 | if ( ! $installed ) { | |||
95 | die "'re' not installed!? ($installed_error)"; | |||
96 | } else { | |||
97 | # We call install() every time, as if we didn't, we wouldn't | |||
98 | # "see" any changes to the color environment var since | |||
99 | # the last time it was called. | |||
100 | ||||
101 | # install() returns an integer, which if casted properly | |||
102 | # in C resolves to a structure containing the regex | |||
103 | # hooks. Setting it to a random integer will guarantee | |||
104 | # segfaults. | |||
105 | $^H{regcomp} = install(); | |||
106 | } | |||
107 | } else { | |||
108 | delete $^H{regcomp}; | |||
109 | } | |||
110 | } | |||
111 | ||||
112 | # spent 21µs within re::bits which was called
# once (21µs+0) by re::import at line 161 | |||
113 | 5 | 2.1e-5 | 4.2e-6 | my $on = shift; |
114 | my $bits = 0; | |||
115 | unless (@_) { | |||
116 | require Carp; | |||
117 | Carp::carp("Useless use of \"re\" pragma"); | |||
118 | } | |||
119 | foreach my $idx (0..$#_){ | |||
120 | 2 | 5.0e-6 | 2.5e-6 | my $s=$_[$idx]; |
121 | if ($s eq 'Debug' or $s eq 'Debugcolor') { | |||
122 | setcolor() if $s =~/color/i; | |||
123 | ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS}; | |||
124 | for my $idx ($idx+1..$#_) { | |||
125 | if ($flags{$_[$idx]}) { | |||
126 | if ($on) { | |||
127 | ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]}; | |||
128 | } else { | |||
129 | ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]}; | |||
130 | } | |||
131 | } else { | |||
132 | require Carp; | |||
133 | Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ", | |||
134 | join(", ",sort keys %flags ) ); | |||
135 | } | |||
136 | } | |||
137 | _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS}); | |||
138 | last; | |||
139 | } elsif ($s eq 'debug' or $s eq 'debugcolor') { | |||
140 | setcolor() if $s =~/color/i; | |||
141 | _load_unload($on); | |||
142 | last; | |||
143 | } elsif (exists $bitmask{$s}) { | |||
144 | $bits |= $bitmask{$s}; | |||
145 | } elsif ($EXPORT_OK{$s}) { | |||
146 | _do_install(); | |||
147 | require Exporter; | |||
148 | re->export_to_level(2, 're', $s); | |||
149 | } else { | |||
150 | require Carp; | |||
151 | Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ", | |||
152 | join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask), | |||
153 | ")"); | |||
154 | } | |||
155 | } | |||
156 | $bits; | |||
157 | } | |||
158 | ||||
159 | # spent 42µs (21+21) within re::import which was called
# once (21µs+21µs) at line 46 of /opt/wise/lib/perl5/5.10.0/File/Basename.pm | |||
160 | 2 | 8.0e-6 | 4.0e-6 | shift; |
161 | $^H |= bits(1, @_); # spent 21µs making 1 call to re::bits | |||
162 | } | |||
163 | ||||
164 | sub unimport { | |||
165 | shift; | |||
166 | $^H &= ~ bits(0, @_); | |||
167 | } | |||
168 | ||||
169 | 1 | 2.3e-5 | 2.3e-5 | 1; |
170 | ||||
171 | __END__ | |||
172 | ||||
173 | =head1 NAME | |||
174 | ||||
175 | re - Perl pragma to alter regular expression behaviour | |||
176 | ||||
177 | =head1 SYNOPSIS | |||
178 | ||||
179 | use re 'taint'; | |||
180 | ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here | |||
181 | ||||
182 | $pat = '(?{ $foo = 1 })'; | |||
183 | use re 'eval'; | |||
184 | /foo${pat}bar/; # won't fail (when not under -T switch) | |||
185 | ||||
186 | { | |||
187 | no re 'taint'; # the default | |||
188 | ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here | |||
189 | ||||
190 | no re 'eval'; # the default | |||
191 | /foo${pat}bar/; # disallowed (with or without -T switch) | |||
192 | } | |||
193 | ||||
194 | use re 'debug'; # output debugging info during | |||
195 | /^(.*)$/s; # compile and run time | |||
196 | ||||
197 | ||||
198 | use re 'debugcolor'; # same as 'debug', but with colored output | |||
199 | ... | |||
200 | ||||
201 | use re qw(Debug All); # Finer tuned debugging options. | |||
202 | use re qw(Debug More); | |||
203 | no re qw(Debug ALL); # Turn of all re debugging in this scope | |||
204 | ||||
205 | use re qw(is_regexp regexp_pattern); # import utility functions | |||
206 | my ($pat,$mods)=regexp_pattern(qr/foo/i); | |||
207 | if (is_regexp($obj)) { | |||
208 | print "Got regexp: ", | |||
209 | scalar regexp_pattern($obj); # just as perl would stringify it | |||
210 | } # but no hassle with blessed re's. | |||
211 | ||||
212 | (We use $^X in these examples because it's tainted by default.) | |||
213 | ||||
214 | =head1 DESCRIPTION | |||
215 | ||||
216 | =head2 'taint' mode | |||
217 | ||||
218 | When C<use re 'taint'> is in effect, and a tainted string is the target | |||
219 | of a regex, the regex memories (or values returned by the m// operator | |||
220 | in list context) are tainted. This feature is useful when regex operations | |||
221 | on tainted data aren't meant to extract safe substrings, but to perform | |||
222 | other transformations. | |||
223 | ||||
224 | =head2 'eval' mode | |||
225 | ||||
226 | When C<use re 'eval'> is in effect, a regex is allowed to contain | |||
227 | C<(?{ ... })> zero-width assertions even if regular expression contains | |||
228 | variable interpolation. That is normally disallowed, since it is a | |||
229 | potential security risk. Note that this pragma is ignored when the regular | |||
230 | expression is obtained from tainted data, i.e. evaluation is always | |||
231 | disallowed with tainted regular expressions. See L<perlre/(?{ code })>. | |||
232 | ||||
233 | For the purpose of this pragma, interpolation of precompiled regular | |||
234 | expressions (i.e., the result of C<qr//>) is I<not> considered variable | |||
235 | interpolation. Thus: | |||
236 | ||||
237 | /foo${pat}bar/ | |||
238 | ||||
239 | I<is> allowed if $pat is a precompiled regular expression, even | |||
240 | if $pat contains C<(?{ ... })> assertions. | |||
241 | ||||
242 | =head2 'debug' mode | |||
243 | ||||
244 | When C<use re 'debug'> is in effect, perl emits debugging messages when | |||
245 | compiling and using regular expressions. The output is the same as that | |||
246 | obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the | |||
247 | B<-Dr> switch. It may be quite voluminous depending on the complexity | |||
248 | of the match. Using C<debugcolor> instead of C<debug> enables a | |||
249 | form of output that can be used to get a colorful display on terminals | |||
250 | that understand termcap color sequences. Set C<$ENV{PERL_RE_TC}> to a | |||
251 | comma-separated list of C<termcap> properties to use for highlighting | |||
252 | strings on/off, pre-point part on/off. | |||
253 | See L<perldebug/"Debugging regular expressions"> for additional info. | |||
254 | ||||
255 | As of 5.9.5 the directive C<use re 'debug'> and its equivalents are | |||
256 | lexically scoped, as the other directives are. However they have both | |||
257 | compile-time and run-time effects. | |||
258 | ||||
259 | See L<perlmodlib/Pragmatic Modules>. | |||
260 | ||||
261 | =head2 'Debug' mode | |||
262 | ||||
263 | Similarly C<use re 'Debug'> produces debugging output, the difference | |||
264 | being that it allows the fine tuning of what debugging output will be | |||
265 | emitted. Options are divided into three groups, those related to | |||
266 | compilation, those related to execution and those related to special | |||
267 | purposes. The options are as follows: | |||
268 | ||||
269 | =over 4 | |||
270 | ||||
271 | =item Compile related options | |||
272 | ||||
273 | =over 4 | |||
274 | ||||
275 | =item COMPILE | |||
276 | ||||
277 | Turns on all compile related debug options. | |||
278 | ||||
279 | =item PARSE | |||
280 | ||||
281 | Turns on debug output related to the process of parsing the pattern. | |||
282 | ||||
283 | =item OPTIMISE | |||
284 | ||||
285 | Enables output related to the optimisation phase of compilation. | |||
286 | ||||
287 | =item TRIEC | |||
288 | ||||
289 | Detailed info about trie compilation. | |||
290 | ||||
291 | =item DUMP | |||
292 | ||||
293 | Dump the final program out after it is compiled and optimised. | |||
294 | ||||
295 | =back | |||
296 | ||||
297 | =item Execute related options | |||
298 | ||||
299 | =over 4 | |||
300 | ||||
301 | =item EXECUTE | |||
302 | ||||
303 | Turns on all execute related debug options. | |||
304 | ||||
305 | =item MATCH | |||
306 | ||||
307 | Turns on debugging of the main matching loop. | |||
308 | ||||
309 | =item TRIEE | |||
310 | ||||
311 | Extra debugging of how tries execute. | |||
312 | ||||
313 | =item INTUIT | |||
314 | ||||
315 | Enable debugging of start point optimisations. | |||
316 | ||||
317 | =back | |||
318 | ||||
319 | =item Extra debugging options | |||
320 | ||||
321 | =over 4 | |||
322 | ||||
323 | =item EXTRA | |||
324 | ||||
325 | Turns on all "extra" debugging options. | |||
326 | ||||
327 | =item BUFFERS | |||
328 | ||||
329 | Enable debugging the capture buffer storage during match. Warning, | |||
330 | this can potentially produce extremely large output. | |||
331 | ||||
332 | =item TRIEM | |||
333 | ||||
334 | Enable enhanced TRIE debugging. Enhances both TRIEE | |||
335 | and TRIEC. | |||
336 | ||||
337 | =item STATE | |||
338 | ||||
339 | Enable debugging of states in the engine. | |||
340 | ||||
341 | =item STACK | |||
342 | ||||
343 | Enable debugging of the recursion stack in the engine. Enabling | |||
344 | or disabling this option automatically does the same for debugging | |||
345 | states as well. This output from this can be quite large. | |||
346 | ||||
347 | =item OPTIMISEM | |||
348 | ||||
349 | Enable enhanced optimisation debugging and start point optimisations. | |||
350 | Probably not useful except when debugging the regex engine itself. | |||
351 | ||||
352 | =item OFFSETS | |||
353 | ||||
354 | Dump offset information. This can be used to see how regops correlate | |||
355 | to the pattern. Output format is | |||
356 | ||||
357 | NODENUM:POSITION[LENGTH] | |||
358 | ||||
359 | Where 1 is the position of the first char in the string. Note that position | |||
360 | can be 0, or larger than the actual length of the pattern, likewise length | |||
361 | can be zero. | |||
362 | ||||
363 | =item OFFSETSDBG | |||
364 | ||||
365 | Enable debugging of offsets information. This emits copious | |||
366 | amounts of trace information and doesn't mesh well with other | |||
367 | debug options. | |||
368 | ||||
369 | Almost definitely only useful to people hacking | |||
370 | on the offsets part of the debug engine. | |||
371 | ||||
372 | =back | |||
373 | ||||
374 | =item Other useful flags | |||
375 | ||||
376 | These are useful shortcuts to save on the typing. | |||
377 | ||||
378 | =over 4 | |||
379 | ||||
380 | =item ALL | |||
381 | ||||
382 | Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS | |||
383 | ||||
384 | =item All | |||
385 | ||||
386 | Enable DUMP and all execute options. Equivalent to: | |||
387 | ||||
388 | use re 'debug'; | |||
389 | ||||
390 | =item MORE | |||
391 | ||||
392 | =item More | |||
393 | ||||
394 | Enable TRIEM and all execute compile and execute options. | |||
395 | ||||
396 | =back | |||
397 | ||||
398 | =back | |||
399 | ||||
400 | As of 5.9.5 the directive C<use re 'debug'> and its equivalents are | |||
401 | lexically scoped, as the other directives are. However they have both | |||
402 | compile-time and run-time effects. | |||
403 | ||||
404 | =head2 Exportable Functions | |||
405 | ||||
406 | As of perl 5.9.5 're' debug contains a number of utility functions that | |||
407 | may be optionally exported into the caller's namespace. They are listed | |||
408 | below. | |||
409 | ||||
410 | =over 4 | |||
411 | ||||
412 | =item is_regexp($ref) | |||
413 | ||||
414 | Returns true if the argument is a compiled regular expression as returned | |||
415 | by C<qr//>, false if it is not. | |||
416 | ||||
417 | This function will not be confused by overloading or blessing. In | |||
418 | internals terms, this extracts the regexp pointer out of the | |||
419 | PERL_MAGIC_qr structure so it it cannot be fooled. | |||
420 | ||||
421 | =item regexp_pattern($ref) | |||
422 | ||||
423 | If the argument is a compiled regular expression as returned by C<qr//>, | |||
424 | then this function returns the pattern. | |||
425 | ||||
426 | In list context it returns a two element list, the first element | |||
427 | containing the pattern and the second containing the modifiers used when | |||
428 | the pattern was compiled. | |||
429 | ||||
430 | my ($pat, $mods) = regexp_pattern($ref); | |||
431 | ||||
432 | In scalar context it returns the same as perl would when strigifying a raw | |||
433 | C<qr//> with the same pattern inside. If the argument is not a compiled | |||
434 | reference then this routine returns false but defined in scalar context, | |||
435 | and the empty list in list context. Thus the following | |||
436 | ||||
437 | if (regexp_pattern($ref) eq '(?i-xsm:foo)') | |||
438 | ||||
439 | will be warning free regardless of what $ref actually is. | |||
440 | ||||
441 | Like C<is_regexp> this function will not be confused by overloading | |||
442 | or blessing of the object. | |||
443 | ||||
444 | =item regmust($ref) | |||
445 | ||||
446 | If the argument is a compiled regular expression as returned by C<qr//>, | |||
447 | then this function returns what the optimiser consiers to be the longest | |||
448 | anchored fixed string and longest floating fixed string in the pattern. | |||
449 | ||||
450 | A I<fixed string> is defined as being a substring that must appear for the | |||
451 | pattern to match. An I<anchored fixed string> is a fixed string that must | |||
452 | appear at a particular offset from the beginning of the match. A I<floating | |||
453 | fixed string> is defined as a fixed string that can appear at any point in | |||
454 | a range of positions relative to the start of the match. For example, | |||
455 | ||||
456 | my $qr = qr/here .* there/x; | |||
457 | my ($anchored, $floating) = regmust($qr); | |||
458 | print "anchored:'$anchored'\nfloating:'$floating'\n"; | |||
459 | ||||
460 | results in | |||
461 | ||||
462 | anchored:'here' | |||
463 | floating:'there' | |||
464 | ||||
465 | Because the C<here> is before the C<.*> in the pattern, its position | |||
466 | can be determined exactly. That's not true, however, for the C<there>; | |||
467 | it could appear at any point after where the anchored string appeared. | |||
468 | Perl uses both for its optimisations, prefering the longer, or, if they are | |||
469 | equal, the floating. | |||
470 | ||||
471 | B<NOTE:> This may not necessarily be the definitive longest anchored and | |||
472 | floating string. This will be what the optimiser of the Perl that you | |||
473 | are using thinks is the longest. If you believe that the result is wrong | |||
474 | please report it via the L<perlbug> utility. | |||
475 | ||||
476 | =item regname($name,$all) | |||
477 | ||||
478 | Returns the contents of a named buffer of the last successful match. If | |||
479 | $all is true, then returns an array ref containing one entry per buffer, | |||
480 | otherwise returns the first defined buffer. | |||
481 | ||||
482 | =item regnames($all) | |||
483 | ||||
484 | Returns a list of all of the named buffers defined in the last successful | |||
485 | match. If $all is true, then it returns all names defined, if not it returns | |||
486 | only names which were involved in the match. | |||
487 | ||||
488 | =item regnames_count() | |||
489 | ||||
490 | Returns the number of distinct names defined in the pattern used | |||
491 | for the last successful match. | |||
492 | ||||
493 | B<Note:> this result is always the actual number of distinct | |||
494 | named buffers defined, it may not actually match that which is | |||
495 | returned by C<regnames()> and related routines when those routines | |||
496 | have not been called with the $all parameter set. | |||
497 | ||||
498 | =back | |||
499 | ||||
500 | =head1 SEE ALSO | |||
501 | ||||
502 | L<perlmodlib/Pragmatic Modules>. | |||
503 | ||||
504 | =cut |