perl regex
Regular Expression
- / reg exp /
- # match string, returns 1 or 0 (True/False)
if (/ab/) {} # if $_ matches /ab/
- / reg exp /i
- # ignore case
- $x=~/ reg exp/
- # match using $x, not $_
$a="abcdef";
if ($a=~/def/) { }
if (<STDIN>=~/^[yY]/) {#yes}
- m#regexp#
- #allows different delimiters to be used. Good if using lots of '/' in regexp.
m@usr@ #same as /usr/ m*usr/bin* #same as /usr\/bin/ m#^/usr/etc# #same as /^\/usr\/etc/
- s/regexp/replacement str/
- # Substitute on $_
s/ab*c/def/ # replace ab*c w/ 'def'
- s/regexp/repstr/g
- # global sub: replaces every match, not just the 1st. Returns the # of substitutions made.
- s#regexp#rep str#
- #can use any char as delimiter, as long as the same char appears all 3 times. s@abc@123@ #same as s/abc/123/
- $x=~s/regexp/repstr/;
- =~ alt target instead of $_ $d{"t"}=~s/^/x/; #prepend 'x' to hash var's value
- $1,$2,$3,...
- #memorized parts of the reg exp are saved in $1,$2,... $_="abc"; /(.)(.)(.)/; # $1='a',$2='b',$3='c'; $_="a test"; s/(\w+)/<$1>/g; # $_="<a> <test>"
- ($var1,$var2)=/(.)(.)(.)/;
- # $var1='a',$var2='b', alternative to $1,$2
- $&
- # / / saves the matched string in this var.
$_="abcdef"; /cd/; # $& now contains 'cd'
$`
- / / saves the string before match, (backquote)
- $` is 'ab'
$'
- / / saves the string after match, (single quote)
- $' is now 'ef'
REG EXP CHARS
- .
- # match any char except \n
[ ]
#match 1 char
/[abc]/ # if str contains 'a','b',or 'c'
/[a-z]/ # any lower-case alphabet char
/[0-9\-]/ #0-9 or '-'
/[a-z0-9A-Z_]/ #alphanumeric or '_'
[^ ]
#match except char
/[^0-9]/ #match any non-digit char
/[^aeiou]/ #match anything except lower-case vowel
\d
# [0-9] shortcut
\D
# [^0-9]
\w
#[a-zA-Z0-9_]
\W
#[^a-zA-Z0-9_]
\s
#[ \r\t\n\f]
\S
#[^ \r\t\n\f]
mixed example:
/[\da-fA-F]/ #matches 1 hex digit /^\d/ # match if 1st char is a digit
*
# multiple, 0 or more
/ab*c/ # a,0 or more b, c
+
# multiple, 1 or more
/ab+c/
s/x+/QQ/ # if $_="a xxx b" -> "a QQ b"
?
#multiple, 0 or 1, no more
/ab?c/
/x{min,max}/
# general multiplier
/t{5,10}/ #matches 5-10 't's.
/t{5,}/ # 5 or more t's, same as /ttttt+/
/t{5}/ # same as /ttttt/
/t{0,5}/ # 5 or less t's
/a.{5}b/ #a separated by any 5 char and b.
Greedy Rule
Leftmost is greediest
$_="a xxx c xxx c xxx d";
/a.*c.*d/; # greedy: 1st .* runs up to the 2nd 'c', not the 1st
/a.*?c.*d/; # lazy (non-greedy): 1st .*? stops at the 1st 'c'
Appending "?" to a multiplier (".*?", ".+?", "x{5,10}?") makes it non-greedy.
Automatically backtracks to find correct match
$_="/a xxx ce xxx ci xx d";
/a.*ce.*d/ # automatically backtracks
/( )/
# store in memory. Also sets precedence. See Precedence.
/fred(.)barney/ # the single char between fred and barney is stored in 1st mem
/ \1 \2 \3 /
#recall memory. \1=1st mem, \2=2nd mem, \3=3rd mem...
# They are also stored in var $1,$2,$3,...
/fred(.)barney\1/ # match fred, any char, barney, any char (that is same as 1st any char)
# not same as /fred.barney./ since 1st char must match 2nd char.
/a(.)b(.)c\2d\1/;
# 'a',any char,'b', any char, c, char (same as after b),d, char(same as after a)
/a(.*)b\1c/;
# 'a',any # of char,b, same string after 'a', 'c'.
/s1|s2|s3/
# match either words
/red|green|blue/ # match 1 of 3 color
\b
#boundary anchor (ie alphanumeric char boundary)
#if word is alphanumeric, it will form boundary w/ non-alphanumeric
# if word is non-alphanumeric, it will form boundary w/ alphanumeric char
# Usually placed at the beginning or end of a word in the pattern. /\b.../, /...\b/
/fred\b/ # match fred, but not freddy
/\bmo/ #match moby, but not elmo
/\b\+\b/ # match "x+y" but not "++"
/ABC\bDEF/ #impossible: 'C' and 'D' are both word chars, so no boundary can exist between them
\B
#require no boundary
/\bFred\B/, matches "Frederick", but not "Fred Foe" since boundary exists after Fred.
^
# beginning of line, must be the 1st char.
/^a/ # 1st letter must be 'a'.
/aaa^/ # Wrong. Cannot have ^ at anywhere except 1st
s/^/x/; #^ by itself means prepend. Prepend "x" to $_.
# don't be confused with [^]
$
# end of line, must be last char
/c$/ # last char must be 'c'
/$cccc/ #Wrong. It looks for var $cccc instead.
$var
#all vars are interpolated.
$word="ab*c";
if (/$word/) { } #becomes /ab*c/
\Q...\E
#use literal, vars are interpolated but as literal, not reg exp.
$word="ab*c";
if (/\Q$word\E/) { } #becomes /ab\*c/, not /ab*c/
Precedence
- ( ), (?: ) #paren
- ?+* {m,n} #multiplier
- abc ^ $ \b \B #sequence, anchoring
- a|b #alternative
/a|b*/ = a|(b*), not (a|b)* abc*= ab,abc,abcc,abccc,... (abc)*="",abc,abcabc,abcabcabc,... ^x|y= matches x as 1st char, or y anywhere ^(x|y) = 1st char/word must be x or y a|bc|d = a,bc,or d (a|b)(c|d)= ac,bc,ad,bd (red|blue)car= redcar , bluecar
TRANSLITERATION
tr/old/new/;
#replace each char listed in 'old' with the char at the same position in 'new' (char-by-char, not a reg exp)
tr/ab/ba/;
#subst a->b, b->a.
tr/a-z/A-Z/;
# capitalize.
tr/a-z/x/;
# replace all lower-case to 'x'
tr/a-z/ABC/;
# cap a-c. all other lower-case to 'C'
tr/a-z/x/d;
# "d" = delete chars found in the search list that have no corresponding replacement char
# ie a becomes x, 'b-z' are deleted. Chars not in the search list are left alone.
# 'abcDEF' -> 'xDEF'
$count=tr/../../;
# Counts
$count=tr/a-z//;
# counts # of letters that are lower-case (string is left unchanged).
$count=tr/a-z/A-Z/;
# upper-case and counts # of chars that were converted
tr/../../c;
$count=tr/a-z//c;
# 'c' complement, ie all except
# count all letter except lowercase a-z.
$count=tr/a-z/_/c;
# replace all char to '_' except a-z
$count=tr/a-z//cd;
# delete all letter except a-z
tr/../../s
# 's'=squeeze, ie multiple letters are replaced by 1 letter if same.
$_='abcccc'; tr/c/x/s;
# result: 'abx'
$_='bc def ghi'; tr/a-z/x/s;
# result: 'x x x'
$_='abc -def--ghi'; tr/a-z/_/cs;
# result: 'abc_def_ghi'
REG EXP FUNCTIONS
@a=split(/regexp/,$str) ;
#splits line into array of words at the regexp match.
$str= $_ by default.
$_="abc:def:123";
@a=split(/:/) ; # @a=(abc,def,123)
@a=split;
# same as @a=split(' ',$_) ; i.e. split on /\s+/ after skipping leading whitespace. #nice shortcut.
$bigString=join($glue,@list) ;
# opposite of split, joins array of str into 1 big string
@list=(a,b,c) ;
$result=join(":",@list) ; #result="a:b:c";
$result=join("+","5","7") ; # ="5+7"
BASIC STANDARD I/O
$a=<STDIN>;
# read line
while (defined($line=<STDIN>)) {}
while (<STDIN>) { chomp} # shorter version of above
@a=<STDIN>;
# read multiple lines
<>
Gets arg from @ARGV. @ARGV contains cmdline arg
a.pl : while (<>) {print;}
>a.pl a.txt b.txt c.txt
# outputs all text files. If no arg is defined, it uses STDIN.
# sim to 'cat'
printf() ;
# just like C
open(FILEHANDLE,"filename") ;
#Open file to read.
open(MY_TEXT,"me.txt") ;
open(FILEHANDLE,">out.txt") ;
#'>' = WRITE mode
open(FILEHANDLE,">>append.txt") ;
#'>>'=APPEND mode
close(FILEHANDLE) ;
#close file.
close (MY_TEXT) ;
$a=<FILEHANDLE>;
#Read a line from file.
@a=<FILEHANDLE>;
#read all lines from file.
open(PWD,"/etc/passwd") ;
while (<PWD>) { print}
# each line already contains \n so no need to print \n.
print FILEHANDLE " ";
#write line to file
print MY_TEXT "Hello\n";
CopyFile Ex
open(IN,$a) || die $!;
open(OUT,">$b") || die $!; # '>' needed: OUT must be opened in WRITE mode
while (<IN>) { print OUT $_;}
close (IN) || die;
close (OUT) || die;
FILE TEST FUNC "-X"
Works on both filename and FILEHANDLE
-e
#filename exists
if (-e $filename) { }
-r
#file is readable
if (-r $filename) { }
-w
#is writable
-x
#is executable
-o
#is owned by user
-R,-W,-X,-O
# is readable,writable,... by real user, not effective user
-z
# file exists and has 0 size (dir is never 0)
-s
# file/dir exists and is non-zero size
-f
# is a file
-d
# is a dir
-l
#is a symbolic (soft) link
-S
# is a socket
-p
# is a pipe
-b
# block-special file (mountable disk)
-c
# char-special file (I/O device)
-u
# is setuid
-g
# is setgid
-k
#has sticky bit set
-t
#isatty() is true
-T
#is a text file
-B
#is a binary file
-M
# returns modified date age in # of days
-A
#returns accessed date age in # of days
-C
#returns inode mod age in # of days
