programming-examples/perl/Regular_Expression/Character Class.pl

#Character Class: Alternative Characters!

Metacharacter               What It Matches

was|were|will               Matches one of was, were, or will


#Character Class: Anchored Characters!

Metacharacter               What It Matches
\b                          Matches a word boundary (when not inside [ ])
\B                          Matches a nonword boundary
^                           Matches at the beginning of the line
$                           Matches at the end of the line
\A                          Matches the beginning of the string only
\Z                          Matches the end of the string, or just before a newline at the end of the string
\z                          Matches the end of string only
\G                          Matches where previous m//g left off


#Character Class: Miscellaneous Characters!

Metacharacter               What It Matches

\12                         Matches that octal value, up to \377
\x81                        Matches that hex value (at most two hex digits; use \x{NUMBER} for longer values)
\cX                         Matches that control character; e.g., \cC is <Ctrl>-C and \cV is <Ctrl>-V
\e                          Matches the ASCII ESC character, not backslash
\E                          Marks the end of changing case with \U, \L, or \Q
\l                          Lowercase the next character only
\L                          Lowercase characters until the end of the string or until \E
\N                          Matches that named character; e.g., \N{greek:Beta}
\p{PROPERTY}                Matches any character with the named property; e.g., \p{IsAlpha}
\P{PROPERTY}                Matches any character without the named property
\Q                          Quote metacharacters until \E
\u                          Titlecase next character only
\U                          Uppercase until \E
\x{NUMBER}                  Matches Unicode NUMBER given in hexadecimal
\X                          Matches Unicode "combining character sequence" string
\[                          Matches that metacharacter
\\                          Matches a backslash


#Character Class: Remembered Characters!

Metacharacter               What It Matches
(string)                    Used for backreferencing (see Examples 9.38 and 9.39)
\1 or $1                    Matches first set of parentheses
\2 or $2                    Matches second set of parentheses
\3 or $3                    Matches third set of parentheses


#Character Class: Repeated Characters!

Metacharacter               What It Matches

x?                          Matches 0 or 1 x
x*                          Matches 0 or more occurrences of x
x+                          Matches 1 or more occurrences of x
(xyz)+                      Matches 1 or more patterns of xyz
x{m,n}                      Matches at least m occurrences of x and no more than n occurrences of x


 #Character Class: Whitespace Characters!

Metacharacter               What It Matches
\s                          Matches a whitespace character, such as spaces, tabs, and newlines
\S                          Matches nonwhitespace character
\n                          Matches a newline
\r                          Matches a return
\t                          Matches a tab
\f                          Matches a form feed
\b                          Matches a backspace (only when inside [ ]; elsewhere \b is a word boundary)
\0                          Matches a null character


#Match any number of word characters (including none) followed by a single whitespace character!

$p = "This is a pattern test.";

# \w* accepts zero or more word characters ahead of the whitespace character;
# the parentheses capture the matched text into $1, which is then printed.
print "$1\n" if $p =~ /(\w*\s)/;


#Match at least one alphanumeric followed by a single space character!

# Same check against $p, except \w+ demands at least one word character
# before the whitespace character.
print "$1\n" if $p =~ /(\w+\s)/;


#Posix and Unicode Classes!

Shortcut              Expansion                                       Description
[[:alpha:]]           [a-zA-Z]                                        An alphabetic character.
[[:alnum:]]           [0-9A-Za-z]                                     An alphabetic or numeric character.
[[:digit:]]           \d                                              A digit, 0-9.
[[:lower:]]           [a-z]                                           A lower case letter.
[[:upper:]]           [A-Z]                                           An upper case letter.
[[:punct:]]           [!"#$%&'()*+,-./:;<=>?@\[\\\]^_`{|}~]           A punctuation character; note the escaped characters [, \, and ].

/bea?t/ Matches either 'beat' or 'bet'
/bea+t/ Matches 'beat', 'beaat', 'beaaat'
/bea*t/ Matches 'bet', 'beat', 'beaat'


#The bracket metacharacters and negation!

# Echo only the DATA lines in which a space is followed by one character
# that is NOT 1, 2, or 3 and then by a 0 ([^...] negates the bracketed set).
LINE: while (<DATA>) {
    next LINE unless / [^123]0/;
    print;
}
__DATA__
    101
    201
    301
    401
    501
    601