# -*- python -*-
# vim:set ft=python:

# @@PLEAC@@_NAME
# @@SKIP@@ Python

# @@PLEAC@@_WEB
# @@SKIP@@ http://www.python.org

# @@PLEAC@@_INTRO
# @@SKIP@@ The latest version of Python is 2.4 but users of 2.3 and 2.2 (and
# @@SKIP@@ in some cases earlier versions) can use the code herein.
# @@SKIP@@ Users of 2.2 and 2.3 should install or copy code from utils.py
# @@SKIP@@ (http://aima.cs.berkeley.edu/python/utils.py)
# @@SKIP@@ [the first section provides compatibility code with 2.4]
# @@SKIP@@ Users of 2.2 should install optik (http://optik.sourceforge.com)
# @@SKIP@@ [for optparse and textwrap]
# @@SKIP@@ Where a 2.3 or 2.4 feature is unable to be replicated, an effort
# @@SKIP@@ has been made to provide a backward-compatible version in addition
# @@SKIP@@ to one using modern idioms.
# @@SKIP@@ Examples which translate the original Perl closely but which are
# @@SKIP@@ unPythonic are prefixed with a comment stating "DON'T DO THIS".
# @@SKIP@@ In some cases, it may be useful to know the techniques in these,
# @@SKIP@@ though it's a bad solution for the specific problem.

# @@PLEAC@@_1.0
#-----------------------------
mystr = "\n"   # a newline character
mystr = r"\n"  # two characters, \ and n
#-----------------------------
mystr = "Jon 'Maddog' Orwant"  # literal single quote inside double quotes
mystr = 'Jon "Maddog" Orwant'  # literal double quote inside single quotes
#-----------------------------
mystr = 'Jon \'Maddog\' Orwant'  # escaped single quote
mystr = "Jon \"Maddog\" Orwant"  # escaped double quote
#-----------------------------
# Triple-quoted literals keep their embedded newlines, so these two
# really do span several lines.
mystr = """
This is a multiline string literal
enclosed in triple double quotes.
"""
mystr = '''
And this is a multiline string literal
enclosed in triple single quotes.
'''
#-----------------------------

# @@PLEAC@@_1.1
#-----------------------------

# get a 5-char string, skip 3, then grab 2 8-char strings, then the rest
# Note that struct.unpack cannot use * for an unknown length.
# See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65224 import struct (lead, s1, s2), tail = struct.unpack("5s 3x 8s 8s", data[:24]), data[24:] # split at five-char boundaries fivers = struct.unpack("5s" * (len(data)//5), data) fivers = print [x[i*5:i*5+5] for i in range(len(x)/5)] # chop string into individual characters chars = list(data) #----------------------------- mystr = "This is what you have" # +012345678901234567890 Indexing forwards (left to right) # 109876543210987654321- Indexing backwards (right to left) # note that 0 means 10 or 20, etc. above first = mystr[0] # "T" start = mystr[5:7] # "is" rest = mystr[13:] # "you have" last = mystr[-1] # "e" end = mystr[-4:] # "have" piece = mystr[-8:-5] # "you" #----------------------------- # Python strings are immutable. # In general, you should just do piecemeal reallocation: mystr = "This is what you have" mystr = mystr[:5] + "wasn't" + mystr[7:] # Or replace and reallocate mystr = "This is what you have" mystr = mystr.replace(" is ", " wasn't ") # DON'T DO THIS: In-place modification could be done using character arrays import array mystr = array.array("c", "This is what you have") mystr[5:7] = array.array("c", "wasn't") # mystr is now array('c', "This wasn't what you have") # DON'T DO THIS: It could also be done using MutableString from UserString import MutableString mystr = MutableString("This is what you have") mystr[-12:] = "ondrous" # mystr is now "This is wondrous" #----------------------------- # you can test simple substrings with "in" (for regex matching see ch.6): if txt in mystr[-10:]: print "'%s' found in last 10 characters"%txt # Or use the startswith() and endswith() string methods: if mystr.startswith(txt): print "%s starts with %s."%(mystr, txt) if mystr.endswith(txt): print "%s ends with %s."%(mystr, txt) #----------------------------- # @@PLEAC@@_1.2 #----------------------------- # Introductory Note: quite a bit of this section is not terribly Pythonic # as names must be set 
before being used. For instance, unless myvar has # been previously defined, these next lines will all raise NameError: myvar = myvar or some_default myvar2 = myvar or some_default myvar |= some_default # bitwise-or, not logical-or - for demo # The standard way of setting a default is often: myvar = default_value if some_condition: pass # code which may set myvar to something else # if myvar is returned from a function and may be empty/None, then use: myvar = somefunc() if not myvar: myvar = default_value # If you want a default value that can be overridden by the person calling # your code, you can often wrap it in a function with a named parameter: def myfunc(myvar="a"): return myvar + "b" print myfunc(), myfunc("c") #=> ab cb # Note, though, that this won't work for mutable objects such as lists or # dicts that are mutated in the function as the object is only created once # and repeated calls to the same function will return the same object. This # can be desired behaviour however - see section 10.3, for instance. 
def myfunc(myvar=[]): myvar.append("x") return myvar print myfunc(), myfunc() #=> ['x'] ['x', 'x'] # You need to do: def myfunc(myvar=None): if myvar is None: myvar = [] myvar.append("x") return myvar print myfunc(), myfunc() #=> ['x'] ['x'] #=== Perl Equivalencies start here # use b if b is true, otherwise use c a = b or c # as that is a little tricksy, the following may be preferred: if b: a = b else: a = c # set x to y unless x is already true if not x: x = y #----------------------------- # use b if b is defined, else c try: a = b except NameError: a = c #----------------------------- foo = bar or "DEFAULT VALUE" #----------------------------- # To get a user (for both UNIX and Windows), use: import getpass user = getpass.getuser() # DON'T DO THIS: find the user name on Unix systems import os user = os.environ.get("USER") if user is None: user = os.environ.get("LOGNAME") #----------------------------- if not starting_point: starting_point = "Greenwich" #----------------------------- if not a: # copy only if empty a = b if b: # assign b if nonempty, else c a = b else: a = c #----------------------------- # @@PLEAC@@_1.3 #----------------------------- v1, v2 = v2, v1 #----------------------------- # DON'T DO THIS: temp = a a = b b = temp #----------------------------- a = "alpha" b = "omega" a, b = b, a # the first shall be last -- and versa vice #----------------------------- alpha, beta, production = "January March August".split() alpha, beta, production = beta, production, alpha #----------------------------- # @@PLEAC@@_1.4 #----------------------------- num = ord(char) char = chr(num) #----------------------------- char = "%c" % num print "Number %d is character %c" % (num, num) print "Number %(n)d is character %(n)c" % {"n": num} print "Number %(num)d is character %(num)c" % locals() #=> Number 101 is character e #----------------------------- ascii_character_numbers = [ord(c) for c in "sample"] print ascii_character_numbers #=> [115, 97, 109, 112, 108, 
101] word = "".join([chr(n) for n in ascii_character_numbers]) word = "".join([chr(n) for n in [115, 97, 109, 112, 108, 101]]) print word #=> sample #----------------------------- hal = "HAL" ibm = "".join([chr(ord(c)+1) for c in hal]) # add one to each ASCII value print ibm #=> IBM #----------------------------- # @@PLEAC@@_1.5 #----------------------------- mylist = list(mystr) #----------------------------- for char in mystr: pass # do something with char #----------------------------- mystr = "an apple a day" uniq = sorted(set(mystr)) print "unique chars are: '%s'" % "".join(uniq) #=> unique chars are: ' adelnpy' #----------------------------- ascvals = [ord(c) for c in mystr] print "total is %s for '%s'."%(sum(ascvals), mystr) #=> total is 1248 for 'an apple a day'. #----------------------------- # sysv checksum def checksum(myfile): values = [ord(c) for line in myfile for c in line] return sum(values)%(2**16) - 1 import fileinput print checksum(fileinput.input()) # data from sys.stdin # Using a function means any iterable can be checksummed: print checksum(open("C:/test.txt") # data from file print checksum("sometext") # data from string #----------------------------- #!/usr/bin/python # slowcat - emulate a s l o w line printer # usage: slowcat [- DELAY] [files ...] 
import sys, select import re DELAY = 1 if re.match("^-\d+$",sys.argv[1]): DELAY=-int(sys.argv[1]) del sys.argv[1] for ln in fileinput.input(): for c in ln: sys.stdout.write(c) sys.stdout.flush() select.select([],[],[], 0.005 * DELAY) #----------------------------- # @@PLEAC@@_1.6 #----------------------------- # 2.3+ only revchars = mystr[::-1] # extended slice - step is -1 revwords = " ".join(mystr.split(" ")[::-1]) # pre 2.3 version: mylist = list(mystr) mylist.reverse() revbytes = "".join(mylist) mylist = mystr.split() mylist.reverse() revwords = ' '.join(mylist) # Alternative version using reversed(): revchars = "".join(reversed(mystr)) revwords = " ".join(reversed(mystr.split(" "))) # reversed() makes an iterator, which means that the reversal # happens as it is consumed. This means that "print reversed(mystr)" is not # the same as mystr[::-1]. Standard usage is: for char in reversed(mystr): pass # ... do something #----------------------------- # 2.3+ only word = "reviver" is_palindrome = (word == word[::-1]) #----------------------------- # Generator version def get_palindromes(fname): for line in open(fname): word = line.rstrip() if len(word) > 5 and word == word[::-1]: yield word long_palindromes = list(get_palindromes("/usr/share/dict/words")) # Simpler old-style version using 2.2 string reversal def rev_string(mystr): mylist = list(mystr) mylist.reverse() return "".join(mylist) long_palindromes=[] for line in open("/usr/share/dict/words"): word = line.rstrip() if len(word) > 5 and word == rev_string(word): long_palindromes.append(word) print long_palindromes #----------------------------- # @@PLEAC@@_1.7 #----------------------------- mystr.expandtabs() mystr.expandtabs(4) #----------------------------- # @@PLEAC@@_1.8 #----------------------------- text = "I am %(rows)s high and %(cols)s long"%{"rows":24, "cols":80) print text #=> I am 24 high and 80 long rows, cols = 24, 80 text = "I am %(rows)s high and %(cols)s long"%locals() print text #=> I am 24 
high and 80 long #----------------------------- import re print re.sub("\d+", lambda i: str(2 * int(i.group(0))), "I am 17 years old") #=> I am 34 years old #----------------------------- # expand variables in text, but put an error message in # if the variable isn't defined class SafeDict(dict): def __getitem__(self, key): return self.get(key, "[No Variable: %s]"%key) hi = "Hello" text = "%(hi)s and %(bye)s!"%SafeDict(locals()) print text #=> Hello and [No Variable: bye]! #If you don't need a particular error message, just use the Template class: from string import Template x = Template("$hi and $bye!") hi = "Hello" print x.safe_substitute(locals()) #=> Hello and $bye! print x.substitute(locals()) # will throw a KeyError #----------------------------- # @@PLEAC@@_1.9 #----------------------------- mystr = "bo peep".upper() # BO PEEP mystr = mystr.lower() # bo peep mystr = mystr.capitalize() # Bo peep #----------------------------- beast = "python" caprest = beast.capitalize().swapcase() # pYTHON #----------------------------- print "thIS is a loNG liNE".title() #=> This Is A Long Line #----------------------------- if a.upper() == b.upper(): print "a and b are the same" #----------------------------- import random def randcase_one(letter): if random.randint(0,5): # True on 1, 2, 3, 4 return letter.lower() else: return letter.upper() def randcase(myfile): for line in myfile: yield "".join(randcase_one(letter) for letter in line[:-1]) for line in randcase(myfile): print line #----------------------------- # @@PLEAC@@_1.10 #----------------------------- "I have %d guanacos." % (n + 1) print "I have", n+1, "guanacos." #----------------------------- #Python templates disallow in-string calculations (see PEP 292) from string import Template email_template = Template("""\ To: $address From: Your Bank CC: $cc_number Date: $date Dear $name, Today you bounced check number $checknum to us. Your account is now closed. 
Sincerely, the management """) import random import datetime person = {"address":"Joe@somewhere.com", "name": "Joe", "cc_number" : 1234567890, "checknum" : 500+random.randint(0,99)} print email_template.substitute(person, date=datetime.date.today()) #----------------------------- # @@PLEAC@@_1.11 #----------------------------- # indenting here documents # # in python multiline strings can be used as here documents var = """ your text goes here """ # using regular expressions import re re_leading_blanks = re.compile("^\s+",re.MULTILINE) var1 = re_leading_blanks.sub("",var)[:-1] # using string methods # split into lines, use every line except first and last, left strip and rejoin. var2 = "\n".join([line.lstrip() for line in var.split("\n")[1:-1]]) poem = """ Here's your poem: Now far ahead the Road has gone, And I must follow, if I can, Pursuing it with eager feet, Until it joins some larger way Where many paths and errand meet. And whither then? I cannot say. --Bilbo in /usr/src/perl/pp_ctl.c """ import textwrap print textwrap.dedent(poem)[1:-1] #----------------------------- # @@PLEAC@@_1.12 #----------------------------- from textwrap import wrap output = wrap(para, initial_indent=leadtab subsequent_indent=nexttab) #----------------------------- #!/usr/bin/env python # wrapdemo - show how textwrap works txt = """\ Folding and splicing is the work of an editor, not a mere collection of silicon and mobile electrons! """ from textwrap import TextWrapper wrapper = TextWrapper(width=20, initial_indent=" "*4, subsequent_indent=" "*2) print "0123456789" * 2 print wrapper.fill(txt) #----------------------------- """Expected result: 01234567890123456789 Folding and splicing is the work of an editor, not a mere collection of silicon and mobile electrons! 
""" #----------------------------- # merge multiple lines into one, then wrap one long line from textwrap import fill import fileinput print fill("".join(fileinput.input())) #----------------------------- # Term::ReadKey::GetTerminalSize() isn't in the Perl standard library. # It isn't in the Python standard library either. Michael Hudson's # recipe from python-list #530228 is shown here. # (http://aspn.activestate.com/ASPN/Mail/Message/python-list/530228) # Be aware that this will work on Unix but not on Windows. from termwrap import wrap import struct, fcntl def getheightwidth(): height, width = struct.unpack( "hhhh", fcntl.ioctl(0, TERMIOS.TIOCGWINSZ ,"\000"*8))[0:2] return height, width # PERL <>, $/, $\ emulation import fileinput import re _, width = getheightwidth() for para in re.split(r"\n{2,}", "".join(fileinput.input())): print fill(para, width) # @@PLEAC@@_1.13 #----------------------------- mystr = '''Mom said, "Don't do that."''' #" re.sub("['\"]", lambda i: "\\" + i.group(0), mystr) re.sub("[A-Z]", lambda i: "\\" + i.group(0), mystr) re.sub("\W", lambda i: "\\" + i.group(0), "is a test!") # no function like quotemeta? 
# @@PLEAC@@_1.14 #----------------------------- mystr = mystr.lstrip() # left mystr = mystr.rstrip() # right mystr = mystr.strip() # both ends # @@PLEAC@@_1.15 #----------------------------- import csv def parse_csv(line): reader = csv.reader([line], escapechar='\\') return reader.next() line = '''XYZZY,"","O'Reilly, Inc","Wall, Larry","a \\"glug\\" bit,",5,"Error, Core Dumped,",''' #" fields = parse_csv(line) for i, field in enumerate(fields): print "%d : %s" % (i, field) # pre-2.3 version of parse_csv import re def parse_csv(text): pattern = re.compile('''"([^"\\\]*(?:\\\.[^"\\\]*)*)",?|([^,]+),?|,''') mylist = ["".join(elem) for elem in re.findall(pattern, text)] if text[-1] == ",": mylist += [''] return mylist # cvs.reader is meant to work for many lines, something like: # (NB: in Python default, quotechar is *not* escaped by backslash, # but doubled instead. That's what Excel does.) for fields in cvs.reader(lines, dialect="some"): for num, field in enumerate(fields): print num, ":", field #----------------------------- # @@PLEAC@@_1.16 #----------------------------- def soundex(name, len=4): """ soundex module conforming to Knuth's algorithm implementation 2000-12-24 by Gregory Jorgensen public domain """ # digits holds the soundex values for the alphabet digits = '01230120022455012623010202' sndx = '' fc = '' # translate alpha chars in name to soundex digits for c in name.upper(): if c.isalpha(): if not fc: fc = c # remember first letter d = digits[ord(c)-ord('A')] # duplicate consecutive soundex digits are skipped if not sndx or (d != sndx[-1]): sndx += d # replace first digit with first alpha character sndx = fc + sndx[1:] # remove all 0s from the soundex code sndx = sndx.replace('0','') # return soundex code padded to len characters return (sndx + (len * '0'))[:len] user = raw_input("Lookup user: ") if user == "": raise SystemExit name_code = soundex(user) for line in open("/etc/passwd"): line = line.split(":") for piece in line[4].split(): if name_code == 
soundex(piece): print "%s: %s\n" % line[0], line[4]) #----------------------------- # @@PLEAC@@_1.17 #----------------------------- import sys, fileinput, re data = """\ analysed => analyzed built-in => builtin chastized => chastised commandline => command-line de-allocate => deallocate dropin => drop-in hardcode => hard-code meta-data => metadata multicharacter => multi-character multiway => multi-way non-empty => nonempty non-profit => nonprofit non-trappable => nontrappable pre-define => predefine preextend => pre-extend re-compiling => recompiling reenter => re-enter turnkey => turn-key """ mydict = {} for line in data.split("\n"): if not line.strip(): continue k, v = [word.strip() for word in line.split("=>")] mydict[k] = v pattern_text = "(" + "|".join([re.escape(word) for word in mydict.keys()]) + ")" pattern = re.compile(pattern_text) args = sys.argv[1:] verbose = 0 if args and args[0] == "-v": verbose = 1 args = args[1:] if not args: sys.stderr.write("%s: Reading from stdin\n" % sys.argv[0]) for line in fileinput.input(args, inplace=1, backup=".orig"): output = "" pos = 0 while True: match = pattern.search(line, pos) if not match: output += line[pos:] break output += line[pos:match.start(0)] + mydict[match.group(1)] pos = match.end(0) sys.stdout.write(output) #----------------------------- # @@PLEAC@@_1.18 #----------------------------- #!/usr/bin/python # psgrep - print selected lines of ps output by # compiling user queries into code. 
# # examples : # psgrep "uid<10" import sys, os, re class PsLineMatch: # each field from the PS header fieldnames = ("flags","uid","pid","ppid","pri","nice","size", \ "rss","wchan","stat","tty","time","command") numeric_fields = ("flags","uid","pid","ppid","pri","nice","size","rss") def __init__(self): self._fields = {} def new_line(self, ln): self._ln = ln.rstrip() # ps header for option "wwaxl" (different than in the perl code) """ F UID PID PPID PRI NI VSZ RSS WCHAN STAT TTY TIME COMMAND" 004 0 1 0 15 0 448 236 schedu S ? 0:07 init" . . . . . . . . . . . . . """ # because only the last entry might contain blanks, splitting # is safe data = self._ln.split(None,12) for fn, elem in zip(self.fieldnames, data): if fn in self.numeric_fields: # make numbers integer self._fields[fn] = int(elem) else: self._fields[fn] = elem def set_query(self, args): # assume args: "uid==500", "command ~ ^wm" conds=[] m = re.compile("(\w+)([=<>]+)(.+)") for a in args: try: (field,op,val) = m.match(a).groups() except: print "can't understand query \"%s\"" % (a) raise SystemExit if field in self.numeric_fields: conds.append(a) else: conds.append("%s%s'%s'",(field,op,val)) self._desirable = compile("(("+")and(".join(conds)+"))", "","eval") def is_desirable(self): return eval(self._desirable, {}, self._fields) def __str__(self): # to allow "print". return self._ln if len(sys.argv)<=1: print """usage: %s criterion ... 
Each criterion is a Perl expression involving: %s All criteria must be met for a line to be printed.""" \ % (sys.argv[0], " ".join(PsLineMatch().fieldnames)) raise SystemExit psln = PsLineMatch() psln.set_query(sys.argv[1:]) p = os.popen("ps wwaxl") print p.readline()[:-1] # emit header line for ln in p.readlines(): psln.new_line(ln) if psln.is_desirable(): print psln p.close() # alternatively one could consider every argument being a string and # support wildcards: "uid==500" "command~^wm" by means of re, but this # does not show dynamic python code generation, although re.compile # also precompiles. #----------------------------- # @@PLEAC@@_2.1 #----------------------------- # The standard way of validating numbers is to convert them and catch # an exception on failure try: myfloat = float(mystr) print "is a decimal number" except TypeError: print "is not a decimal number" try: myint = int(mystr) print "is an integer" except TypeError: print "is not an integer" # DON'T DO THIS. Explicit checking is prone to errors: if mystr.isdigit(): # Fails on "+4" print 'is a positive integer' else: print 'is not' if re.match("[+-]?\d+$", mystr): # Fails on "- 1" print 'is an integer' else: print 'is not' if re.match("-?(?:\d+(?:\.\d*)?|\.\d+)$", mystr): # Opaque, and fails on "- 1" print 'is a decimal number' else: print 'is not' #----------------------------- # @@PLEAC@@_2.2 #----------------------------- # equal(num1, num2, accuracy) : returns true if num1 and num2 are # equal to accuracy number of decimal places def equal(num1, num2, accuracy): return abs(num1 - num2) < 10**(-accuracy) #----------------------------- from __future__ import division # use / for float div and // for int div wage = 536 # $5.36/hour week = 40 * wage # $214.40 print "One week's wage is: $%.2f" % (week/100) #=> One week's wage is: $214.40 #----------------------------- # @@PLEAC@@_2.3 #----------------------------- rounded = round(num) # rounds to integer #----------------------------- a = 0.255 
b = "%.2f" % a print "Unrounded: %f\nRounded: %s" % (a, b) print "Unrounded: %f\nRounded: %.2f" % (a, a) #=> Unrounded: 0.255000 #=> Rounded: 0.26 #=> Unrounded: 0.255000 #=> Rounded: 0.26 #----------------------------- from math import floor, ceil print "number\tint\tfloor\tceil" a = [3.3, 3.5, 3.7, -3.3] for n in a: print "% .1f\t% .1f\t% .1f\t% .1f" % (n, int(n), floor(n), ceil(n)) #=> number int floor ceil #=> 3.3 3.0 3.0 4.0 #=> 3.5 3.0 3.0 4.0 #=> 3.7 3.0 3.0 4.0 #=> -3.3 -3.0 -4.0 -3.0 #----------------------------- # @@PLEAC@@_2.4 #----------------------------- # To convert a string in any base up to base 36, use the optional arg to int(): num = int('0110110', 2) # num is 54 # To convert an int to an string representation in another base, you could use # : import baseconvert def dec2bin(i): return baseconvert.baseconvert(i, baseconvert.BASE10, baseconvert.BASE2) binstr = dec2bin(54) # binstr is 110110 #----------------------------- # @@PLEAC@@_2.5 #----------------------------- for i in range(x,y): pass # i is set to every integer from x to y, excluding y for i in range(x, y, 7): pass # i is set to every integer from x to y, stepsize = 7 print "Infancy is:", for i in range(0,3): print i, print print "Toddling is:", for i in range(3,5): print i, print # DON'T DO THIS: print "Childhood is:", i = 5 while i <= 12: print i i += 1 #=> Infancy is: 0 1 2 #=> Toddling is: 3 4 #=> Childhood is: 5 6 7 8 9 10 11 12 #----------------------------- # @@PLEAC@@_2.6 #----------------------------- # See http://www.faqts.com/knowledge_base/view.phtml/aid/4442 # for a module that does this #----------------------------- # @@PLEAC@@_2.7 #----------------------------- import random # use help(random) to see the (large) list of funcs rand = random.randint(x, y) #----------------------------- rand = random.randint(25, 76) print rand #----------------------------- elt = random.choice(mylist) #----------------------------- import string chars = string.letters + string.digits + 
"!@$%^&*" password = "".join([random.choice(chars) for i in range(8)]) #----------------------------- # @@PLEAC@@_2.8 #----------------------------- # Changes the default RNG random.seed() # Or you can create independent RNGs gen1 = random.Random(6) gen2 = random.Random(6) gen3 = random.Random(10) a1, b1 = gen1.random(), gen1.random() a2, b2 = gen2.random(), gen2.random() a3, b3 = gen3.random(), gen3.random() # a1 == a2 and b1 == b2 #----------------------------- # @@PLEAC@@_2.9 #----------------------------- # see http://www.sbc.su.se/~per/crng/ or http://www.frohne.westhost.com/rv11reference.htm #----------------------------- # @@PLEAC@@_2.10 #----------------------------- import random mean = 25 sdev = 2 salary = random.gauss(mean, sdev) print "You have been hired at %.2f" % salary #----------------------------- # @@PLEAC@@_2.11 #----------------------------- radians = math.radians(degrees) degrees = math.degrees(radians) # pre-2.3: from __future__ import division import math def deg2rad(degrees): return (degrees / 180) * math.pi def rad2deg(radians): return (radians / math.pi) * 180 #----------------------------- # Use deg2rad instead of math.radians if you have pre-2.3 Python. import math def degree_sine(degrees): radians = math.radians(degrees) return math.sin(radians) #----------------------------- # @@PLEAC@@_2.12 #----------------------------- import math # DON'T DO THIS. Use math.tan() instead. 
def tan(theta): return math.sin(theta) / math.cos(theta) #---------------- # NOTE: this sets y to 16331239353195370.0 try: y = math.tan(math.pi/2) except ValueError: y = None #----------------------------- # @@PLEAC@@_2.13 #----------------------------- import math log_e = math.log(VALUE) #----------------------------- log_10 = math.log10(VALUE) #----------------------------- def log_base(base, value): return math.log(value) / math.log(base) #----------------------------- # log_base defined as above answer = log_base(10, 10000) print "log10(10,000) =", answer #=> log10(10,000) = 4.0 #----------------------------- # @@PLEAC@@_2.14 #----------------------------- # NOTE: must have NumPy installed. See # http://www.pfdubois.com/numpy/ import Numeric a = Numeric.array( ((3, 2, 3), (5, 9, 8) ), "d") b = Numeric.array( ((4, 7), (9, 3), (8, 1) ), "d") c = Numeric.matrixmultiply(a, b) print c #=> [[ 54. 30.] #=> [ 165. 70.]] print a.shape, b.shape, c.shape #=> (2, 3) (3, 2) (2, 2) #----------------------------- # @@PLEAC@@_2.15 #----------------------------- a = 3+5j b = 2-2j c = a * b print "c =", c #=> c = (16+4j) print c.real, c.imag, c.conjugate() #=> 16.0 4.0 (16-4j) #----------------------------- import cmath print cmath.sqrt(3+4j) #=> (2+1j) #----------------------------- # @@PLEAC@@_2.16 #----------------------------- number = int(hexadecimal, 16) number = int(octal, 8) s = hex(number) s = oct(number) num = raw_input("Gimme a number in decimal, octal, or hex: ").rstrip() if num.startswith("0x"): num = int(num[2:], 16) elif num.startswith("0"): num = int(num[1:], 8) else: num = int(num) print "%(num)d %(num)x %(num)o\n" % { "num": num } #----------------------------- # @@PLEAC@@_2.17 #----------------------------- def commify(amount): amount = str(amount) firstcomma = len(amount)%3 or 3 # set to 3 if would make a leading comma first, rest = amount[:firstcomma], amount[firstcomma:] segments = [first] + [rest[i:i+3] for i in range(0, len(rest), 3)] return 
",".join(segments) print commify(12345678) #=> 12,345,678 # DON'T DO THIS. It works on 2.3+ only and is slower and less straightforward # than the non-regex version above. import re def commify(amount): amount = str(amount) amount = amount[::-1] amount = re.sub(r"(\d\d\d)(?=\d)(?!\d*\.)", r"\1,", amount) return amount[::-1] # @@PLEAC@@_2.18 # Printing Correct Plurals #----------------------------- def pluralise(value, root, singular="", plural="s"): if value == 1: return root + singular else: return root + plural print "It took", duration, pluralise(duration, 'hour') print "%d %s %s enough." % (duration, pluralise(duration, 'hour'), pluralise(duration, '', 'is', 'are')) #----------------------------- import re def noun_plural(word): endings = [("ss", "sses"), ("([psc]h)", r"\1es"), ("z", "zes"), ("ff", "ffs"), ("f", "ves"), ("ey", "eys"), ("y", "ies"), ("ix", "ices"), ("([sx])", r"\1es"), ("", "s")] for singular, plural in endings: ret, found = re.subn("%s$"%singular, plural, word) if found: return ret verb_singular = noun_plural; # make function alias #----------------------------- # @@PLEAC@@_2.19 # Program: Calculating Prime Factors #----------------------------- #% bigfact 8 9 96 2178 #8 2**3 # #9 3**2 # #96 2**5 3 # #2178 2 3**2 11**2 #----------------------------- #% bigfact 239322000000000000000000 #239322000000000000000000 2**19 3 5**18 39887 # # #% bigfact 25000000000000000000000000 #25000000000000000000000000 2**24 5**26 #----------------------------- import sys def factorise(num): factors = {} orig = num print num, '\t', # we take advantage of the fact that (i +1)**2 = i**2 + 2*i +1 i, sqi = 2, 4 while sqi <= num: while not num%i: num /= i factors[i] = factors.get(i, 0) + 1 sqi += 2*i + 1 i += 1 if num != 1 and num != orig: factors[num] = factors.get(num, 0) + 1 if not factors: print "PRIME" for factor in sorted(factors): if factor: tmp = str(factor) if factors[factor]>1: tmp += "**" + str(factors[factor]) print tmp, print #-------- if __name__ == 
'__main__': if len(sys.argv) == 1: print "Usage:", sys.argv[0], " number [number, ]" else: for strnum in sys.argv[1:]: try: num = int(strnum) factorise(num) except ValueError: print strnum, "is not an integer" #----------------------------- # A more Pythonic variant (which separates calculation from printing): def format_factor(base, exponent): if exponent > 1: return "%s**%s"%(base, exponent) return str(base) def factorise(num): factors = {} orig = num # we take advantage of the fact that (i+1)**2 = i**2 + 2*i +1 i, sqi = 2, 4 while sqi <= num: while not num%i: num /= i factors[i] = factors.get(i, 0) + 1 sqi += 2*i + 1 i += 1 if num not in (1, orig): factors[num] = factors.get(num, 0) + 1 if not factors: return ["PRIME"] out = [format_factor(base, exponent) for base, exponent in sorted(factors.items())] return out def print_factors(value): try: num = int(value) if num != float(value): raise ValueError except (ValueError, TypeError): raise ValueError("Can only factorise an integer") factors = factorise(num) print num, "\t", " ".join(factors) # @@PLEAC@@_3.0 #----------------------------- #introduction # There are three common ways of manipulating dates in Python # mxDateTime - a popular third-party module (not discussed here) # time - a fairly low-level standard library module # datetime - a new library module for Python 2.3 and used for most of these samples # (I will use full names to show which module they are in, but you can also use # from datetime import datetime, timedelta and so on for convenience) import time import datetime print "Today is day", time.localtime()[7], "of the current year" # Today is day 218 of the current year today = datetime.date.today() print "Today is day", today.timetuple()[7], "of ", today.year # Today is day 218 of 2003 print "Today is day", today.strftime("%j"), "of the current year" # Today is day 218 of the current year # @@PLEAC@@_3.1 #----------------------------- # Finding todays date today = datetime.date.today() print "The 
date is", today #=> The date is 2003-08-06 # the function strftime() (string-format time) produces nice formatting # All codes are detailed at http://www.python.org/doc/current/lib/module-time.html print t.strftime("four-digit year: %Y, two-digit year: %y, month: %m, day: %d") #=> four-digit year: 2003, two-digit year: 03, month: 08, day: 06 # @@PLEAC@@_3.2 #----------------------------- # Converting DMYHMS to Epoch Seconds # To work with Epoch Seconds, you need to use the time module # For the local timezone t = datetime.datetime.now() print "Epoch Seconds:", time.mktime(t.timetuple()) #=> Epoch Seconds: 1060199000.0 # For UTC t = datetime.datetime.utcnow() print "Epoch Seconds:", time.mktime(t.timetuple()) #=> Epoch Seconds: 1060195503.0 # @@PLEAC@@_3.3 #----------------------------- # Converting Epoch Seconds to DMYHMS now = datetime.datetime.fromtimestamp(EpochSeconds) #or use datetime.datetime.utcfromtimestamp() print now #=> datetime.datetime(2003, 8, 6, 20, 43, 20) print now.ctime() #=> Wed Aug 6 20:43:20 2003 # or with the time module oldtimetuple = time.localtime(EpochSeconds) # oldtimetuple contains (year, month, day, hour, minute, second, weekday, yearday, daylightSavingAdjustment) print oldtimetuple #=> (2003, 8, 6, 20, 43, 20, 2, 218, 1) # @@PLEAC@@_3.4 #----------------------------- # Adding to or Subtracting from a Date # Use the rather nice datetime.timedelta objects now = datetime.date(2003, 8, 6) difference1 = datetime.timedelta(days=1) difference2 = datetime.timedelta(weeks=-2) print "One day in the future is:", now + difference1 #=> One day in the future is: 2003-08-07 print "Two weeks in the past is:", now + difference2 #=> Two weeks in the past is: 2003-07-23 print datetime.date(2003, 8, 6) - datetime.date(2000, 8, 6) #=> 1095 days, 0:00:00 #----------------------------- birthtime = datetime.datetime(1973, 01, 18, 3, 45, 50) # 1973-01-18 03:45:50 interval = datetime.timedelta(seconds=5, minutes=17, hours=2, days=55) then = birthtime + interval 
print "Then is", then.ctime() #=> Then is Wed Mar 14 06:02:55 1973 print "Then is", then.strftime("%A %B %d %I:%M:%S %p %Y") #=> Then is Wednesday March 14 06:02:55 AM 1973 #----------------------------- when = datetime.datetime(1973, 1, 18) + datetime.timedelta(days=55) print "Nat was 55 days old on:", when.strftime("%m/%d/%Y").lstrip("0") #=> Nat was 55 days old on: 3/14/1973 # @@PLEAC@@_3.5 #----------------------------- # Dates produce timedeltas when subtracted. diff = date2 - date1 diff = datetime.date(year1, month1, day1) - datetime.date(year2, month2, day2) #----------------------------- bree = datetime.datetime(1981, 6, 16, 4, 35, 25) nat = datetime.datetime(1973, 1, 18, 3, 45, 50) difference = bree - nat print "There were", difference, "minutes between Nat and Bree" #=> There were 3071 days, 0:49:35 between Nat and Bree weeks, days = divmod(difference.days, 7) minutes, seconds = divmod(difference.seconds, 60) hours, minutes = divmod(minutes, 60) print "%d weeks, %d days, %d:%d:%d" % (weeks, days, hours, minutes, seconds) #=> 438 weeks, 5 days, 0:49:35 #----------------------------- print "There were", difference.days, "days between Bree and Nat." #=> There were 3071 days between bree and nat # @@PLEAC@@_3.6 #----------------------------- # Day in a Week/Month/Year or Week Number when = datetime.date(1981, 6, 16) print "16/6/1981 was:" print when.strftime("Day %w of the week (a %A). Day %d of the month (%B).") print when.strftime("Day %j of the year (%Y), in week %W of the year.") #=> 16/6/1981 was: #=> Day 2 of the week (a Tuesday). Day 16 of the month (June). #=> Day 167 of the year (1981), in week 24 of the year. 
# @@PLEAC@@_3.7 #----------------------------- # Parsing Dates and Times from Strings time.strptime("Tue Jun 16 20:18:03 1981") # (1981, 6, 16, 20, 18, 3, 1, 167, -1) time.strptime("16/6/1981", "%d/%m/%Y") # (1981, 6, 16, 0, 0, 0, 1, 167, -1) # strptime() can use any of the formatting codes from time.strftime() # The easiest way to convert this to a datetime seems to be; now = datetime.datetime(*time.strptime("16/6/1981", "%d/%m/%Y")[0:5]) # the '*' operator unpacks the tuple, producing the argument list. # @@PLEAC@@_3.8 #----------------------------- # Printing a Date # Use datetime.strftime() - see helpfiles in distro or at python.org print datetime.datetime.now().strftime("The date is %A (%a) %d/%m/%Y") #=> The date is Friday (Fri) 08/08/2003 # @@PLEAC@@_3.9 #----------------------------- # High Resolution Timers t1 = time.clock() # Do Stuff Here t2 = time.clock() print t2 - t1 # 2.27236813618 # Accuracy will depend on platform and OS, # but time.clock() uses the most accurate timer it can time.clock(); time.clock() # 174485.51365466841 # 174485.55702610247 #----------------------------- # Also useful; import timeit code = '[x for x in range(10) if x % 2 == 0]' eval(code) # [0, 2, 4, 6, 8] t = timeit.Timer(code) print "10,000 repeats of that code takes:", t.timeit(10000), "seconds" print "1,000,000 repeats of that code takes:", t.timeit(), "seconds" # 10,000 repeats of that code takes: 0.128238644856 seconds # 1,000,000 repeats of that code takes: 12.5396490336 seconds #----------------------------- import timeit code = 'import random; l = random.sample(xrange(10000000), 1000); l.sort()' t = timeit.Timer(code) print "Create a list of a thousand random numbers. Sort the list. Repeated a thousand times." 
print "Average Time:", t.timeit(1000) / 1000 # Time taken: 5.24391507859 # @@PLEAC@@_3.10 #----------------------------- # Short Sleeps seconds = 3.1 time.sleep(seconds) print "boo" # @@PLEAC@@_3.11 #----------------------------- # Program HopDelta # Save a raw email to disk and run "python hopdelta.py FILE" # and it will process the headers and show the time taken # for each server hop (nb: if server times are wrong, negative dates # might appear in the output). import datetime, email, email.Utils import os, sys, time def extract_date(hop): # According to RFC822, the date will be prefixed with # a semi-colon, and is the last part of a received # header. date_string = hop[hop.find(';')+2:] date_string = date_string.strip() time_tuple = email.Utils.parsedate(date_string) # convert time_tuple to datetime EpochSeconds = time.mktime(time_tuple) dt = datetime.datetime.fromtimestamp(EpochSeconds) return dt def process(filename): # Main email file processing # read the headers and process them f = file(filename, 'rb') msg = email.message_from_file(f) hops = msg.get_all('received') # in reverse order, get the server(s) and date/time involved hops.reverse() results = [] for hop in hops: hop = hop.lower() if hop.startswith('by'): # 'Received: by' line sender = "start" receiver = hop[3:hop.find(' ',3)] date = extract_date(hop) else: # 'Received: from' line sender = hop[5:hop.find(' ',5)] by = hop.find('by ')+3 receiver = hop[by:hop.find(' ', by)] date = extract_date(hop) results.append((sender, receiver, date)) output(results) def output(results): print "Sender, Recipient, Time, Delta" print previous_dt = delta = 0 for (sender, receiver, date) in results: if previous_dt: delta = date - previous_dt print "%s, %s, %s, %s" % (sender, receiver, date.strftime("%Y/%d/%m %H:%M:%S"), delta) print previous_dt = date def main(): # Perform some basic argument checking if len(sys.argv) != 2: print "Usage: mailhop.py FILENAME" else: filename = sys.argv[1] if os.path.isfile(filename): 
process(filename) else: print filename, "doesn't seem to be a valid file." if __name__ == '__main__': main() # @@PLEAC@@_4.0 #----------------------------- # Python does not automatically flatten lists, in other words # in the following, non-nested contains four elements and # nested contains three elements, the third element of which # is itself a list containing two elements: non_nested = ["this", "that", "the", "other"] nested = ["this", "that", ["the", "other"]] #----------------------------- tune = ["The", "Star-Spangled", "Banner"] #----------------------------- # @@PLEAC@@_4.1 #----------------------------- a = ["quick", "brown", "fox"] a = "Why are you teasing me?".split() text = """ The boy stood on the burning deck, It was as hot as glass. """ lines = [line.lstrip() for line in text.strip().split("\n")] #----------------------------- biglist = [line.rstrip() for line in open("mydatafile")] #----------------------------- banner = "The Mines of Moria" banner = 'The Mines of Moria' #----------------------------- name = "Gandalf" banner = "Speak, " + name + ", and enter!" banner = "Speak, %s, and welcome!" 
% name #----------------------------- his_host = "www.python.org" import os host_info = os.popen("nslookup " + his_host).read() # NOTE: not really relevant to Python (no magic '$$' variable) python_info = os.popen("ps %d" % os.getpid()).read() shell_info = os.popen("ps $$").read() #----------------------------- # NOTE: not really relevant to Python (no automatic interpolation) banner = ["Costs", "only", "$4.95"] banner = "Costs only $4.95".split() #----------------------------- brax = """ ' " ( ) < > { } [ ] """.split() #""" brax = list("""'"()<>{}[]""") #""" rings = '''They're "Nenya Narya Vilya"'''.split() #''' tags = 'LI TABLE TR TD A IMG H1 P'.split() sample = r'The backslash (\) is often used in regular expressions.'.split() #----------------------------- banner = "The backslash (\\) is often used in regular expressions.".split() #----------------------------- ships = u"Niña Pinta Santa María".split() # WRONG (only three ships) ships = [u"Niña", u"Pinta", u"Santa María"] # right #----------------------------- # @@PLEAC@@_4.2 #----------------------------- def commify_series(args): n = len(args) if n == 0: return "" elif n == 1: return args[0] elif n == 2: return args[0] + " and " + args[1] return ", ".join(args[:-1]) + ", and " + args[-1] commify_series([]) commify_series(["red"]) commify_series(["red", "yellow"]) commify_series(["red", "yellow", "green"]) #----------------------------- mylist = ["red", "yellow", "green"] print "I have", mylist, "marbles." print "I have", " ".join(mylist), "marbles." #=> I have ['red', 'yellow', 'green'] marbles. #=> I have red yellow green marbles. 
#----------------------------- #!/usr/bin/env python # commify_series - show proper comma insertion in list output data = ( ( 'just one thing', ), ( 'Mutt Jeff'.split() ), ( 'Peter Paul Mary'.split() ), ( 'To our parents', 'Mother Theresa', 'God' ), ( 'pastrami', 'ham and cheese', 'peanut butter and jelly', 'tuna' ), ( 'recycle tired, old phrases', 'ponder big, happy thoughts' ), ( 'recycle tired, old phrases', 'ponder big, happy thoughts', 'sleep and dream peacefully' ), ) def commify_series(terms): for term in terms: if "," in term: sepchar = "; " break else: sepchar = ", " n = len(terms) if n == 0: return "" elif n == 1: return terms[0] elif n == 2: return " and ".join(terms) return "%s%sand %s" % (sepchar.join(terms[:-1]), sepchar, terms[-1]) for item in data: print "The list is: %s." % commify_series(item) #=> The list is: just one thing. #=> The list is: Mutt and Jeff. #=> The list is: Peter, Paul, and Mary. #=> The list is: To our parents, Mother Theresa, and God. #=> The list is: pastrami, ham and cheese, peanut butter and jelly, and tuna. #=> The list is: recycle tired, old phrases and ponder big, happy thoughts. #=> The list is: recycle tired, old phrases; ponder big, happy thoughts; and # sleep and dream peacefully. #----------------------------- # @@PLEAC@@_4.3 #----------------------------- # Python allocates more space than is necessary every time a list needs to # grow and only shrinks lists when more than half the available space is # unused. This means that adding or removing an element will in most cases # not force a reallocation. 
del mylist[size:] # shrink mylist mylist += [None] * size # grow mylist by appending 'size' None elements # To add an element to the end of a list, use the append method: mylist.append(4) # To insert an element, use the insert method: mylist.insert(0, 10) # Insert 10 at the beginning of the list # To extend one list with the contents of another, use the extend method: list2 = [1,2,3] mylist.extend(list2) # To insert the contents of one list into another, overwriting zero or # more elements, specify a slice: mylist[1:1] = list2 # Don't overwrite anything; grow mylist if needed mylist[2:3] = list2 # Overwrite mylist[2] and grow mylist if needed # To remove one element from the middle of a list: # To remove elements from the middle of a list: del mylist[idx1:idx2] # 0 or more x = mylist.pop(idx) # remove mylist[idx] and assign it to x # You cannot assign to or get a non-existent element: # >>> x = [] # >>> x[4] = 5 # # Traceback (most recent call last): # File "", line 1, in -toplevel- # x[4] = 5 # IndexError: list assignment index out of range # # >>> print x[1000] # # Traceback (most recent call last): # File "", line 1, in -toplevel- # print x[1000] # IndexError: list index out of range #----------------------------- def what_about_that_list(terms): print "The list now has", len(terms), "elements." print "The index of the last element is", len(terms)-1, "(or -1)." print "Element #3 is %s." % terms[3] people = "Crosby Stills Nash Young".split() what_about_that_list(people) #----------------------------- #=> The list now has 4 elements. #=> The index of the last element is 3 (or -1). #=> Element #3 is Young. #----------------------------- people.pop() what_about_that_list(people) #----------------------------- people += [None] * (10000 - len(people)) #----------------------------- #>>> people += [None] * (10000 - len(people)) #>>> what_about_that_list(people) #The list now has 10000 elements. #The index of the last element is 9999 (or -1). #Element #3 is None. 
#----------------------------- # @@PLEAC@@_4.4 #----------------------------- for item in mylist: pass # do something with item #----------------------------- for user in bad_users: complain(user) #----------------------------- import os for (key, val) in sorted(os.environ.items()): print "%s=%s" % (key, val) #----------------------------- for user in all_users: disk_space = get_usage(user) # find out how much disk space in use if disk_space > MAX_QUOTA: # if it's more than we want ... complain(user) # ... then object vociferously #----------------------------- import os for line in os.popen("who"): if "dalke" in line: print line, # or print line[:-1] # or: print "".join([line for line in os.popen("who") if "dalke" in line]), #----------------------------- for line in myfile: for word in line.split(): # Split on whitespace print word[::-1], # reverse word print # pre 2.3: for line in myfile: for word in line.split(): # Split on whitespace chars = list(word) # Turn the string into a list of characters chars.reverse() print "".join(chars), print #----------------------------- for item in mylist: print "i =", item #----------------------------- # NOTE: you can't modify in place the way Perl does: # data = [1, 2, 3] # for elem in data: # elem -= 1 #print data #=>[1, 2, 3] data = [1, 2, 3] data = [i-1 for i in data] print data #=>[0, 1, 2] # or for i, elem in enumerate(data): data[i] = elem - 1 #----------------------------- # NOTE: strings are immutable in Python so this doesn't translate well. s = s.strip() data = [s.strip() for s in data] for k, v in mydict.items(): mydict[k] = v.strip() #----------------------------- # @@PLEAC@@_4.5 #----------------------------- fruits = ["Apple", "Blackberry"] for fruit in fruits: print fruit, "tastes good in a pie." #=> Apple tastes good in a pie. #=> Blackberry tastes good in a pie. #----------------------------- # DON'T DO THIS: for i in range(len(fruits)): print fruits[i], "tastes good in a pie." 
# If you must explicitly index, use enumerate(): for i, fruit in enumerate(fruits): print "%s) %s tastes good in a pie."%(i+1, fruit) #----------------------------- rogue_cats = ["Morris", "Felix"] namedict = { "felines": rogue_cats } for cat in namedict["felines"]: print cat, "purrs hypnotically." print "--More--\nYou are controlled." #----------------------------- # As noted before, if you need an index, use enumerate() and not this: for i in range(len(namedict["felines"])): print namedict["felines"][i], "purrs hypnotically." #----------------------------- # @@PLEAC@@_4.6 #----------------------------- uniq = list(set(mylist)) #----------------------------- # See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259174 # for a more heavyweight version of a bag seen = {} for item in mylist: seen[item] = seen.get(item, 0) + 1 uniq = seen.keys() #----------------------------- seen = {} uniq = [] for item in mylist: count = seen.get(item, 0) if count == 0: uniq.append(item) seen[item] = count + 1 #----------------------------- # generate a list of users logged in, removing duplicates import os usernames = [line.split()[0] for line in os.popen("who")] uniq = sorted(set(usernames)) print "users logged in:", " ".join(uniq) # DON'T DO THIS: import os ucnt = {} for line in os.popen("who"): username = line.split()[0] # Get the first word ucnt[username] = ucnt.get(username, 0) + 1 # record the users' presence # extract and print unique keys users = ucnt.keys() users.sort() print "users logged in:", " ".join(users) #----------------------------- # @@PLEAC@@_4.7 #----------------------------- # assume a_list and b_list are already loaded aonly = [item for item in a_list if item not in b_list] # A slightly more complex Pythonic version using sets - if you had a few # lists, subtracting sets would be clearer than the listcomp version above a_set = set(a_list) b_set = set(b_list) aonly = list(a_set - b_set) # Elements in a_set but not in b_set # DON'T DO THIS. 
seen = {}                 # lookup table to test membership of B
aonly = []                # answer

#    build lookup table
for item in b_list:
    seen[item] = 1

#    find only elements in a_list and not in b_list
for item in a_list:
    if item not in seen:
        #    it's not in 'seen', so add to 'aonly'
        aonly.append(item)
#-----------------------------
# DON'T DO THIS.  There's lots of ways not to do it.
seen = {}   # lookup table
aonly = []  # answer

#     build lookup table - unnecessary and poor Python style
[seen.update({x: 1}) for x in b_list]

aonly = [item for item in a_list if item not in seen]

#-----------------------------
aonly = list(set(a_list))

# DON'T DO THIS.
seen = {}
aonly = []
for item in a_list:
    if item not in seen:
        aonly.append(item)
        seen[item] = 1                    # mark as seen
#-----------------------------
mydict["key1"] = 1
mydict["key2"] = 2
#-----------------------------
mydict[("key1", "key2")] = (1,2)
#-----------------------------
# DON'T DO THIS:
seen = dict.fromkeys(B.keys())

# DON'T DO THIS pre-2.3:
seen = {}
for term in B:
    seen[term] = None
#-----------------------------
# DON'T DO THIS:
seen = {}
for k in B:  # iterating over a dict yields its keys only
    seen[k] = 1
#-----------------------------
# @@PLEAC@@_4.8
#-----------------------------
a = (1, 3, 5, 6, 7, 8)
b = (2, 3, 5, 7, 9)

a_set = set(a)
b_set = set(b)

union = a_set | b_set   # or a_set.union(b_set)
isect = a_set & b_set   # or a_set.intersection(b_set)
diff = a_set ^ b_set    # or a_set.symmetric_difference(b_set)


# DON'T DO THIS:
union_list = []; isect_list = []; diff = []
union_dict = {}; isect_dict = {}
count = {}
#-----------------------------
# DON'T DO THIS:
for e in a:
    union_dict[e] = 1

for e in b:
    if union_dict.has_key(e):
        isect_dict[e] = 1
    union_dict[e] = 1

union_list = union_dict.keys()
isect_list = isect_dict.keys()
#-----------------------------
# DON'T DO THIS:
# (start from empty dicts: 'union' and 'isect' were bound to sets above)
union = {}; isect = {}
for e in a + b:
    if union.get(e, 0):   # already recorded once, so it occurs in both
        isect[e] = 1
    union[e] = 1

union = union.keys()
isect = isect.keys()
#-----------------------------
# DON'T DO THIS:
count = {}
for e in a + b:
    count[e] = count.get(e, 0) + 1
# an element counted twice occurs in both a and b; one counted once is in
# exactly one of them
union = []; isect = []; diff = []

for e in count.keys():
    union.append(e)
    if count[e] == 2:
        isect.append(e)
    else:
        diff.append(e)
#-----------------------------
# DON'T DO THIS:
isect = []; diff = []; union = []
count = {}
for e in a + b:
    count[e] = count.get(e, 0) + 1

for e, num in count.items():
    union.append(e)
    # the count (1 or 2) picks the target list; index 0 is never used
    [None, diff, isect][num].append(e)
#-----------------------------
# @@PLEAC@@_4.9
#-----------------------------
# "append" for a single term and
# "extend" for many terms
mylist1.extend(mylist2)
#-----------------------------
mylist1 = mylist1 + mylist2
mylist1 += mylist2
#-----------------------------
members = ["Time", "Flies"]
initiates = ["An", "Arrow"]
members.extend(initiates)
# members is now ["Time", "Flies", "An", "Arrow"]
#-----------------------------
members[2:] = ["Like"] + initiates
print " ".join(members)
members[:1] = ["Fruit"]           # or members[0] = "Fruit"
members[-2:] = ["A", "Banana"]
print " ".join(members)
#-----------------------------
#=> Time Flies Like An Arrow
#=> Fruit Flies Like A Banana
#-----------------------------
# @@PLEAC@@_4.10
#-----------------------------
# reverse mylist into revlist

revlist = mylist[::-1]

# or
revlist = list(reversed(mylist))

# or pre-2.3
revlist = mylist[:]    # shallow copy
revlist.reverse()
#-----------------------------
for elem in reversed(mylist):
    pass # do something with elem

# or
for elem in mylist[::-1]:
    pass # do something with elem

# if you need the index and the list won't take too much memory:
for i, elem in reversed(list(enumerate(mylist))):
    pass

# If you absolutely must explicitly index:
for i in range(len(mylist)-1, -1, -1):
    pass
#-----------------------------
descending = sorted(users, reverse=True)
#-----------------------------
# @@PLEAC@@_4.11
#-----------------------------
# remove n elements from the front of mylist
mylist[:n] = []       # or del mylist[:n]

# remove n elements from front of mylist, saving them into front
front, mylist[:n] = mylist[:n], []

# remove 1 element from the front of mylist, saving it in front:
front = mylist.pop(0)

# remove n elements from the end of mylist
# (careful: with n == 0 the slice [-0:] is the whole list, so this empties it)
mylist[-n:] = []      # or del mylist[-n:]

# remove n elements from the end of mylist, saving them in end
end, mylist[-n:] = mylist[-n:], []

# remove 1 element from the end of mylist, saving it in end:
end = mylist.pop()

#-----------------------------
def shift2(terms):
    # Remove the first two elements of terms in place and return them.
    front = terms[:2]
    terms[:2] = []
    return front

def pop2(terms):
    # Remove the last two elements of terms in place and return them.
    back = terms[-2:]
    terms[-2:] = []
    return back
#-----------------------------
friends = "Peter Paul Mary Jim Tim".split()
this, that = shift2(friends)
# 'this' contains Peter, 'that' has Paul, and
# 'friends' has Mary, Jim, and Tim

beverages = "Dew Jolt Cola Sprite Fresca".split()
pair = pop2(beverages)
# pair[0] contains Sprite, pair[1] has Fresca,
# and 'beverages' has (Dew, Jolt, Cola)

# In general you probably shouldn't do things that way because it's
# not clear from these calls that the lists are modified.
#-----------------------------
# @@PLEAC@@_4.12
# the else clause of a for loop runs only when the loop was not left by break
for item in mylist:
    if criterion:
        pass    # do something with matched item
        break
else:
    pass    # unfound
#-----------------------------
for idx, elem in enumerate(mylist):
    if criterion:
        pass    # do something with elem found at mylist[idx]
        break
else:
    pass ## unfound
#-----------------------------
# Assuming employees are sorted high->low by wage.
for employee in employees: if employee.category == 'engineer': highest_engineer = employee break print "Highest paid engineer is:", highest_engineer.name #----------------------------- # If you need the index, use enumerate: for i, employee in enumerate(employees): if employee.category == 'engineer': highest_engineer = employee break print "Highest paid engineer is: #%s - %s" % (i, highest_engineer.name) # The following is rarely appropriate: for i in range(len(mylist)): if criterion: pass # do something break else: pass ## not found #----------------------------- # @@PLEAC@@_4.13 matching = [term for term in mylist if test(term)] #----------------------------- matching = [] for term in mylist: if test(term): matching.append(term) #----------------------------- bigs = [num for num in nums if num > 1000000] pigs = [user for (user, val) in users.items() if val > 1e7] #----------------------------- import os matching = [line for line in os.popen("who") if line.startswith("gnat ")] #----------------------------- engineers = [employee for employee in employees if employee.position == "Engineer"] #----------------------------- secondary_assistance = [applicant for applicant in applicants if 26000 <= applicant.income < 30000] #----------------------------- # @@PLEAC@@_4.14 sorted_list = sorted(unsorted_list) #----------------------------- # pids is an unsorted list of process IDs import os, signal, time for pid in sorted(pids): print pid pid = raw_input("Select a process ID to kill: ") try: pid = int(pid) except ValueError: raise SystemExit("Exiting ... ") os.kill(pid, signal.SIGTERM) time.sleep(2) try: os.kill(pid, signal.SIGKILL) except OSError, err: if err.errno != 3: # was it already killed? 
raise #----------------------------- descending = sorted(unsorted_list, reverse=True) #----------------------------- allnums = [4, 19, 8, 3] allnums.sort(reverse=True) # inplace #----------------------------- # pre 2.3 allnums.sort() # inplace allnums.reverse() # inplace #or allnums = sorted(allnums, reverse=True) # reallocating #----------------------------- # @@PLEAC@@_4.15 ordered = sorted(unordered, cmp=compare) #----------------------------- ordered = sorted(unordered, key=compute) # ...which is somewhat equivalent to: precomputed = [(compute(x), x) for x in unordered] precomputed.sort(lambda a, b: cmp(a[0], b[0])) ordered = [v for k,v in precomputed.items()] #----------------------------- # DON'T DO THIS. def functional_sort(mylist, function): mylist.sort(function) return mylist ordered = [v for k,v in functional_sort([(compute(x), x) for x in unordered], lambda a, b: cmp(a[0], b[0]))] #----------------------------- ordered = sorted(employees, key=lambda x: x.name) #----------------------------- for employee in sorted(employees, key=lambda x: x.name): print "%s earns $%s" % (employee.name, employee.salary) #----------------------------- sorted_employees = sorted(employees, key=lambda x: x.name): for employee in sorted_employees: print "%s earns $%s" % (employee.name, employee.salary) # load bonus for employee in sorted_employees: if bonus(employee.ssn): print employee.name, "got a bonus!" #----------------------------- sorted_employees = sorted(employees, key=lambda x: (x.name, x.age)): #----------------------------- # NOTE: Python should allow access to the pwd fields by name # as well as by position. import pwd # fetch all users users = pwd.getpwall() for user in sorted(users, key=lambda x: x[0]): print user[0] #----------------------------- sorted_list = sorted(names, key=lambda x: x[:1]) #----------------------------- sorted_list = sorted(strings, key=len) #----------------------------- # DON'T DO THIS. 
temp = [(len(s), s) for s in strings] temp.sort(lambda a, b: cmp(a[0], b[0])) sorted_list = [x[1] for x in temp] #----------------------------- # DON'T DO THIS. def functional_sort(mylist, function): mylist.sort(function) return mylist sorted_fields = [v for k,v in functional_sort( [(int(re.search(r"(\d+)", x).group(1)), x) for x in fields], lambda a, b: cmp(a[0], b[0]))] #----------------------------- entries = [line[:-1].split() for line in open("/etc/passwd")] for entry in sorted(entries, key=lambda x: (x[3], x[2], x[0])): print entry #----------------------------- # @@PLEAC@@_4.16 #----------------------------- import itertools for process in itertools.cycle([1, 2, 3, 4, 5]): print "Handling process", process time.sleep(1) # pre 2.3: import time class Circular(object): def __init__(self, data): assert len(data) >= 1, "Cannot use an empty list" self.data = data def __iter__(self): while True: for elem in self.data: yield elem circular = Circular([1, 2, 3, 4, 5]) for process in circular: print "Handling process", process time.sleep(1) # DON'T DO THIS. All those pops and appends mean that the list needs to be # constantly reallocated. 
This is rather bad if your list is large: import time class Circular(object): def __init__(self, data): assert len(data) >= 1, "Cannot use an empty list" self.data = data def next(self): head = self.data.pop(0) self.data.append(head) return head circular = Circular([1, 2, 3, 4, 5]) while True: process = circular.next() print "Handling process", process time.sleep(1) #----------------------------- # @@PLEAC@@_4.17 #----------------------------- # generate a random permutation of mylist in place import random random.shuffle(mylist) #----------------------------- # @@PLEAC@@_4.18 #----------------------------- import sys def make_columns(mylist, screen_width=78): if mylist: maxlen = max([len(elem) for elem in mylist]) maxlen += 1 # to make extra space cols = max(1, screen_width/maxlen) rows = 1 + len(mylist)/cols # pre-create mask for faster computation mask = "%%-%ds " % (maxlen-1) for n in range(rows): row = [mask%elem for elem in mylist[n::rows]] yield "".join(row).rstrip() for row in make_columns(sys.stdin.readlines(), screen_width=50): print row # A more literal translation import sys # subroutine to check whether at last item on line def EOL(item): return (item+1) % cols == 0 # Might not be portable to non-linux systems def getwinsize(): # Use the curses module if installed try: import curses stdscr = curses.initscr() rows, cols = stdscr.getmaxyx() return cols except ImportError: pass # Nope, so deal with ioctl directly. What value for TIOCGWINSZ? 
try: import termios TIOCGWINSZ = termios.TIOCGWINSZ except ImportError: TIOCGWINSZ = 0x40087468 # This is Linux specific import struct, fcntl s = struct.pack("HHHH", 0, 0, 0, 0) try: x = fcntl.ioctl(sys.stdout.fileno(), TIOCGWINSZ, s) except IOError: return 80 rows, cols = struct.unpack("HHHH", x)[:2] return cols cols = getwinsize() data = [s.rstrip() for s in sys.stdin.readlines()] if not data: maxlen = 1 else: maxlen = max(map(len, data)) maxlen += 1 # to make extra space # determine boundaries of screen cols = (cols / maxlen) or 1 rows = (len(data)+cols) / cols # pre-create mask for faster computation mask = "%%-%ds " % (maxlen-1) # now process each item, picking out proper piece for this position for item in range(rows * cols): target = (item % cols) * rows + (item/cols) if target < len(data): piece = mask % data[target] else: piece = mask % "" if EOL(item): piece = piece.rstrip() # don't blank-pad to EOL sys.stdout.write(piece) if EOL(item): sys.stdout.write("\n") if EOL(item): sys.stdout.write("\n") #----------------------------- # @@PLEAC@@_4.19 #----------------------------- def factorial(n): s = 1 while n: s *= n n -= 1 return s #----------------------------- def permute(alist, blist=[]): if not alist: yield blist for i, elem in enumerate(alist): for elem in permute(alist[:i] + alist[i+1:], blist + [elem]): yield elem for permutation in permute(range(4)): print permutation #----------------------------- # DON'T DO THIS import fileinput # Slightly modified from # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66463 def print_list(alist, blist=[]): if not alist: print ' '.join(blist) for i in range(len(alist)): blist.append(alist.pop(i)) print_list(alist, blist) alist.insert(i, blist.pop()) for line in fileinput.input(): words = line.split() print_list(words) #----------------------------- class FactorialMemo(list): def __init__(self): self.append(1) def __call__(self, n): try: return self[n] except IndexError: ret = n * self(n-1) self.append(ret) 
return ret factorial = FactorialMemo() import sys import time sys.setrecursionlimit(10000) start = time.time() factorial(2000) f1 = time.time() - start factorial(2100) # First 2000 values are cached already f2 = time.time() - f1 - start print "Slow first time:", f1 print "Quicker the second time:", f2 #----------------------------- class MemoizedPermutations(list): def __init__(self, alist): self.permute(alist, []) def permute(self, alist, blist): if not alist: self.append(blist) for i, elem in enumerate(alist): self.permute(alist[:i] + alist[i+1:], blist + [elem]) def __call__(self, seq, idx): return [seq[n] for n in self[idx]] p5 = MemoizedPermutations(range(5)) words = "This sentence has five words".split() print p5(words, 17) print p5(words, 81) #----------------------------- # @@PLEAC@@_5.0 #----------------------------- # dictionaries age = {"Nat": 24, "Jules": 24, "Josh": 17} #----------------------------- age = {} age["Nat"] = 24 age["Jules"] = 25 age["Josh"] = 17 #----------------------------- food_color = {"Apple": "red", "Banana": "yellow", "Lemon": "yellow", "Carrot": "orange" } #----------------------------- # NOTE: keys must be quoted in Python # @@PLEAC@@_5.1 mydict[key] = value #----------------------------- # food_color defined per the introduction food_color["Raspberry"] = "pink" print "Known foods:" for food in food_color: print food #=> Known foods: #=> Raspberry #=> Carrot #=> Lemon #=> Apple #=> Banana #----------------------------- # @@PLEAC@@_5.2 # does mydict have a value for key? if key in mydict: pass # it exists else: pass # it doesn't #----------------------------- # food_color per the introduction for name in ("Banana", "Martini"): if name in food_color: print name, "is a food." else: print name, "is a drink." #=> Banana is a food. #=> Martini is a drink. 
#----------------------------- age = {} age["Toddler"] = 3 age["Unborn"] = 0 age["Phantasm"] = None for thing in ("Toddler", "Unborn", "Phantasm", "Relic"): print ("%s:"%thing), if thing in age: print "Exists", if age[thing] is not None: print "Defined", if age[thing]: print "True", print #=> Toddler: Exists Defined True #=> Unborn: Exists Defined #=> Phantasm: Exists #=> Relic: #----------------------------- # Get file sizes for the requested filenames import fileinput, os size = {} for line in fileinput.input(): filename = line.rstrip() if filename in size: continue size[filename] = os.path.getsize(filename) # @@PLEAC@@_5.3 # remove key and its value from mydict del mydict[key] #----------------------------- # food_color as per Introduction def print_foods(): foods = food_color.keys() print "Keys:", " ".join(foods) print "Values:", for food in foods: color = food_color[food] if color is not None: print color, else: print "(undef)", print print "Initially:" print_foods() print "\nWith Banana set to None" food_color["Banana"] = None print_foods() print "\nWith Banana deleted" del food_color["Banana"] print_foods() #=> Initially: #=> Keys: Carrot Lemon Apple Banana #=> Values: orange yellow red yellow #=> #=> With Banana set to None #=> Keys: Carrot Lemon Apple Banana #=> Values: orange yellow red (undef) #=> #=> With Banana deleted #=> Keys: Carrot Lemon Apple #=> Values: orange yellow red #----------------------------- for key in ["Banana", "Apple", "Cabbage"]: del food_color[key] #----------------------------- # @@PLEAC@@_5.4 #----------------------------- for key, value in mydict.items(): pass # do something with key and value # If mydict is large, use iteritems() instead for key, value in mydict.iteritems(): pass # do something with key and value #----------------------------- # DON'T DO THIS: for key in mydict.keys(): value = mydict[key] # do something with key and value #----------------------------- # food_color per the introduction for food, color in 
food_color.items(): print "%s is %s." % (food, color) # DON'T DO THIS: for food in food_color: color = food_color[food] print "%s is %s." % (food, color) #----------------------------- print """%(food)s is %(color)s. """ % vars() #----------------------------- for food, color in sorted(food_color.items()): print "%s is %s." % (food, color) #----------------------------- #!/usr/bin/env python # countfrom - count number of messages from each sender import sys if len(sys.argv) > 1: infile = open(sys.argv[1]) else: infile = sys.stdin counts = {} for line in infile: if line.startswith("From: "): name = line[6:-1] counts[name] = counts.get(name, 0) + 1 for (name, count) in sorted(counts.items()): print "%s: %s" % (name, count) #----------------------------- # @@PLEAC@@_5.5 for key, val in mydict.items(): print key, "=>", val #----------------------------- print "\n".join([("%s => %s" % item) for item in mydict.items()]) #----------------------------- print mydict #=> {'firstname': 'Andrew', 'login': 'dalke', 'state': 'New Mexico', 'lastname': 'Dalke'} #----------------------------- import pprint pprint.pprint(dict) #=> {'firstname': 'Andrew', #=> 'lastname': 'Dalke', #=> 'login': 'dalke', #=> 'state': 'New Mexico'} #----------------------------- # @@PLEAC@@_5.6 #----------------------------- class SequenceDict(dict): """ Dictionary that remembers the insertion order. The lists returned by keys(), values() and items() are in the insertion order. 
""" def __init__(self, *args): self._keys={} # key --> id self._ids={} # id --> key self._next_id=0 def __setitem__(self, key, value): self._keys[key]=self._next_id self._ids[self._next_id]=key self._next_id+=1 return dict.__setitem__(self, key, value) def __delitem__(self, key): id=self._keys[key] del(self._keys[key]) del(self._ids[id]) return dict.__delitem__(self, key) def values(self): values=[] ids=list(self._ids.items()) ids.sort() for id, key in ids: values.append(self[key]) return values def items(self): items=[] ids=list(self._ids.items()) ids.sort() for id, key in ids: items.append((key, self[key])) return items def keys(self): ids=list(self._ids.items()) ids.sort() keys=[] for id, key in ids: keys.append(key) return keys def update(self, d): for key, value in d.items(): self[key]=value def clear(self): dict.clear(self) self._keys={} self._ids={} self._next_id=0 def testSequenceDict(): sd=SequenceDict() # First Test sd[3]="first" sd[2]="second" sd[1]="third" print sd.keys() print sd.items() print sd.values() del(sd[1]) del(sd[2]) del(sd[3]) print sd.keys(), sd.items(), sd.values() print sd._ids, sd._keys print "---------------" # Second Test sd["b"]="first" sd["a"]="second" sd.update({"c": "third"}) print sd.keys() print sd.items() print sd.values() del(sd["b"]) del(sd["a"]) del(sd["c"]) print sd.keys(), sd.items(), sd.values() print sd._ids, sd._keys def likePerlCookbook(): food_color=SequenceDict() food_color["Banana"]="Yellow"; food_color["Apple"]="Green"; food_color["Lemon"]="Yellow" print "In insertion order, the foods' color are:" for food, color in food_color.items(): print "%s is colored %s" % (food, color) if __name__=="__main__": #testSequenceDict() likePerlCookbook() # @@PLEAC@@_5.7 import os ttys = {} who = os.popen("who") for line in who: user, tty = line.split()[:2] ttys.setdefault(user, []).append(tty) for (user, tty_list) in sorted(ttys.items()): print user + ": " + " ".join(tty_list) #----------------------------- import pwd for (user, 
tty_list) in ttys.items(): print user + ":", len(tty_list), "ttys." for tty in sorted(tty_list): try: uid = os.stat("/dev/" + tty).st_uid user = pwd.getpwuid(uid)[0] except os.error: user = "(not available)" print "\t%s (owned by %s)" % (tty, user) # @@PLEAC@@_5.8 # lookup_dict maps keys to values reverse = dict([(val, key) for (key, val) in lookup_dict.items()]) #----------------------------- surname = {"Mickey": "Mantle", "Babe": "Ruth"} first_name = dict([(last, first) for (first, last) in surname.items()]) print first_name["Mantle"] #=> Mickey #----------------------------- #!/usr/bin/perl -w # foodfind - find match for food or color import sys if not sys.argv[1:]: raise SystemExit("usage: foodfind food_or_color") given = sys.argv[1] color_dict = {"Apple": "red", "Banana": "yellow", "Lemon": "yellow", "Carrot": "orange", } food_dict = dict([(color, food) for (food, color) in color_dict.items()]) if given in color_dict: print given, "is a food with color", color_dict[given] elif given in food_dict: print food_dict[given], "is a food with color", given #----------------------------- # food_color as per the introduction foods_with_color = {} for food, color in food_color.items(): foods_with_color.setdefault(color, []).append(food) print " ".join(foods_with_color["yellow"]), "were yellow foods." #----------------------------- # @@PLEAC@@_5.9 #----------------------------- # mydict is the hash to sort for key, value in sorted(mydict.items()): # do something with key, value #----------------------------- # food_color as per section 5.8 for food, color in sorted(food_color.items()): print "%s is %s." % (food, color) #----------------------------- # NOTE: alternative version for item in sorted(food_color.items()): print "%s is %s." % item #----------------------------- # NOTE: alternative version showing a user-defined function def food_cmp(x, y): return cmp(x, y) for food, color in sorted(food_color, cmp=food_cmp): print "%s is %s." 
% (food, color) #----------------------------- def food_len_cmp(x, y): return cmp(len(x), len(y)) for food in sorted(food_color, cmp=food_len_cmp): print "%s is %s." % (food, food_color[food]) # In this instance, however, the following is both simpler and faster: for food in sorted(food_color, key=len): print "%s is %s." % (food, food_color[food]) #----------------------------- # @@PLEAC@@_5.10 #----------------------------- merged = {} merged.update(a_dict) merged.update(b_dict) #----------------------------- # NOTE: alternative version merged = a_dict.copy() merged.update(b_dict) #----------------------------- # DON'T DO THIS: merged = {} for k, v in a_dict.items(): merged[k] = v for k, v in b_dict.items(): merged[k] = v #----------------------------- # food_color as per section 5.8 drink_color = {"Galliano": "yellow", "Mai Tai": "blue"} ingested_color = drink_color.copy() ingested_color.update(food_color) #----------------------------- # DON'T DO THIS: drink_color = {"Galliano": "yellow", "Mai Tai": "blue"} substance_color = {} for k, v in food_color.items(): substance_color[k] = v for k, v in drink_color.items(): substance_color[k] = v #----------------------------- # DON'T DO THIS: substance_color = {} for mydict in (food_color, drink_color): for k, v in mydict: substance_color[k] = v #----------------------------- # DON'T DO THIS: substance_color = {} for item in food_color.items() + drink_color.items(): for k, v in mydict: substance_color[k] = v #----------------------------- substance_color = {} for mydict in (food_color, drink_color): for k, v in mydict.items(): if substance_color.has_key(k): print "Warning:", k, "seen twice. Using the first definition." 
continue substance_color[k] = v # I think it's a copy, in which case all_colors = new_colors.copy() # @@PLEAC@@_5.11 common = [k for k in dict1 if k in dict2] #----------------------------- this_not_that = [k for k in dict1 if k not in dict2] #----------------------------- # citrus_color is a dict mapping citrus food name to its color. citrus_color = {"Lemon": "yellow", "Orange": "orange", "Lime": "green"} # build up a list of non-citrus foods non_citrus = [k for k in food_color if k not in citruscolor] #----------------------------- # @@PLEAC@@_5.12 #----------------------------- # references as keys of dictionaries is no pb in python name = {} for filename in ("/etc/termcap", "/vmunix", "/bin/cat"): try: myfile = open(filename) except IOError: pass else: names[myfile] = filename print "open files:", ", ".join(name.values()) for f, fname in name.items(): f.seek(0, 2) # seek to the end print "%s is %d bytes long." % (fname, f.tell()) #----------------------------- # @@PLEAC@@_5.13 # Python doesn't allow presizing of dicts, but hashing is efficient - # it only re-sizes at intervals, not every time an item is added. 
# @@PLEAC@@_5.14 count = {} for element in mylist: count[element] = count.get(element, 0) + 1 # @@PLEAC@@_5.15 #----------------------------- import fileinput father = {'Cain': 'Adam', 'Abel': 'Adam', 'Seth': 'Adam', 'Enoch': 'Cain', 'Irad': 'Enoch', 'Mehujael': 'Irad', 'Methusael': 'Mehujael', 'Lamech': 'Methusael', 'Jabal': 'Lamech', 'Tubalcain': 'Lamech', 'Enos': 'Seth', } for line in fileinput.input(): person = line.rstrip() while person: # as long as we have people, print person, # print the current name person = father.get(person) # set the person to the person's father print #----------------------------- import fileinput children = {} for k, v in father.items(): children.setdefault(v, []).append(k) for line in fileinput.input(): person = line.rstrip() kids = children.get(person, ["nobody"]) print person, "begat", ", ".join(kids) #----------------------------- import sys, re pattern = re.compile(r'^\s*#\s*include\s*<([^>]+)') includes = {} for filename in filenames: try: infile = open(filename) except IOError, err: print>>sys.stderr, err continue for line in infile: match = pattern.match(line) if match: includes.setdefault(match.group(1), []).append(filename) #----------------------------- # list of files that don't include others mydict = {} for e in reduce(lambda a,b: a + b, includes.values()): if not includes.has_key(e): mydict[e] = 1 include_free = mydict.keys() include_free.sort() # @@PLEAC@@_5.16 #----------------------------- #!/usr/bin/env python -w # dutree - print sorted indented rendition of du output import os, sys def get_input(args): # NOTE: This is insecure - use only from trusted code! 
cmd = "du " + " ".join(args) infile = os.popen(cmd) dirsize = {} kids = {} for line in infile: size, name = line[:-1].split("\t", 1) dirsize[name] = int(size) parent = os.path.dirname(name) kids.setdefault(parent, []).append(name) # Remove the last field added, which is the root kids[parent].pop() if not kids[parent]: del kids[parent] return name, dirsize, kids def getdots(root, dirsize, kids): size = cursize = dirsize[root] if kids.has_key(root): for kid in kids[root]: cursize -= dirsize[kid] getdots(kid, dirsize, kids) if size != cursize: dot = root + "/." dirsize[dot] = cursize kids[root].append(dot) def output(root, dirsize, kids, prefix = "", width = 0): path = os.path.basename(root) size = dirsize[root] fmt = "%" + str(width) + "d %s" line = fmt % (size, path) print prefix + line prefix += (" " * (width-1)) + "| " + (" " * len(path)) if kids.has_key(root): kid_list = kids[root] kid_list.sort(lambda x, y, dirsize=dirsize: cmp(dirsize[x], dirsize[y])) width = len(str(dirsize[kid_list[-1]])) for kid in kid_list: output(kid, dirsize, kids, prefix, width) def main(): root, dirsize, kids = get_input(sys.argv[1:]) getdots(root, dirsize, kids) output(root, dirsize, kids) if __name__ == "__main__": main() # @@PLEAC@@_6.0 # Note: regexes are used less often in Python than in Perl as tasks are often # covered by string methods, or specialised objects, modules, or packages. import re # "re" is the regular expression module. re.search("sheep",meadow) # returns a MatchObject is meadow contains "sheep". if not re.search("sheep",meadow): print "no sheep on this meadow only a fat python." # replacing strings is not done by "re"gular expressions. meadow = meadow.replace("old","new") # replace "old" with "new" and assign result. #----------------------------- re.search("ovine",meadow) meadow = """Fine bovines demand fine toreadors. Muskoxen are polar ovibovine species. Grooviness went out of fashion decades ago.""" meadow = "Ovines are found typically in ovaries." 
if re.search(r"\bovines\b",meadow,re.I) : print "Here be sheep!" #----------------------------- # The tricky bit mystr = "good food" re.sub("o*","e",mystr,1) # gives 'egood food' echo ababacaca | python -c "import sys,re; print re.search('(a|ba|b)+(a|ac)+',sys.stdin.read()).group()" #----------------------------- # pattern matching modifiers # assume perl code iterates over some file import re, fileinput for ln = fileinput.input(): fnd = re.findall("(\d+)",ln) if len(fnd) > 0: print "Found number %s" % (fnd[0]) # ---------------------------- digits = "123456789" nonlap = re.findall("(\d\d\d)", digits) yeslap = ["not yet"] print "Non-overlapping:",",".join(nonlap) print "Overlapping :",",".join(yeslap) # ---------------------------- mystr = "And little lambs eat ivy" fnd = re.search("(l[^s]*s)", mystr) print "(%s) (%s) (%s)" % (mystr[:fnd.start()], fnd.group(), mystr[fnd.end():]) # (And ) (little lambs) ( eat ivy) # @@PLEAC@@_6.1 import re dst = re.sub("this","that",src) #----------------------------- # strip to basename basename = re.sub(".*/(?=[^/]+)","",progname) # Make All Words Title-Cased # DON'T DO THIS - use str.title() instead def cap(mo): return mo.group().capitalize() re.sub("(?P\w+)",cap,"make all words title-cased") # /usr/man/man3/foo.1 changes to /usr/man/cat3/foo.1 manpage = "/usr/man/man3/foo.1" catpage = re.sub("man(?=\d)","cat",manpage) #----------------------------- bindirs = "/usr/bin /bin /usr/local/bin".split() libdirs = [d.replace("bin", "lib") for d in bindirs] print " ".join(libdirs) #=> /usr/lib /lib /usr/local/lib #----------------------------- # strings are never modified in place. #----------------------------- # @@PLEAC@@_6.2 ##--------------------------- # DON'T DO THIS. use line[:-1].isalpha() [this probably goes for the # remainder of this section too!] 
import re if re.match("^[A-Za-z]+$",line): print "pure alphabetic" ##--------------------------- if re.match(r"^[^\W\d_]+$", line, re.LOCALE): print "pure alphabetic" ##--------------------------- import re import locale try: locale.setlocale(locale.LC_ALL, 'fr_CA.ISO8859-1') except: print "couldn't set locale to French Cnadian" raise SystemExit DATA=""" silly façade coöperate niño Renée Molière hæmoglobin naïve tschüß random!stuff#here """ for ln in DATA.split(): ln = ln.rstrip() if re.match(r"^[^\W\d_]+$",ln,re.LOCALE): print "%s: alphabetic" % (ln) else: print "%s: line noise" % (ln) # although i dont think "coöperate" should be in canadian ##--------------------------- # @@PLEAC@@_6.3 # Matching Words "\S+" # as many non-whitespace bytes as possible "[A-Za-z'-]+" # as many letters, apostrophes, and hyphens # string split is similar to splitting on "\s+" "A text with some\tseparator".split() "\b*([A-Za-z]+)\b*" # word boundaries "\s*([A-Za-z]+)\s*" # might work too as on letters are allowed. re.search("\Bis\B","this thistle") # matches on thistle not on this re.search("\Bis\B","vis-a-vis") # does not match # @@PLEAC@@_6.4 #----------------------------- #!/usr/bin/python # resname - change all "foo.bar.com" style names in the input stream # into "foo.bar.com [204.148.40.9]" (or whatever) instead import socket # load inet_addr import fileinput import re match = re.compile("""(?P # capture hostname (?: # these parens for grouping only [\w-]+ # hostname component \. # ant the domain dot ) + # now repeat that whole thing a bunch of times [A-Za-z] # next must be a letter [\w-] + # now trailing domain part ) # end of hostname capture """,re.VERBOSE) # for nice formatting def repl(match_obj): orig_hostname = match_obj.group("hostname") try: addr = socket.gethostbyname(orig_hostname) except socket.gaierror: addr = "???" 
return "%s [%s]" % (orig_hostname, addr) for ln in fileinput.input(): print match.sub(repl, ln) #----------------------------- re.sub("""(?x) # nicer formatting \# # a pound sign (\w+) # the variable name \# # another pound sign """, lambda m: eval(m.group(1)), # replace with the value of the global variable line ) ##----------------------------- re.sub("""(?x) # nicer formatting \# # a pound sign (\w+) # the variable name \# # another pound sign """, lambda m: eval(eval(m.group(1))), # replace with the value of *any* variable line ) ##----------------------------- # @@PLEAC@@_6.5 import re pond = "one fish two fish red fish blue fish" fishes = re.findall(r"(?i)(\w+)\s+fish\b",pond) if len(fishes)>2: print "The third fish is a %s one." % (fishes[2]) ##----------------------------- re.findall(r"(?i)(?:\w+\s+fish\s+){2}(\w+)\s+fish",pond) ##----------------------------- count = 0 for match_object in re.finditer(r"PAT", mystr): count += 1 # or whatever you want to do here # "progressive" matching might be better if one wants match 5 from 50. # to count use count = len(re.findall(r"PAT",mystr)) count = len(re.findall(r"aba","abaababa")) # "count" overlapping matches count = len(re.findall(r"(?=aba)","abaababa")) # FASTEST non-overlapping might be str.count "abaababa".count("aba") ##----------------------------- pond = "one fish two fish red fish blue fish" colors = re.findall(r"(?i)(\w+)\s+fish\b",pond) # get all matches color = colors[2] # then the one we want # or without a temporary list color = re.findall(r"(?i)(\w+)\s+fish\b",pond)[2] # just grab element 3 print "The third fish in the pond is %s." % (color) ##----------------------------- import re pond = "one fish two fish red fish blue fish" matches = re.findall(r"(\w+)\s+fish\b",pond) evens = [fish for (i, fish) in enumerate(matches) if i%2] print "Even numbered fish are %s." 
% (" ".join(evens)) ##----------------------------- count = 0 def four_is_sushi(match_obj): global count count += 1 if count==4: return "sushi%s" % (match_obj.group(2)) return "".join(match_obj.groups()) re.sub(r"""(?x) # VERBOSE \b # makes next \w more efficient ( \w+ ) # this is what we'll be changing ( \s+ fish \b )""", four_is_sushi, pond) # one fish two fish red fish sushi fish ##----------------------------- # greedily last_fish = re.findall(r"(?i).*\b(\w+)\s+fish\b",pond) ##----------------------------- pond = "One fish two fish red fish blue fish swim here" color = re.findall(r"(?i)\b(\w+)\s+fish\b",pond)[-1] print "Last fish is "+color+"." # FASTER using string. lastfish = pond.rfind("fish") color = pond[:lastfish].split()[-1] ##----------------------------- r"""(?x) A # find some pattern A (?! # mustn't be able to find .* # something A # and A ) $ # through the end of string """ pond = "One fish two fish red fish blue fish swim here" fnd = re.findall(r"""(?xis) # VERBOSE, CASEINSENSITIVE, DOTALL \b ( \w+ ) \s+ fish \b (?! .* \b fish \b )""", pond) if len(fnd): print "Last fish is %s." % (fnd[0]) else: print "Failed!" # @@PLEAC@@_6.6 # Matching Multiple Lines # #!/usr/bin/python # killtags - very bad html tag killer import re import sys text = open(sys.argv[1]).read() # read the whole file text = re.sub("(?ms)<.*?>","",text) # strip tags (terrible print text ## ---------------------------- #!/usr/bin/python # headerfy: change certain chapter headers to html import sys, re match = re.compile(r"""(?xms) # re.VERBOSE, re.MULTILINE, and re.DOTALL \A # start of the string (?P # capture in g Chapter # literal string \s+ # mandatory whitespace \d+ # decimal number \s* # optional whitespace : # a real colon . * # anything not a newline till end of line ) """) text = open(sys.argv[1]).read() # read the whole file for paragraph in text.split("\n"): # split on unix end of lines p = match.sub("

\g

",paragraph) print p ## ---------------------------- # the one liner does not run. # python -c 'import sys,re; for p in open(sys.argv[1]).read().split("\n\n"): print re.sub(r"(?ms)\A(Chapter\s+\d+\s*:.*)","

\g0

",p)' ## ---------------------------- match = re.compile(r"(?ms)^START(.*?)^END") # s makes . span line boundaries # m makes ^ match at the beginning of the string and at the beginning of each line chunk = 0 for paragraph in open(sys.argv[1]).read().split("\n\n"): chunk += 1 fnd = match.findall(paragraph) if fnd: print "chunk %d in %s has <<%s>>" % (chunk,sys.argv[1],">>,<<".join(fnd)) ## ---------------------------- # @@PLEAC@@_6.7 import sys # Read the whole file and split chunks = open(sys.argv[1]).read().split() # on whitespace chunks = open(sys.argv[1]).read().split("\n") # on line ends # splitting on pattern import re pattern = r"x" chunks = re.split(pattern, open(sys.argv[1]).read()) ##----------------------------- chunks = re.split(r"(?m)^\.(Ch|Se|Ss)$",open(sys.argv[1]).read()) print "I read %d chunks." % (len(chunks)) # without delimiters chunks = re.split(r"(?m)^\.(?:Ch|Se|Ss)$",open(sys.argv[1]).read()) # with delimiters chunks = re.split(r"(?m)^(\.(?:Ch|Se|Ss))$",open(sys.argv[1]).read()) # with delimiters at chunkstart chunks = re.findall(r"""(?xms) # multiline, dot matches lineend, allow comments ((?:^\.)? # consume the separator if present .*?) # match everything but not greedy (?= # end the match on this but dont consume it (?: # dont put into group [1] ^\.(?:Ch|Se|Ss)$ # either end on one of the roff commands |\Z # or end of text ) )""", open(sys.argv[1]).read()) # [1] if "?:" is removed the result holds tuples: ('.Ch\nchapter x','.Ch') # which might be more usefull. 
# @@PLEAC@@_6.8 ##----------------------------- # Python doesn't have perl's range operators # If you want to only use a selected line range, use enumerate # (though note that indexing starts at zero: for i, line in enumerate(myfile): if firstlinenum <= i < lastlinenum: dosomethingwith(line) # Using patterned ranges is slightly trickier - # You need to search for the first pattern then # search for the next pattern: import re for line in myfile: if re.match(pat1, line): break dosomethingwith(line) # Only if pat1 can be on same line as pat2 for line in myfile: if re.match(pat2, line): break dosomethingwith(line) ##----------------------------- # If you need to extract ranges a lot, the following generator funcs # may be useful: def extract_range(myfile, start, finish): for i, line in enumerate(myfile): if start <= i < finish: yield line elif i == finish: break for line in extract_range(open("/etc/passwd"), 3, 5): print line def patterned_range(myfile, startpat, endpat=None): startpat = re.compile(startpat) if endpat is not None: endpat = re.compile(endpat) in_range = False for line in myfile: if re.match(startpat, line): in_range = True if in_range: yield line if endpat is not None and re.match(endpat, line): break # DO NOT DO THIS. Use the email module instead for line in patterned_range(msg, "^From:?", "^$"): pass #... # @@PLEAC@@_6.9 tests = (("list.?",r"^list\..$"), ("project.*",r"^project\..*$"), ("*old",r"^.*old$"), ("type*.[ch]",r"^type.*\.[ch]$"), ("*.*",r"^.*\..*$"), ("*",r"^.*$"), ) # The book says convert "*","?","[","]" all other characters will be quoted. # The book uses "\Q" which escapes any characters that would otherwise be # treated as regular expression. # Escaping every char fails as "\s" is not "s" in a regex. def glob2pat(globstr): pat = globstr.replace("\\",r"\\") pat = pat.replace(".",r"\.").replace("?",r".").replace("*",r".*") return "^"+pat+"$" for globstr, patstr in tests: g2p = glob2pat(globstr) if g2p != patstr: print globstr, "failed! 
Should be", patstr, "but was", g2p # @@PLEAC@@_6.10 # @@INCLUDE@@ include/python/ch06/popgrep1 #----------------------------- # @@INCLUDE@@ include/python/ch06/popgrep2 #----------------------------- # @@INCLUDE@@ include/python/ch06/popgrep3 #----------------------------- # @@INCLUDE@@ include/python/ch06/grepauth #----------------------------- # @@PLEAC@@_6.11 # Testing for a Valid Pattern import re while True: pat = raw_input("Pattern? ") try: re.compile(pat) except re.error, err: print "INVALID PATTERN", err continue break # ---- def is_valid_pattern(pat): try: re.compile(pat) except re.error: return False return True # ---- # @@INCLUDE@@ include/python/ch06/paragrep # ---- # as we dont evaluate patterns the attack :: # # $pat = "You lose @{[ system('rm -rf *']} big here"; # # does not work. # @@PLEAC@@_6.12 # @@INCLUDE@@ include/python/ch06/localeg # @@PLEAC@@_6.13 ##----------------------------- import difflib matchlist = ["ape", "apple", "lapel", "peach", "puppy"] print difflib.get_close_matches("appel", matchlist) #=> ['lapel', 'apple', 'ape'] ##----------------------------- # Also see: # http://www.personal.psu.edu/staff/i/u/iua1/python/apse/ # http://www.bio.cam.ac.uk/~mw263/pyagrep.html # @@PLEAC@@_6.14 ##----------------------------- # To search (potentially) repeatedly for a pattern, use re.finditer(): # DO NOT DO THIS. Split on commas and convert elems using int() mystr = "3,4,5,9,120" for match in re.finditer("(\d+)", mystr): n = match.group(0) if n == "9": break # '120' will never be matched print "Found number", n # matches know their end position mystr = "The year 1752 lost 10 days on the 3rd of September" x = re.finditer("(\d+)", mystr) for match in x: n = match.group(0) print "Found number", n tail = re.match("(\S+)", mystr[match.end():]) if tail: print "Found %s after the last number."%tail.group(0) # @@PLEAC@@_6.15 # Python's regexes are based on Perl's, so it has the non-greedy # '*?', '+?', and '??' versions of '*', '+', and '?'. 
# DO NOT DO THIS. import htmllib, formatter, etc, instead #----------------------------- # greedy pattern txt = re.sub("<.*>", "", txt) # try to remove tags, very badly # non-greedy pattern txt = re.sub("<.*?>", "", txt) # try to remove tags, still rather badly #----------------------------- txt = "this and that are important Oh, me too!" print re.findall("(.*?)", txt ##----------------------------- print re.findall("/BEGIN((?:(?!BEGIN).)*)END/", txt) ##----------------------------- print re.findall("((?:(?!|).)*)", txt) ##----------------------------- print re.findall("((?:(?!<[ib]>).)*)", txt) ##----------------------------- print re.findall(""" [^<]* # stuff not possibly bad, and not possibly the end. (?: # at this point, we can have '<' if not part of something bad (?! ) # what we can't have < # okay, so match the '<' [^<]* # and continue with more safe stuff ) * """, re.VERBOSE, txt) ##----------------------------- # @@PLEAC@@_6.16 ##----------------------------- text = """ This is a test test of the duplicate word finder. """ words = text.split() for curr, next in zip(words[:-1], words[1:]): if curr.upper() == next.upper(): print "Duplicate word '%s' found." % curr # DON'T DO THIS import re pat = r""" \b # start at a word boundary (begin letters) (\S+) # find chunk of non-whitespace \b # until another word boundary (end letters) ( \s+ # separated by some whitespace \1 # and that very same chunk again \b # until another word boundary ) + # one or more sets of those """ for match in re.finditer(pat, text, flags=re.VERBOSE|re.IGNORECASE): print "Duplicate word '%s' found." 
% match.group(1) ##----------------------------- a = 'nobody'; b = 'bodysnatcher'; text = a+" "+b pat = r"^(\w+)(\w+) \2(\w+)$" for match in re.finditer(pat, text): m1, m2, m3 = match.groups() print m2, "overlaps in %s-%s-%s"%(m1, m2, m3) ##----------------------------- pat = r"^(\w+?)(\w+) \2(\w+)$" ##----------------------------- try: while True: factor = re.match(r"^(oo+?)\1+$", n).group(1) n = re.sub(factor, "o", n) print len(factor) except AttributeError: print len(n) ##----------------------------- def diaphantine(n, x, y, z): pat = r"^(o*)\1{%s}(o*)\2{%s}(o*)\3{%s}$"%(x-1, y-1, z-1) text = "o"*n try: vals = [len(v) for v in re.match(pat, text).groups()] except ValueError: print "No solutions." else: print "One solution is: x=%s, y=%s, z=%s."%tuple(vals) diaphantine(n=281, x=12, y=15, z=16) # @@PLEAC@@_6.17 ##----------------------------- # Pass any of the following patterns to re.match(), etc pat = "ALPHA|BETA" pat = "^(?=.*ALPHA)(?=.*BETA)" pat = "ALPHA.*BETA|BETA.*ALPHA" pat = "^(?:(?!PAT).)*$" pat = "(?=^(?:(?!BAD).)*$)GOOD" ##----------------------------- if not re.match(pattern, text): something() ##----------------------------- if re.match(pat1, text) and re.match(pat2, text): something() ##----------------------------- if re.match(pat1, text) or re.match(pat2, text): something() ##----------------------------- # DON'T DO THIS. 
"""minigrep - trivial grep""" import sys, re pat = sys.argv[1] for line in sys.stdin: if re.match(pat, line): print line[:-1] ##----------------------------- if re.match(r"^(?=.*bell)(?=.*lab)", "labelled"): something() ##----------------------------- if re.search("bell", s) and re.search("lab", s): something() ##----------------------------- if re.match(""" ^ # start of string (?= # zero-width lookahead .* # any amount of intervening stuff bell # the desired bell string ) # rewind, since we were only looking (?= # and do the same thing .* # any amount of intervening stuff lab # and the lab part ) """, murray_hill, re.DOTALL | re.VERBOSE): print "Looks like Bell Labs might be in Murray Hill!" ##----------------------------- if re.match(r"(?:^.*bell.*lab)|(?:^.*lab.*bell)", "labelled"): something() ##----------------------------- brand = "labelled" if re.match(""" (?: # non-capturing grouper ^ .*? # any amount of stuff at the front bell # look for a bell .*? # followed by any amount of anything lab # look for a lab ) # end grouper | # otherwise, try the other direction (?: # non-capturing grouper ^ .*? # any amount of stuff at the front lab # look for a lab .*? # followed by any amount of anything bell # followed by a bell ) # end grouper """, brand, re.DOTALL | re.VERBOSE): print "Our brand has bell and lab separate." ##----------------------------- x = "odlaw" if re.match("^(?:(?!waldo).)*$", x): print "There's no waldo here!" ##----------------------------- if re.match(""" ^ # start of string (?: # non-capturing grouper (?! # look ahead negation waldo # is he ahead of us now? ) # is so, the negation failed . 
# any character (cuzza /s) ) * # repeat that grouping 0 or more $ # through the end of the string """, x, re.VERBOSE | re.DOTALL): print "There's no waldo here!\n"; ##----------------------------- # @@PLEAC@@_6.18 # @@INCOMPLETE@@ # @@INCOMPLETE@@ # @@PLEAC@@_6.19 ##----------------------------- from email._parseaddr import AddressList print AddressList("fred&barney@stonehenge.com").addresslist[0] print AddressList("fred&barney@stonehenge.com (Hanna Barbara)").addresslist[0] name, address = AddressList("Mr Fooby Blah ").addresslist[0] print "%s's address is '%s'"%(name, address) # @@PLEAC@@_6.20 ##----------------------------- # Assuming the strings all start with different letters, or you don't # mind there being precedence, use the startswith string method: def get_action(answer): answer = answer.lower() actions = ["send", "stop", "abort", "list", "end"] for action in actions: if action.startswith(answer): return action print "Action is %s."%get_action("L") #=> Action is list. ##----------------------------- #DON'T DO THIS: import re answer = "ab" answer = re.escape(answer.strip()) for action in ("SEND", "STOP", "ABORT", "LIST", "EDIT"): if re.match(answer, action, flags=re.IGNORECASE): print "Action is %s."%action.lower() ##----------------------------- import re, sys def handle_cmd(cmd): cmd = re.escape(cmd.strip()) for name, action in {"edit": invoke_editor, "send": deliver_message, "list": lambda: system(pager, myfile), "abort": sys.exit, } if re.match(cmd, name, flags=re.IGNORECASE): action() break else: print "Unknown command:", cmd handle_cmd("ab") # @@PLEAC@@_6.21 ##----------------------------- # urlify - wrap HTML links around URL-like constructs import re, sys, fileinput def urlify_string(s): urls = r'(http|telnet|gopher|file|wais|ftp)' ltrs = r'\w'; gunk = r'/#~:.?+=&%@!\-' punc = r'.:?\-' any = ltrs + gunk + punc pat = re.compile(r""" \b # start at word boundary ( # begin \1 { %(urls)s : # need resource and a colon [%(any)s] +? 
# followed by one or more # of any valid character, but # be conservative and take only # what you need to.... ) # end \1 } (?= # look-ahead non-consumptive assertion [%(punc)s]* # either 0 or more punctuation [^%(any)s] # followed by a non-url char | # or else $ # then end of the string ) """%locals(), re.VERBOSE | re.IGNORECASE) return re.sub(pat, r"\1", s) if __name__ == "__main__": for line in fileinput.input(): print urlify_string(line) # @@PLEAC@@_6.22 ##----------------------------- # @@INCOMPLETE@@ # @@INCOMPLETE@@ # @@PLEAC@@_6.23 # The majority of regexes in this section are either partially # or completely The Wrong Thing to Do. ##----------------------------- # DON'T DO THIS. Use a Roman Numeral module, etc. (since # you need one anyway to calculate values) pat = r"^m*(d?c{0,3}|c[dm])(l?x{0,3}|x[lc])(v?i{0,3}|i[vx])$" re.match(pat, "mcmlxcvii") ##----------------------------- txt = "one two three four five" # If the words are cleanly delimited just split and rejoin: word1, word2, rest = txt.split(" ", 2) print " ".join([word2, word1, rest]) # Otherwise: frompat = r"(\S+)(\s+)(\S+)" topat = r"\3\2\1" print re.sub(frompat, topat, txt) ##----------------------------- print str.split("=") # DON'T DO THIS pat = r"(\w+)\s*=\s*(.*)\s*$" print re.match(pat, "key=val").groups() ##----------------------------- line = "such a very very very very very very very very very very very very very long line" if len(line) > 80: process(line) # DON'T DO THIS pat = ".{80,}" if re.match(pat, line): process(line) ##----------------------------- dt = time.strptime("12/11/05 12:34:56", "%d/%m/%y %H:%M:%S") # DON'T DO THIS pat = r"(\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)" dt = re.match(pat, "12/11/05 12:34:56").groups() ##----------------------------- txt = "/usr/bin/python" print txt.replace("/usr/bin", "/usr/local/bin") # Alternatively for file operations use os.path, shutil, etc. 
# DON'T DO THIS print re.sub("/usr/bin", "/usr/local/bin", txt) ##----------------------------- import re def unescape_hex(matchobj): return chr(int(matchobj.groups(0)[0], 16)) txt = re.sub(r"%([0-9A-Fa-f][0-9A-Fa-f])", unescape_hex, txt) # Assuming that the hex escaping is well-behaved, an alternative is: def unescape_hex(seg): return chr(int(seg[:2], 16)) + seg[2:] segs = txt.split("%") txt = segs[0] + "".join(unescape_hex(seg) for seg in segs[1:]) ##----------------------------- txt = re.sub(r""" /\* # Match the opening delimiter .*? # Match a minimal number of characters \*/ # Match the closing delimiter """, "", txt, re.VERBOSE) ##----------------------------- txt.strip() # DON'T DO THIS txt = re.sub(r"^\s+", "", txt) txt = re.sub(r"\s+$", "", txt) ##----------------------------- txt.replace("\\n", "\n") # DON'T DO THIS txt = re.sub("\\n", "\n", txt) ##----------------------------- txt = re.sub("^.*::", "") ##----------------------------- import socket socket.inet_aton(txt) # Will raise an error if incorrect # DON'T DO THIS. octseg =r"([01]?\d\d|2[0-4]\d|25[0-5])" dot = r"\." pat = "^" + octseg + dot + octseg + dot + octseg + dot + octseg + "$" if not re.match(pat, txt, re.VERBOSE) raise ValueError # Defitely DON'T DO THIS. pat = r"""^([01]?\d\d|2[0-4]\d|25[0-5])\.([01]?\d\d|2[0-4]\d|25[0-5])\. ([01]?\d\d|2[0-4]\d|25[0-5])\.([01]?\d\d|2[0-4]\d|25[0-5])$""" ##----------------------------- fname = os.path.basename(path) # DON'T DO THIS. fname = re.sub("^.*/", "", path) ##----------------------------- import os try: tc = os.environ["TERMCAP"] except KeyError: cols = 80 else: cols = re.match(":co#(\d+):").groups(1) ##----------------------------- # (not quite equivalent to the Perl version) name = os.path.basename(sys.argv[0]) # DON'T DO THIS. 
name = re.sub("^.*/", "", sys.argv[0])
##-----------------------------
# sys.platform is "linux2" on Python 2, so compare the prefix only.
if not sys.platform.startswith("linux"):
    raise SystemExit("This isn't Linux")
##-----------------------------
txt = re.sub(r"\n\s+", " ", txt)

# In many cases you could just use:
txt = txt.replace("\n", " ")
##-----------------------------
nums = re.findall(r"\d+\.?\d*|\.\d+", txt)
##-----------------------------
# If the words are clearly delimited just use:
capwords = [word for word in txt.split() if word.isupper()]

# Otherwise
capwords = [word for word in re.findall(r"\b(\S+)\b", txt) if word.isupper()]

# (probably) DON'T DO THIS.
capwords = re.findall(r"(\b[^\Wa-z0-9_]+\b)", txt)
##-----------------------------
# If the words are clearly delimited just use:
lowords = [word for word in txt.split() if word.islower()]

# Otherwise
lowords = [word for word in re.findall(r"\b(\S+)\b", txt) if word.islower()]

# (probably) DON'T DO THIS.
lowords = re.findall(r"(\b[^\WA-Z0-9_]+\b)", txt)
##-----------------------------
# If the words are clearly delimited just use:
icwords = [word for word in txt.split() if word.istitle()]

# Otherwise (findall returns the strings themselves; finditer would yield
# match objects, which have no istitle() method)
icwords = [word for word in re.findall(r"\b(\S+)\b", txt) if word.istitle()]

# DON'T DO THIS.
icwords = re.findall(r"(\b[^\Wa-z0-9_][^\WA-Z0-9_]*\b)", txt)
##-----------------------------
# DON'T DO THIS - use HTMLParser, etc.
# NOTE(review): the leading "<A[^>" of this pattern appears to have been
# stripped during extraction; reconstructed here - confirm.
links = re.findall(r"""<A[^>]+?HREF\s*=\s*["']?([^'" >]+?)[ '"]?>""", txt)
##-----------------------------
# Middle initial, when the name has exactly three parts.
names = txt.split()
if len(names) == 3:
    initial = names[1][0]
else:
    initial = ""

# DON'T DO THIS.
pat = r"^\S+\s+(\S)\S*\s+\S"
try:
    initial = re.match(pat, txt).group(1)
except AttributeError:      # re.match() returned None
    initial = ""
##-----------------------------
# Raw replacement string: in a plain literal "\1" is the character chr(1),
# not a back-reference.
txt = re.sub('"([^"]*)"', r"``\1''", txt)
##-----------------------------
# NOTE(review): runs of whitespace were collapsed in this file, so the
# separator inside "( |\Z)" may originally have been two spaces - confirm.
sentences = [elem[0] for elem in re.findall(r"(.*?[!?.])( |\Z)", s)]
##-----------------------------
import time
dt = time.strptime(txt, "%Y-%m-%d")

# DON'T DO THIS.
year, month, day = re.match(r"(\d{4})-(\d\d)-(\d\d)", txt).groups() ##----------------------------- pat = r""" ^ (?: 1 \s (?: \d\d\d \s)? # 1, or 1 and area code | # ... or ... \(\d\d\d\) \s # area code with parens | # ... or ... (?: \+\d\d?\d? \s)? # optional +country code \d\d\d ([\s\-]) # and area code ) \d\d\d (\s|\1) # prefix (and area code separator) \d\d\d\d # exchange $ """ re.match(pat, txt, re.VERBOSE) ##----------------------------- re.match(r"\boh\s+my\s+gh?o(d(dess(es)?|s?)|odness|sh)\b", txt, re.IGNORECASE) ##----------------------------- for line in file(fname, "Ur"): #Universal newlines process(line) # DON'T DO THIS lines = [re.sub(r"^([^\012\015]*)(\012\015?|\015\012?)", "", line) for line in file(fname)] ##----------------------------- # @@PLEAC@@_7.0 for line in open("/usr/local/widgets/data"): if blue in line: print line[:-1] #--------- import sys, re pattern = re.compile(r"\d") for line in sys.stdin: if not pattern.search(line): sys.stderr.write("No digit found.\n") sys.stdout.write("Read: " + line) sys.stdout.close() #--------- logfile = open("/tmp/log", "w") #--------- logfile.close() #--------- print>>logfile, "Countdown initiated ..." print "You have 30 seconds to reach minimum safety distance." # DONT DO THIS import sys old_output, sys.stdout = sys.stdout, logfile print "Countdown initiated ..." sys.stdout = old_output print "You have 30 seconds to reach minimum safety distance." #--------- # @@PLEAC@@_7.1 # Python's open() function somewhat covers both perl's open() and # sysopen() as it has optional arguments for mode and buffering. source = open(path) sink = open(path, "w") #--------- # NOTE: almost no one uses the low-level os.open and os.fdopen # commands, so their inclusion here is just silly. If # os.fdopen(os.open(...)) were needed often, it would be turned # into its own function. 
Instead, I'll use 'fd' to hint that # os.open returns a file descriptor import os source_fd = os.open(path, os.O_RDONLY) source = os.fdopen(fd) sink_fd = os.open(path, os.O_WRONLY) sink = os.fdopen(sink_fd) #--------- myfile = open(filename, "w") fd = os.open(filename, os.O_WRONLY | os.O_CREAT) myfile = open(filename, "r+") #--------- fd = os.open(name, flags) fd = os.open(name, flags, mode) #--------- myfile = open(path) fd = os.open(path, os.O_RDONLY) #----------------------------- myfile = open(path, "w") fd = os.open(path, os.O_WRONLY|os.O_TRUNC|os.O_CREAT) fd = os.open(path, os.O_WRONLY|os.O_TRUNC|os.O_CREAT, 0600) #----------------------------- fd = os.open(path, os.O_WRONLY|os.O_EXCL|os.O_CREAT) fd = os.open(path, os.O_WRONLY|os.O_EXCL|os.O_CREAT, 0600) #----------------------------- myfile = open(path, "a") fd = os.open(path, os.O_WRONLY|os.O_APPEND|os.O_CREAT) fd = os.open(path, os.O_WRONLY|os.O_APPEND|s.O_CREAT, 0600) #----------------------------- fd = os.open(path, os.O_WRONLY|os.O_APPEND) #----------------------------- myfile = open(path, "rw") fd = os.open(path, os.O_RDWR) #----------------------------- fd = os.open(path, os.O_RDWR|os.O_CREAT) fd = os.open(path, os.O_RDWR|os.O_CREAT, 0600) #----------------------------- fd = os.open(path, os.O_RDWR|os.O_EXCL|os.O_CREAT) fd = os.open(path, os.O_RDWR|os.O_EXCL|os.O_CREAT, 0600) #----------------------------- # @@PLEAC@@_7.2 # Nothing different needs to be done with Python # @@PLEAC@@_7.3 import os filename = os.path.expanduser(filename) # @@PLEAC@@_7.4 myfile = open(filename) # raise an exception on error try: myfile = open(filename) except IOError, err: raise AssertionError("Couldn't open %s for reading : %s" % (filename, err.strerror)) # @@PLEAC@@_7.5 import tempfile myfile = tempfile.TemporaryFile() #----------------------------- # NOTE: The TemporaryFile() call is much more appropriate # I would not suggest using this code for real work. 
import os, tempfile while True: name = os.tmpnam() try: fd = os.open(name, os.O_RDWR|os.O_CREAT|os.O_EXCL) break except os.error: pass myfile = tempfile.TemporaryFileWrapper(os.fdopen(fd), name) # now go on to use the file ... #----------------------------- import os while True: tmpname = os.tmpnam() fd = os.open(tmpnam, os.O_RDWR | os.O_CREAT | os.O_EXCL) if fd: tmpfile = os.fdopen(fd) break os.remove(tmpnam) #----------------------------- import tempfile myfile = tempfile.TemporaryFile(bufsize = 0) for i in range(10): print>>myfile, i myfile.seek(0) print "Tmp file has:", myfile.read() #----------------------------- # @@PLEAC@@_7.6 DATA = """\ your data goes here """ for line in DATA.split("\n"): pass # process the line # @@PLEAC@@_7.7 for line in sys.stdin: pass # do something with the line # processing a list of files from commandline import fileinput for line in fileinput.input(): do something with the line #----------------------------- import sys def do_with(myfile): for line in myfile: print line[:-1] filenames = sys.argv[1:] if filenames: for filename in filenames: try: do_with(open(filename)) except IOError, err: sys.stderr.write("Can't open %s: %s\n" % (filename, err.strerror)) continue else: do_with(sys.stdin) #----------------------------- import sys, glob ARGV = sys.argv[1:] or glob.glob("*.[Cch]") #----------------------------- # NOTE: the getopt module is the prefered mechanism for reading # command line arguments import sys args = sys.argv[1:] chop_first = 0 if args and args[0] == "-c": chop_first += 1 args = args[1:] # arg demo 2: Process optional -NUMBER flag # NOTE: You just wouldn't process things this way for Python, # but I'm trying to preserve the same semantics. 
import sys, re
digit_pattern = re.compile(r"-(\d+)$")

args = sys.argv[1:]
if args:
    match = digit_pattern.match(args[0])
    if match:
        columns = int(match.group(1))
        args = args[1:]

# NOTE: here's the more idiomatic way, which also checks
# for the "--" or a non "-" argument to stop processing

args = sys.argv[1:]
for arg in args:
    if arg == "--" or not arg.startswith("-"):
        break
    if arg[1:].isdigit():
        columns = int(arg[1:])

# arg demo 3: Process clustering -a, -i, -n, or -u flags
import sys, getopt
try:
    args, filenames = getopt.getopt(sys.argv[1:], "ainu")
except getopt.error:
    raise SystemExit("usage: %s [-ainu] [filenames] ..." % sys.argv[0])

# Count how many times each flag was given.
append = ignore_ints = nostdout = unbuffer = 0
for k, v in args:
    if k == "-a":
        append += 1
    elif k == "-i":
        ignore_ints += 1
    elif k == "-n":
        nostdout += 1
    elif k == "-u":
        unbuffer += 1
    else:
        raise AssertionError("Unexpected argument: %s" % k)

#-----------------------------
# Note: Idiomatic Perl get translated to idiomatic Python
import fileinput
for line in fileinput.input():
    sys.stdout.write("%s:%s:%s" %
                     (fileinput.filename(), fileinput.filelineno(), line))
#-----------------------------
#!/usr/bin/env python
# findlogin1 - print all lines containing the string "login"
for line in fileinput.input(): # loop over files on command line
    if "login" in line:
        sys.stdout.write(line)
#-----------------------------
#!/usr/bin/env python
# lowercase - turn all lines into lowercase
### NOTE: I don't know how to do locales in Python
for line in fileinput.input(): # loop over files on command line
    sys.stdout.write(line.lower())
#-----------------------------
#!/usr/bin/env python
# NOTE: The Perl code appears buggy, in that "Q__END__W" is considered
#       to be a __END__ and words after the __END__ on the same line
#       are included in the count!!!
# countchunks - count how many words are used.
# skip comments, and bail on file if __END__
# or __DATA__ seen.
chunks = 0 for line in fileinput.input(): for word in line.split(): if word.startswith("#"): continue if word in ("__DATA__", "__END__"): fileinput.close() break chunks += 1 print "Found", chunks, "chunks" # @@PLEAC@@_7.8 import shutil old = open("old") new = open("new","w") for line in old: new.writeline(line) new.close() old.close() shutil.copyfile("old", "old.orig") shutil.copyfile("new", "old") # insert lines at line 20: for i, line in enumerate(old): if i == 20: print>>new, "Extra line 1\n" print>>new, "Extra line 2\n" print>>new, line # or delete lines 20 through 30: for i, line in enumerate(old): if 20 <= i <= 30: continue print>>new, line # @@PLEAC@@_7.9 # modifying with "-i" commandline switch is a perl feature # python has fileinput import fileinput, sys, time today = time.strftime("%Y-%m-%d",time.localtime()) for line in fileinput.input(inplace=1, backup=".orig"): sys.stdout.write(line.replace("DATE",today)) # set up to iterate over the *.c files in the current directory, # editing in place and saving the old file with a .orig extension. import glob, re match = re.compile("(?<=[pP])earl") files = fileinput.FileInput(glob.glob("*.c"), inplace=1, backup=".orig") while True: line = files.readline() sys.stderr.write(line) if not line: break if files.isfirstline(): sys.stdout.write("This line should appear at the top of each file\n") sys.stdout.write(match.sub("erl",line)) # @@PLEAC@@_7.10 #----------------------------- myfile = open(filename, "r+") data = myfile.read() # change data here myfile.seek(0, 0) myfile.write(data) myfile.truncate(myfile.tell()) myfile.close() #----------------------------- myfile = open(filename, "r+") data = [process(line) for line in myfile] myfile.seek(0, 0) myfile.writelines(data) myfile.truncate(myfile.tell()) myfile.close() #----------------------------- # @@PLEAC@@_7.11 import fcntl myfile = open(somepath, 'r+') fcntl.flock(myfile, fcntl.LOCK_EX) # update file, then... 
myfile.close() #----------------------------- fcntl.LOCK_SH fcntl.LOCK_EX fcntl.LOCK_NB fcntl.LOCK_UN #----------------------------- import warnings try: fcntl.flock(myfile, fcntl.LOCK_EX|fcntl.LOCK_NB) except IOError: warnings.warn("can't immediately write-lock the file ($!), blocking ...") fcntl.flock(myfile, fcntl.LOCK_EX) #----------------------------- fcntl.flock(myfile, fcntl.LOCK_UN) #----------------------------- # option "r+" instead "w+" stops python from truncating the file on opening # when another process might well hold an advisory exclusive lock on it. myfile = open(somepath, "r+") fcntl.flock(myfile, fcntl.LOCK_EX) myfile.seek(0, 0) myfile.truncate(0) print>>myfile, "\n" # or myfile.write("\n") myfile.close() #----------------------------- # @@PLEAC@@_7.12 # Python doesn't have command buffering. Files can have buffering set, # when opened: myfile = open(filename, "r", buffering=0) #Unbuffered myfile = open(filename, "r", buffering=1) #Line buffered myfile = open(filename, "r", buffering=100) #Use buffer of (approx) 100 bytes myfile = open(filename, "r", buffering=-1) #Use system default myfile.flush() # Flush the I/O buffer # stdout is treated as a file. If you ever need to flush it, do so: import sys sys.stdout.flush() # DON'T DO THIS. Use urllib, etc. import socket mysock = socket.socket() mysock.connect(('www.perl.com', 80)) # mysock.setblocking(True) mysock.send("GET /index.html http/1.1\n\n") f = mysock.makefile() print "Doc is:" for line in f: print line[:-1] # @@PLEAC@@_7.13 import select while True: rlist, wlist, xlist = select.select([file1, file2, file3], [], [], 0) for r in rlist: pass # Do something with the file handle # @@PLEAC@@_7.14 # @@SKIP@@ Use select.poll() on Unix systems. 
# @@INCOMPLETE@@ # @@INCOMPLETE@@ # @@PLEAC@@_7.15 # @@INCOMPLETE@@ # @@INCOMPLETE@@ # @@PLEAC@@_7.16 # NOTE: this is all much easier in Python def subroutine(myfile): print>>myfile, "Hello, file" variable = myfile subroutine(variable) # @@PLEAC@@_7.17 # @@INCOMPLETE@@ # @@INCOMPLETE@@ # @@PLEAC@@_7.18 for myfile in files: print>>myfile, stuff_to_print # NOTE: This is unix specific import os file = os.popen("tee file1 file2 file3 >/dev/null", "w") print>>myfile, "whatever" # NOTE: the "make STDOUT go to three files" is bad programming style import os, sys sys.stdout.file = os.popen("tee file1 file2 file3", "w") print "whatever" sys.stdout.close() # You could use a utility object to redirect writes: class FileDispatcher(object): def __init__(self, *files): self.files = files def write(self, msg): for f in self.files: f.write(msg) def close(self): for f in self.files: f.close() x = open("C:/test1.txt", "w") y = open("C:/test2.txt", "w") z = open("C:/test3.txt", "w") fd = FileDispatcher(x, y, z) print>>fd, "Foo" # equiv to fd.write("Foo"); fd.write("\n") print>>fd, "Testing" fd.close() # @@PLEAC@@_7.19 import os myfile = os.fdopen(fdnum) # open the descriptor itself myfile = os.fdopen(os.dup(fdnum)) # open to a copy of the descriptor ### outcopy = os.fdopen(os.dup(sys.stdin.fileno()), "w") incopy = os.fdopen(os.dup(sys.stdin.fileno()), "r") # @@PLEAC@@_7.20 original = open("C:/test.txt") alias = original alias.close() print original.closed #=>True import copy original = open("C:/test.txt") dupe = copy.copy(original) dupe.close() print original.closed #=>False # DON'T DO THIS. 
import sys oldstderr = sys.stderr oldstdout = sys.stdout sys.stderr = open("C:/stderrfile.txt") sys.stdout = open("C:/stdoutfile.txt") print "Blah" # Will be written to C:/stdoutfile.txt sys.stdout.close() sys.stdout = oldstdout sys.stderr = oldstderr # @@PLEAC@@_7.21 # @@INCOMPLETE@@ # @@INCOMPLETE@@ # @@PLEAC@@_7.22 # On Windows: import msvcrt myfile.seek(5, 0) msvcrt.locking(myfile.fileno(), msvcrt.LK_NBLCK, 3) # On Unix: import fcntl fcntl.lockf(myfile.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB, 3, 5) # ^^PLEAC^^_8.0 #----------------------------- for line in DATAFILE: line = line.rstrip() size = len(line) print size # output size of line #----------------------------- for line in datafile: print length(line.rstrip()) # output size of line #----------------------------- lines = datafile.readlines() #----------------------------- whole_file = myfile.read() #----------------------------- ## No direct equivalent in Python #% perl -040 -e '$word = <>; print "First word is $word\n";' #----------------------------- ## No direct equivalent in Python #% perl -ne 'BEGIN { $/="%%\n" } chomp; print if /Unix/i' fortune.dat #----------------------------- print>>myfile, "One", "two", "three" # "One two three" print "Baa baa black sheep." # Sent to default output file #-------------------