Dies ist die Support-Website des Buches:

Das Python Praxisbuch
Der große Profi-Leitfaden für Programmierer
Farid Hajji
Addison Wesley / Pearson Education
ISBN 978-3-8273-2543-3 (Sep 2008), 1298 Seiten.

9. Dateien und das Dateisystem

Dateien

Die Funktion close

Die Funktion show_file:

def show_file(path_to_file):
    """Print the whole content of the text file path_to_file to stdout.

    The file is closed explicitly, even when reading fails, instead of
    relying on the garbage collector to release the handle.

    Raises IOError if the file cannot be opened or read.
    """
    f = open(path_to_file, 'r')
    try:
        # A single parenthesised argument prints identically with the
        # Python 2 print statement and the Python 3 print function.
        print(f.read())
    finally:
        f.close()

URLs:

Textdateien lesen und schreiben

Anwendung: Sortieren nach Wortenden

URLs:

#!/usr/bin/env python
# revwordlist.py -- sort a list of words according to their endings.

# This uses Jon Bentley's "Programming Pearls" trick:
# $ rev < word.list | sort | rev > words.rev

from __future__ import with_statement

WORDS_LIST = '/usr/share/dict/words'
WORDS_REV  = '/tmp/words.rev'

def sort_by_endings(wordlist_in=WORDS_LIST, wordlist_out=WORDS_REV):
    """Sort the words of wordlist_in by their endings into wordlist_out.

    wordlist_in is read as a text file with one word per line.  Every word
    is reversed, the reversed words are sorted, and each word is reversed
    back before being written, one per line, to wordlist_out.

    The line terminator is stripped before reversing.  Otherwise a final
    line without a trailing newline would be compared without the leading
    newline all other reversed lines carry, and would end up in the wrong
    position.  Every output line is terminated by a newline.
    """
    with open(wordlist_in, 'r') as f_in:
        thelist = [line.rstrip('\n')[::-1] for line in f_in.readlines()]
    thelist.sort()
    with open(wordlist_out, 'w') as f_out:
        f_out.writelines([word[::-1] + '\n' for word in thelist])

if __name__ == '__main__':
    sort_by_endings()

revwordlist.py

#!/usr/bin/env python
# revwordlist2.py -- sort a list of words according to their endings.

# This uses Jon Bentley's "Programming Pearls" trick:
# $ rev < word.list | sort | rev > words.rev

from __future__ import with_statement

WORDS_LIST = '/usr/share/dict/words'
WORDS_REV  = '/tmp/words.rev'

def sort_by_endings(wordlist_in=WORDS_LIST, wordlist_out=WORDS_REV):
    """Sort the words of wordlist_in by their endings into wordlist_out.

    The input file is iterated line by line; each word is reversed and
    collected, the collection is sorted, and every word is reversed back
    and written, one per line, to wordlist_out.

    The line terminator is stripped before reversing.  Otherwise a final
    line without a trailing newline would be compared without the leading
    newline all other reversed lines carry, and would end up in the wrong
    position.  Every output line is terminated by a newline.
    """
    thelist = []
    with open(wordlist_in, 'r') as f_in:
        for line in f_in:
            thelist.append(line.rstrip('\n')[::-1])

    thelist.sort()

    with open(wordlist_out, 'w') as f_out:
        for word in thelist:
            f_out.write(word[::-1] + '\n')

if __name__ == '__main__':
    sort_by_endings()

revwordlist2.py

Binärdateien lesen und schreiben

URLs:

Anwendung: Stückweises Kopieren von Dateien

#!/usr/bin/env python
# chunkwise-copy.py -- copy a file chunkwise

from __future__ import with_statement

CHUNKSIZE = 4096

def chunkwise_copy(source, destination, chunksize=CHUNKSIZE):
    """Copy file source into destination in chunks of chunksize bytes.

    Both files are opened in binary mode.  destination is created or
    overwritten.  chunksize defaults to CHUNKSIZE and must be a positive
    number of bytes.

    Raises ValueError for a non-positive chunksize (before any file is
    opened) and IOError if a file cannot be opened, read or written.
    """
    if chunksize <= 0:
        # read(0) would copy nothing and a negative size would read the
        # whole file at once; neither is a chunkwise copy.
        raise ValueError("chunksize must be positive")
    with open(source, 'rb') as f_src:
        with open(destination, 'wb') as f_dest:
            while True:
                buff = f_src.read(chunksize)
                if not buff:        # empty read: end of file reached
                    break
                f_dest.write(buff)

if __name__ == '__main__':
    import sys
    # Exactly two operands are expected: the file to read and the file
    # to write.  Anything else prints a usage hint and exits with status 1.
    if len(sys.argv) != 3:
        print >>sys.stderr, "Usage:", sys.argv[0], "source destination"
        sys.exit(1)
    source, destination = sys.argv[1:]
    chunkwise_copy(source, destination)

chunkwise-copy.py

Anwendung: Auslesen von mp3 ID3v1-Tags

#!/usr/bin/env python
# parseid3.py -- Use binary read to parse the ID3v1 header of an MP3 file.

def fetch_ID3tag(mp3):
    """Fetch the ID3 tag of the file named mp3.

    The last 128 bytes of the file are read and handed to parse_ID3tag.

    Returns whatever parse_ID3tag returns for those bytes (a dictionary,
    or None if they are not an ID3 tag), or None if the file cannot be
    opened, is shorter than 128 bytes, or cannot be read.
    """
    try:
        f = open(mp3, 'rb')
        try:
            f.seek(-128, 2)     # 128 bytes before the end of the file
            buf = f.read(3+30+30+30+4+30+1) # read so many bytes
        finally:
            # Close the file even when seek() or read() fails.
            f.close()
    except IOError:
        return None     # Can't fetch ID3 tag

    return parse_ID3tag(buf)

def parse_ID3tag(buf):
    """Parse an ID3 tag stored in buf and return a dictionary or None.

    buf is expected to be the 128 character tag block (a Python 2 byte
    string): 'TAG', 30 chars title, 30 chars artist, 30 chars album,
    4 chars year, 30 chars comment (optionally carrying a track number)
    and 1 byte genre.

    Returns None if buf is shorter than 128 characters or does not start
    with 'TAG'.  Otherwise returns a dictionary with the keys 'title',
    'artist', 'album', 'year', 'comment' (padding removed), 'track'
    (an int or None) and 'genre' (an int).
    """
    # A truncated buffer cannot hold all fields; treat it like a missing
    # tag instead of failing with an IndexError further down.
    if len(buf) < 128 or not buf.startswith('TAG'):
        return None     # Not an ID3 tag!

    id3 = {}
    id3['title']   = remove_padding(buf[3:33])    # 30 chars for title
    id3['artist']  = remove_padding(buf[33:63])   # 30 chars for artist
    id3['album']   = remove_padding(buf[63:93])   # 30 chars for album
    id3['year']    = remove_padding(buf[93:97])   #  4 chars for year

    raw_comment    = buf[97:127]                  # 30 chars for comment+track

    # ID3v1.1 convention: a zero byte followed by a non-zero last byte
    # means the last byte of the comment field is the track number.
    if ord(raw_comment[-2]) == 0 and ord(raw_comment[-1]) != 0:
        id3['track'] = ord(raw_comment[-1])
        id3['comment'] = remove_padding(raw_comment[:-2])
    else:
        id3['track'] = None
        id3['comment'] = remove_padding(raw_comment)

    id3['genre']   = ord(buf[127])                #  1 byte  for genre

    return id3

def remove_padding(inp):
    """Return inp without leading and trailing blanks and NUL characters."""
    return inp.strip(' \x00')

if __name__ == '__main__':
    import sys, pprint
    if len(sys.argv) < 2:
        print "Usage:", sys.argv[0], "[file.mp3 ...]"
        sys.exit(0)
    
    for fname in sys.argv[1:]:
        print "ID3(%s) == " % fname
        pprint.pprint(fetch_ID3tag(fname))

parseid3.py

RAM-Dateien mit StringIO

Ein Beispiel für cStringIO:

from xml.etree.cElementTree import parse
from cStringIO import StringIO

xml_data = '<pers><sn>Doe</sn><fn>John</fn></pers>'

# Read side: wrap the XML text in an in-memory file object and parse it.
et = parse(StringIO(xml_data))

# Write side: serialise the tree into a second, initially empty in-memory
# file and fetch everything that was written to it as one string.
mof = StringIO()
et.write(mof, encoding='UTF-8')
s = mof.getvalue()

print(s)

Screenshots:

Spezielle Dateiformate

Das Dateisystem

URLs:

Dateien verschieben oder löschen

Metadaten einer Datei

Plattformunabhängige Metadaten mit os.path.*

Plattformabhängige Metadaten mit os.stat (Unix)

URLs:

#!/usr/bin/env python
# statfile.py -- present os.stat results in a friendlier form.

import os, time, pwd, grp
from stat import *

def statfile(path):
    """Return an ls -l like one-liner with the meta-data of file path.

    The result of os.stat(path) is turned into a dictionary by parse_stat
    and then rendered by format_stat.
    """
    return format_stat(parse_stat(path, os.stat(path)))

def parse_stat(path, sval):
    """Turn the os.stat result sval of file path into a dictionary.

    Keys: 'path', 'type', 'perms', 'dev', 'ino', 'nlinks', 'size',
    'owner', 'group', 'atime', 'mtime' and 'ctime'.  Mode, ids and
    timestamps are converted to their printable forms by the helper
    functions of this module.
    """
    mode = sval[ST_MODE]
    return {
        'path':   path,
        'type':   mode_to_filetype(mode),
        'perms':  permissions_to_string(S_IMODE(mode)),
        'dev':    sval[ST_DEV],
        'ino':    sval[ST_INO],
        'nlinks': sval[ST_NLINK],
        'size':   sval[ST_SIZE],
        'owner':  uid_to_user(sval[ST_UID]),
        'group':  gid_to_group(sval[ST_GID]),
        'atime':  epoch_to_string(sval[ST_ATIME]),
        'mtime':  epoch_to_string(sval[ST_MTIME]),
        'ctime':  epoch_to_string(sval[ST_CTIME]),
    }

def format_stat(sdict):
    """Render the stat dictionary sdict as an ls -l like one-liner.

    Uses the keys 'dev', 'ino', 'type', 'perms', 'nlinks', 'owner',
    'group', 'size', 'mtime' and 'path' of sdict.
    """
    template = ('(%(dev)d,%(ino)d) %(type)s%(perms)s %(nlinks)d '
                '%(owner)s %(group)s %(size)d %(mtime)s %(path)s')
    return template % sdict
    
def mode_to_filetype(mode):
    """Return the one-character file type according to mode.

    The predicates are tried in order and the character of the first one
    that accepts mode is returned; '?' is returned if none of them does.
    """
    type_tests = (
        (S_ISREG,  '-'),    # regular file
        (S_ISDIR,  'd'),    # directory
        (S_ISLNK,  'l'),    # symlink
        (S_ISFIFO, 'p'),    # FIFO
        (S_ISSOCK, 's'),    # socket
        (S_ISCHR,  'c'),    # character device
        (S_ISBLK,  'b'),    # block device
    )
    for is_type, symbol in type_tests:
        if is_type(mode):
            return symbol
    return '?'              # unknown type: shouldn't happen

def permissions_to_string(perm):
    """Convert permission bits perm into a string such as rwxr-xr-x.

    perm holds the permission bits of a file mode, including the set-uid,
    set-gid and sticky bits.  The result has nine characters: read, write
    and execute for owner, group and others, following the conventions
    of ls -l:

      - set-uid / set-gid replace the owner / group execute position by
        's' if execute is also set, by 'S' otherwise;
      - the sticky bit replaces the execute position of others by 't'
        if execute is also set, by 'T' otherwise.
    """
    bits = []
    for read, write, execute, special, mark in (
            (S_IRUSR, S_IWUSR, S_IXUSR, S_ISUID, 's'),    # owner
            (S_IRGRP, S_IWGRP, S_IXGRP, S_ISGID, 's'),    # group
            (S_IROTH, S_IWOTH, S_IXOTH, S_ISVTX, 't')):   # others
        bits.append('r' if perm & read else '-')
        bits.append('w' if perm & write else '-')
        if perm & special:
            # Lower case: special bit and execute; upper case: special
            # bit without execute permission.
            bits.append(mark if perm & execute else mark.upper())
        else:
            bits.append('x' if perm & execute else '-')
    return ''.join(bits)

def uid_to_user(uid):
    """Convert user-id uid to a user name.

    Returns the login name of the password database entry of uid.  If
    there is no entry for uid (pwd.getpwuid raises KeyError), the numeric
    id is returned as a string instead.
    """
    try:
        return pwd.getpwuid(uid)[0]
    except KeyError:
        return str(uid)

def gid_to_group(gid):
    """Convert group-id gid to a group name.

    Returns the name of the group database entry of gid.  If there is no
    entry for gid (grp.getgrgid raises KeyError), the numeric id is
    returned as a string instead.
    """
    try:
        return grp.getgrgid(gid)[0]
    except KeyError:
        return str(gid)

def epoch_to_string(tsec):
    """Return the local time for tsec seconds since the Epoch as a string.

    The string has the format produced by time.asctime.
    """
    local_time = time.localtime(tsec)
    return time.asctime(local_time)

if __name__ == '__main__':
    import sys
    # Print one ls -l like line for every path given on the command line.
    for fname in sys.argv[1:]:
        stat_line = statfile(fname)
        print(stat_line)

statfile.py

Das Dateisystem durchlaufen

os.listdir

glob.glob

Rekursives Durchlaufen mit os.walk

Die Funktion os.walk wird typischerweise so verwendet:

import os
import os.path

for root, dirs, files in os.walk('/path/to/files', topdown=True):
    # First handle every file found directly in root ...
    for file_name in files:
        file_path = os.path.join(root, file_name)
        proceed_file(file_path)

    # ... then every (sub-)directory found directly in root.
    for dir_name in dirs:
        dir_path = os.path.join(root, dir_name)
        proceed_directory(dir_path)

Anwendung: Prüfsummen

#!/usr/bin/env python
# fingerprint.py -- fingerprints files with MD5 and SHA1

import hashlib

def compute_md5(file):
    digester = hashlib.md5()
    return _compute_digest(file, digester)

def compute_sha1(file):
    digester = hashlib.sha1()
    return _compute_digest(file, digester)

_BLOCKSIZE = 2048
def _compute_digest(file, digest_algorithm):
    while 1:
        chunk = file.read(_BLOCKSIZE)
        if not chunk: break
        digest_algorithm.update(chunk)
    file.close()
    return digest_algorithm.hexdigest()

if __name__ == '__main__':
    import sys, getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ms", [ "md5", "sha1" ])
    except getopt.GetoptError:
        print("Usage: %s [-m | -s] [path ...]" % sys.argv[0])
        sys.exit(0)

    # m / s are set when MD5 / SHA1 output was requested.
    m = s = None
    for o, a in opts:
        if o in ("-m", "--md5"):
            m = True
        elif o in ("-s", "--sha1"):
            s = True
    if m is None and s is None:
        m = True                                # Default is MD5

    for pname in args:
        if m:
            md5sum = compute_md5(open(pname, "rb"))
            print("MD5 (%s) = %s" % (pname, md5sum))
        if s:
            sha1sum = compute_sha1(open(pname, "rb"))
            print("SHA1 (%s) = %s" % (pname, sha1sum))

fingerprint.py

#!/usr/bin/env python
# fpdir.py -- fingerprint whole directories with MD5 (later: and SHA1)

import os, os.path
import re
import fingerprint

# Name of the per-directory checksum file that make_fingerprints writes
# and check_fingerprints reads.
md5fname = "md5.txt"
# Name reserved for a SHA1 checksum file; not referenced by the code
# visible in this script.
sha1fname = "sha1.txt"
# Matches one checksum line of the form "MD5 (<file name>) = <checksum>":
# group 1 is the file name, group 2 the recorded checksum.
matcher = re.compile(r'MD5 \((.*)\) = (.*)')

def make_fingerprints(path, verbose=None):
    """Write an MD5 checksum file into every directory below path.

    The tree rooted at path is walked with os.walk.  In every directory
    that contains files other than "TRANS.TBL" and the checksum file
    itself, a file named md5fname is (re)created with one line
    "MD5 (<file name>) = <checksum>" per file, sorted by file name.
    Directories without such files get no checksum file.

    If verbose is true, a "Checksummed: <directory>" line is printed for
    every checksum file written.  The checksum file is closed even when
    fingerprinting one of the files fails; the error is propagated.
    """
    for root, dirs, files in os.walk(path):
        # Neither TRANS.TBL nor the checksum file itself is fingerprinted.
        names = sorted(name for name in files
                       if name not in ("TRANS.TBL", md5fname))
        if not names:
            # Only create md5fname where there is something to record.
            continue
        md5file = open(os.path.join(root, md5fname), "w")
        try:
            for name in names:
                checksum = fingerprint.compute_md5(
                    open(os.path.join(root, name), "rb"))
                md5file.write("MD5 (%s) = %s\n" % (name, checksum))
        finally:
            md5file.close()
        # Truthiness test, so that verbose=False stays quiet as well.
        if verbose:
            print("Checksummed: %s" % root)

def check_fingerprints(path, verbose=None):
    """Verify the MD5 checksum files found in the tree below path.

    The tree rooted at path is walked with os.walk.  In every directory
    containing a file named md5fname, each line of the form
    "MD5 (<file name>) = <checksum>" is compared with the freshly
    computed checksum of that file (file names are relative to the
    directory).  Lines of any other form are ignored.  For every
    mismatch the recorded ("<") and the computed (">") values are
    printed.

    If verbose is true, a "Verified: <directory>" line is printed for
    every checksum file processed.  The checksum file is closed even
    when a listed file cannot be opened or read; that error is
    propagated.
    """
    for root, dirs, files in os.walk(path):
        if md5fname not in files:
            continue
        # There's a checksum file here. check its contents:
        md5file = open(os.path.join(root, md5fname))
        try:
            for line in md5file:
                # Line is in the form MD5 (fname) = cksum
                mo = matcher.match(line)
                if mo is None:
                    continue
                fname, recorded_md5sum = mo.group(1), mo.group(2)
                # fname is always relative to root
                full_name = os.path.join(root, fname)
                computed_md5sum = fingerprint.compute_md5(
                    open(full_name, "rb"))
                if recorded_md5sum != computed_md5sum:
                    print("< MD5 (%s) = %s" % (full_name, recorded_md5sum))
                    print("> MD5 (%s) = %s" % (full_name, computed_md5sum))
        finally:
            md5file.close()
        # Truthiness test, so that verbose=False stays quiet as well.
        if verbose:
            print("Verified: %s" % root)

if __name__ == '__main__':
    import sys, getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "mcv",
                                   ["make", "check", "verbose"])
    except getopt.GetoptError:
        print("Usage: %s [-m | -c] [-v] [path ...]" % sys.argv[0])
        sys.exit(0)

    # m: create checksum files, c: verify them, v: report each directory.
    m = c = v = None
    for o, a in opts:
        if o in ("-m", "--make"):
            m = True
        elif o in ("-c", "--check"):
            c = True
        elif o in ("-v", "--verbose"):
            v = True
    if m is None and c is None:
        c = True                               # Default is checking

    # Without any path argument work on the current directory.
    if not args:
        args.append(".")

    for pname in args:
        if m:
            make_fingerprints(pname, v)
        if c:
            check_fingerprints(pname, v)

fpdir.py

Das shutil-Modul

Screenshots:

Zusammenfassung