#!/usr/bin/awk -f
# mpfextract: Decodes Microsoft's Clip Art Organizer 2002 .mpf
# files into the separate clip art files contained within
# (usually .wmf or .jpg). Such files can be downloaded from
# Microsoft's clip art site (office.microsoft.com at the time of
# writing) free of charge, if you have Microsoft
# Office or another product with their Clip Art Organizer.
# This simple script allows such clip art to be used in other
# products as well, by extracting it from the container format
# produced by the site. (But you still need to have a
# license for the aforementioned Microsoft products.)
#
# This script's raison d'etre is to get Microsoft's clipart
# usable on my Macintosh, but I only have Office for Windows (which
# I'm no longer using).
#
# Written by Kimmo Kulovesi, 2004-09-04
#
# ABSOLUTELY NO WARRANTY - USE AT YOUR OWN RISK ONLY!
# You may distribute this freely, just keep the notices in
# this file intact, and mark modified versions as such.
#
#
# Requirements:
#   - An Awk interpreter (e.g. GNU Awk). OS X comes with this.
#   - A BASE64 decoder capable of reading from stdin and outputting to
#     a file named at the end of the command line. One such is called
#     base64, but you can change the decoder command line below.
#     OS X does _not_ come with this (to my knowledge) and Fink
#     does not offer one at the time of writing. I downloaded the
#     sources of base64 and did "./configure ; make ; make install".
#
# Usage:
#   - Basic usage to decode filename.mpf:
#       mpfextract filename.mpf
#   - You can also specify an output directory:
#       mpfextract -v "destdir=/pics/clipart" filename.mpf
#
# Caveats:
#   - Does not really parse the XML input, just assumes it to be
#     formatted in a certain way. Files in this format are currently
#     generated when downloading Clip Art from office.microsoft.com,
#     but things might change at any time.
#   - Will probably overwrite any existing filenames without asking,
#     unless your decoder refuses to do so.
#
# Notes:
#   - When downloading clip art from Microsoft's site, your browser
#     must not reveal that it's running on a Macintosh. If it does,
#     the site won't let you download .mpf format clip art, giving
#     you the clip art in some binary format I don't know how to
#     decode. Use e.g. Opera or Firefox and possibly set it to
#     lie about the user agent, and then select "Clip Art Organizer 2002"
#     when presented with a choice of formats. The resulting download
#     should be a .mpf file which this script can decode.
#
# MODIFIED VERSION: differs from the original script in that output
# filenames are shell-quoted before being handed to the decoder, the
# decoder pipe is closed after every clip, and a non-portable regular
# expression was corrected.

# shell_quote(s): returns s wrapped in single quotes, with every embedded
# single quote rewritten as '\'' so that the shell started for the decoder
# pipe sees the result as exactly one literal word. Names after s are locals.
function shell_quote(s,    n, parts, i, out) {
    # Splitting on the quote character avoids the awk-dependent handling
    # of backslashes in sub()/gsub() replacement strings.
    n = split(s, parts, "'")
    out = "'" parts[1]
    for (i = 2; i <= n; i++)
        out = out "'\\''" parts[i]
    return out "'"
}

BEGIN {
    # Change this to the command line of your base64 decoder:
    BASE64DECODER = "base64 -d -- -"
    # The decoder must read from stdin and write to a file
    # given as the last command line argument. The (quoted) output
    # filename is appended to this command line by the script.

    ### DON'T EDIT BELOW THIS (unless you want to ;>)

    # Treat whitespace, angle brackets, colons and double quotes as field
    # separators, so that a line beginning with "<C:filepath>" yields
    # $2 == "C" and $3 == "filepath" ($1 is the empty text before "<").
    FS = "[ \t<>:\"]+"

    # destdir may be supplied with -v; make sure it ends in exactly the
    # slash needed to prefix a filename.
    if (destdir != "" && destdir !~ /\/$/)
        destdir = destdir "/"
}

# The line following a "contents" tag holds the base64 data for the most
# recently seen "filepath": feed that single line to the decoder.
base64 == 1 {
    if (filename == "") {
        print "ERROR: No filename for file on line " NR "!" >"/dev/stderr"
        exit 1
    }
    print "Decoding " filename "..." >"/dev/stderr"
    # The filename originates from the input file, so it is quoted to keep
    # spaces and shell metacharacters from being interpreted by the shell.
    cmd = BASE64DECODER " " shell_quote(filename)
    print $0 | cmd
    # Close the pipe so the decoder finishes now and its descriptor is
    # released before the next clip; a non-zero result means it failed.
    if (close(cmd) != 0)
        print "WARNING: Decoder reported failure for " filename "!" >"/dev/stderr"
    base64 = 0
    filename = ""
    next
}

# "filepath" element: derive the output filename from the element's text.
$2 == "C" && $3 == "filepath" {
    sub(/[^>]+>/, "", $0)   # Drop everything up to the end of the opening tag
    sub(/<.*/, "", $0)      # Drop the closing tag and anything after it
    sub(/^\//, "", $0)      # Strip leading /
    gsub(/\//, "_", $0)     # Convert further /'s to underscores
    name = $0
    if (name == "")
        filename = ""                   # Reported as an error when decoding
    else if (destdir == "" && name ~ /^-/)
        filename = "./" name            # Keep it from looking like an option
    else
        filename = destdir name
    next
}

# "contents" element: only base64 payloads are understood; the data itself
# is expected on the next input line.
$2 == "C" && $3 == "contents" {
    if ($6 != "bin.base64") {
        print "ERROR: Unknown content type " $6 "!" >"/dev/stderr"
        exit 1
    }
    base64 = 1
    next
}