#!/usr/bin/awk -f
# mpfextract: Decodes Microsoft's Clip Art Organizer 2002 .mpf
# files into the separate clip art files contained within
# (usually .wmf or .jpg). Such files can be downloaded from
# office.microsoft.com free of charge, if you have Microsoft
# Office or another product with their Clip Art Organizer.
# This simple script allows such clip art to be used in other
# products as well, by extracting it from the container format
# produced by the site. (But you still need to have a
# license for the aforementioned Microsoft products.)
#
# This script's raison d'etre is to get Microsoft's clipart
# usable on my Macintosh, but I only have Office for Windows (which
# I'm no longer using).
#
# Written by Kimmo Kulovesi , 2004-09-04
#
# ABSOLUTELY NO WARRANTY - USE AT YOUR OWN RISK ONLY!
# You may distribute this freely, just keep the notices in
# this file intact, and mark modified versions as such.
#
#
# Requirements:
# - An Awk interpreter (e.g. GNU Awk). OS X comes with this.
# - A BASE64 decoder capable of reading from stdin and outputting to
# a file named at the end of the command line. One such is called
# base64, but you can change the decoder command line below.
# OS X does _not_ come with this (to my knowledge) and Fink
# does not offer one at the time of writing. I downloaded the
# sources of base64 from its distribution site
# and did "./configure ; make ; make install".
#
# Usage:
# - Basic usage to decode filename.mpf:
# mpfextract filename.mpf
# - You can also specify an output directory:
# mpfextract -v "destdir=/pics/clipart" filename.mpf
#
# Caveats:
# - Does not really parse the XML input, just assumes it to be
# formatted in a certain way. Files in this format are currently
# generated when downloading Clip Art from office.microsoft.com,
# but things might change at any time.
# - Will probably overwrite any existing filenames without asking,
# unless your decoder refuses to do so.
#
# Notes:
# - When downloading clip art from Microsoft's site, your browser
# must not reveal that it's running on a Macintosh. If it does,
# the site won't let you download .mpf format clip art, giving
# you the clip art in some binary format I don't know how to
# decode. Use e.g. Opera or Firefox and possibly set it to
# lie about its user agent, and then select "Clip Art Organizer 2002"
# when presented with a choice of formats. The resulting download
# should be a .mpf file which this script can decode.
# Initialisation: set the decoder command line, the field separator used to
# pick apart the tag lines, and normalise the optional output directory.
BEGIN {
    # Change this to the command line of your base64 decoder:
    BASE64DECODER="base64 -d -- -"
    # The decoder must read from stdin and write to a file
    # given as the last command line argument.

    ### DON'T EDIT BELOW THIS (unless you want to ;>)

    # Fields are separated by runs of blanks, tabs and the characters
    # < > : " so that a tag written as <C:filepath> splits into an empty
    # $1 followed by $2 == "C" and $3 == "filepath".
    FS="[ \t<>:\"]+"

    # destdir may be supplied with -v. It is later prefixed directly to
    # the file name, so make sure it ends with a "/" -- but do not add a
    # second one when the user already supplied it.
    if (destdir != "" && destdir !~ /\/$/)
        destdir = destdir "/"
}
# Runs for the input line that follows a "contents" tag (base64 == 1).
# That single line is piped to the decoder command, which is expected to
# write the decoded data to the file named as its last argument.
# Exits with status 1 if no file name has been recorded for this data.
base64 == 1 {
    if (filename == "") {
        print "ERROR: No filename for file on line " NR "!" >"/dev/stderr"
        exit 1
    }
    print "Decoding " filename "..." >"/dev/stderr"

    # Single-quote the file name for the shell so that spaces and shell
    # metacharacters in clip names (or in destdir) are passed literally
    # instead of being split or interpreted. An embedded ' is written
    # as '\'' (close quote, escaped quote, reopen quote).
    quoted = filename
    gsub(/'/, "'\\''", quoted)
    cmd = BASE64DECODER " '" quoted "'"

    print $0 | cmd

    # Close the pipe right away: this flushes the data, waits for the
    # decoder to finish, and frees the descriptor so that files holding
    # many clips do not accumulate open pipes. A non-zero result means
    # the decoder could not be run or reported an error; warn but keep
    # going so the remaining clips are still extracted.
    if (close(cmd) != 0)
        print "WARNING: Decoder failed for " filename "!" >"/dev/stderr"

    # Reset state so the next clip must provide its own path and data.
    base64 = 0
    filename = ""
    next
}
# Matches a line whose second and third fields are "C" and "filepath".
# Takes the text between the opening tag and the next "<", flattens it
# into a single file name and stores it (prefixed by destdir) in the
# global `filename` for the decoding rule to use.
$2 == "C" && $3 == "filepath" {
    name = $0
    sub(/[^>]+>/, "", name)     # Drop everything up to the first ">"
    sub(/<.*/, "", name)        # Drop the closing tag and what follows
    sub(/^\//, "", name)        # Strip leading /
    gsub(/\//, "_", name)       # Convert further /'s to underscores

    # An empty name must not turn into just the directory; leave filename
    # empty so the decoding rule reports the missing file name instead.
    if (name == "")
        filename = ""
    else
        filename = destdir name
    next
}
# Matches a line whose second and third fields are "C" and "contents".
# When the sixth field names the bin.base64 encoding, flag the next input
# line as base64 data; any other value aborts with exit status 1.
$2 == "C" && $3 == "contents" {
    if ($6 == "bin.base64") {
        # The following line is handled by the base64 == 1 rule.
        base64 = 1
        next
    }
    print "ERROR: Unknown content type " $6 "!" >"/dev/stderr"
    exit 1
}