#!/usr/bin/python

# A naive B-Course automatizer
# Written by V.H.Tuulos for Three Concepts: Probability 
# and patched by T. Silander
#
# Sends your data to the B-Course server, learns a
# Bayes network and outputs it as a HUGIN file and
# a nice PNG graph.
#
# last modified 20.2.2007
#
# usage:    auto-bcourse.py data.txt secs-to-wait
#

import httplib, mimetypes, sys, urllib2, re, time


# B-Course host
# Bare hostname without a scheme; "http://" is prepended wherever a full
# URL is built from it.

BCOURSE = "b-course.cs.helsinki.fi"

# URLs
# Server-relative paths. Except for UPLOAD these are %-format templates
# meant to be filled from a dict, e.g. SSEARCH % {'sid': ...}.
# "%%20" is an escaped percent sign: after formatting it becomes "%20",
# the URL encoding of a space (so submit=Start%20search etc.).
#
#   UPLOAD   no placeholders
#   MSEARCH  sid, nofvals, vals ('vals' is spliced in as a ready-made
#            query-string fragment)
#   SSEARCH  sid
#   NSEARCH  sid, branch, from ('from' must be an integer: %(from)d);
#            not referenced elsewhere in the visible part of this file
#   FSEARCH  sid, branch

CGI_BIN = "/obc/cgi-bin"
UPLOAD  = CGI_BIN+"/bn/handleupload.pl"
MSEARCH = CGI_BIN+"/bn/beforesearch.py?sid=%(sid)s&nofvals=%(nofvals)s&%(vals)s"
SSEARCH = CGI_BIN+"/bn/search.py?sid=%(sid)s&submit=Start%%20search"
NSEARCH = CGI_BIN+"/bn/searchreport.py?sid=%(sid)s&branch=%(branch)s&from=%(from)d&submit=Next%%20report"
FSEARCH = CGI_BIN+"/bn/finalreport.py?sid=%(sid)s&branch=%(branch)s&submit=Final%%20report"

# Result downloads; both take only 'sid' and have branch fixed to 0.
PNG = CGI_BIN+"/bnetwork.png?sid=%(sid)s&branch=0"
HUGIN = CGI_BIN+"/bn/B-Course.net?sid=%(sid)s&branch=0"

# Regexps

# Captures the run of word characters between "sid=" and the following "&".
sid = re.compile(r'sid=(\w+?)&')

# Each of these captures one decimal number from a fixed phrase.
# NOTE(review): the bytes pattern has TWO spaces before "bytes" --
# presumably this mirrors the server's wording; verify before "fixing" it.
vs_page_bytes = re.compile(r'received (\d+)  bytes')
vs_page_cases = re.compile(r'were (\d+) cases')
vs_page_vars  = re.compile(r'had (\d+) variables')

# Captures a single digit from text of the form: branch" value='N'
branch = re.compile(r'branch\" value=\'(\d)\'')


# Multipart POST -code shamelessly ripped from
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306
 
def post_multipart(h, selector, fields, files):
    """
    Send fields and files to an HTTP host as one multipart/form-data POST.

    h        -- connection object; must provide putrequest, putheader,
                endheaders, send and getresponse
    selector -- request path to POST to
    fields   -- sequence of (name, value) pairs for regular form fields
    files    -- sequence of (name, filename, value) triples for uploads

    Returns whatever h.getresponse() returns, i.e. the response object
    (not the page text).
    """
    mime_type, payload = encode_multipart_formdata(fields, files)

    h.putrequest('POST', selector)
    request_headers = (
        ('content-type', mime_type),
        ('content-length', str(len(payload))),
    )
    for header_name, header_value in request_headers:
        h.putheader(header_name, header_value)
    h.endheaders()
    h.send(payload)

    return h.getresponse()

def encode_multipart_formdata(fields, files):
    """
    Build a multipart/form-data request body.

    fields -- sequence of (name, value) pairs for regular form fields
    files  -- sequence of (name, filename, value) triples for uploads;
              each part's Content-Type comes from get_content_type(filename)

    Returns a (content_type, body) tuple: the Content-Type header value
    (including the boundary) and the CRLF-joined body.
    """
    BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
    CRLF = '\r\n'
    # Every part opens with the boundary prefixed by two dashes.
    delimiter = '--' + BOUNDARY

    parts = []
    for (name, value) in fields:
        parts.extend([
            delimiter,
            'Content-Disposition: form-data; name="%s"' % name,
            '',
            value,
        ])
    for (name, filename, value) in files:
        parts.extend([
            delimiter,
            'Content-Disposition: form-data; name="%s"; filename="%s"' % (name, filename),
            'Content-Type: %s' % get_content_type(filename),
            '',
            value,
        ])
    # Closing delimiter; the empty element makes the body end with CRLF.
    parts.extend([delimiter + '--', ''])

    return 'multipart/form-data; boundary=%s' % BOUNDARY, CRLF.join(parts)

def get_content_type(filename):
    """
    Return the MIME type guessed from filename, falling back to
    'application/octet-stream' when no type can be guessed.
    """
    guessed_type, _encoding = mimetypes.guess_type(filename)
    if guessed_type:
        return guessed_type
    return 'application/octet-stream'


# ----------------------------------------------------------------------
# Script body.
#
# Uploads the data file to B-Course, starts a model search, waits the
# requested number of seconds, asks for the final report and then saves
# the resulting network as bnetwork.png and bnetwork.net in the current
# directory.
#
# Kept compatible with old Python 2 interpreters on purpose: try/finally
# instead of "with", and print is always called with ONE parenthesised
# argument so it behaves the same as the print statement.
# ----------------------------------------------------------------------

# Usage text shared by every command-line validation failure below.
_USAGE = "\nUsage: auto-bcourse.py data.txt time-to-wait\n"


def _fetch(path):
    """GET `path` from the B-Course host.

    Returns a (final_url, body) tuple; final_url is the URL after any
    redirects were followed. The response is closed even if reading fails.
    """
    f = urllib2.urlopen("http://" + BCOURSE + path)
    try:
        return f.geturl(), f.read()
    finally:
        f.close()


def _download(path, out_name, mode):
    """Save the resource at `path` on the B-Course host to file `out_name`.

    `mode` is the open() mode for the output file. The resource is fetched
    completely before the file is opened, so a failed request does not
    leave an empty file behind.
    """
    body = _fetch(path)[1]
    out = open(out_name, mode)
    try:
        out.write(body)
    finally:
        out.close()


if len(sys.argv) != 3:
    sys.exit(_USAGE)

fname = sys.argv[1]
try:
    loops = int(sys.argv[2])
except ValueError:
    # Not a number: show the usage text instead of a traceback.
    sys.exit(_USAGE)
if loops < 0:
    # time.sleep() rejects negative values; fail before contacting the server.
    sys.exit(_USAGE)

data_f = open(fname, "r")
try:
    data = data_f.read()
finally:
    data_f.close()


# 1. send data

h = httplib.HTTPConnection(BCOURSE, 80)
try:
    resp = post_multipart(h, UPLOAD, [('name', 'auto-bcourse')],
                          [('data', fname, data)])
    loc = resp.getheader("location")
    upload_status = "%s %s" % (resp.status, resp.reason)
finally:
    h.close()

if loc is None:
    # Without a redirect target there is no variable selection page to read.
    sys.exit("B-course did not redirect after the upload (HTTP %s)."
             % upload_status)


# 2. retrieve variable selection page

vs_url, vs_page = _fetch(loc)

# The session id is taken from the final URL, the statistics from the page.
matches = [vs_page_bytes.search(vs_page),
           vs_page_cases.search(vs_page),
           vs_page_vars.search(vs_page),
           sid.search(vs_url)]
if None in matches:
    print("B-course didn't accept the data. This is why:\n%s" % vs_page)
    sys.exit(1)

n_bytes, n_cases, n_vars, session_id = [m.group(1) for m in matches]

print("Succesfully sent %s bytes of data, consisting "
      "of %s samples and %s variables." % (n_bytes, n_cases, n_vars))


# 3. retrieve start model search page

# Builds "0=1&1=1&...": one "<index>=1" entry per variable.
selected = '&'.join(['%d=1' % i for i in range(int(n_vars))])
_fetch(MSEARCH % {'sid': session_id, 'nofvals': n_vars, 'vals': selected})


# 4. first search

spage = _fetch(SSEARCH % {'sid': session_id})[1]

# The branch number is needed for the final report. Check for it before
# waiting, so a failed search start is reported immediately.
branch_match = branch.search(spage)
if branch_match is None:
    print("B-course didn't start the search. This is why:\n%s" % spage)
    sys.exit(1)
bra = branch_match.group(1)


# 5. search iterations

time.sleep(loops)


# 6. finalize search

_fetch(FSEARCH % {'sid': session_id, 'branch': bra})


# PNG
# Binary mode: text mode would corrupt the image on platforms that
# translate newlines.

_download(PNG % {'sid': session_id}, "bnetwork.png", "wb")
print("bnetwork.png written")


# HUGIN
# Text mode, as before, so the platform's newline convention is applied.

_download(HUGIN % {'sid': session_id}, "bnetwork.net", "w")
print("bnetwork.net written")





