#!/usr/bin/python

import cgi
import cgitb
from spider import *

# Install the cgitb exception hook so that an uncaught exception is reported
# in the CGI response instead of producing an empty/failed page.
cgitb.enable()

def validate_input(uri):
    """Return the site (host) part of *uri*.

    A leading ``http://`` or ``https://`` scheme is removed (matched
    case-insensitively) and everything from the first ``/`` onwards is
    dropped, because the crawler is handed the bare site name.

    Only the *prefix* of the string is treated as a scheme, so a URL that
    merely contains ``http://`` later on (for example inside a query
    string) is not mangled.

    Note: despite its name this function performs no existence or
    well-formedness check on the URI; it only trims it.

    Args:
        uri: the address as typed by the user, with or without a scheme.

    Returns:
        The text between the optional scheme and the first ``/``.
    """
    lowered = uri.lower()
    for scheme in ('http://', 'https://'):
        if lowered.startswith(scheme):
            uri = uri[len(scheme):]
            break
    # split() returns the whole string as the only item when there is no '/'.
    return uri.split('/', 1)[0]
    

# Markup for the license-specific icons, keyed by the numeric license code
# stored in each table entry. The icon images are hosted in dig space.
_LICENSE_MARKUP = {
    1: """ <a href= 'http://creativecommons.org/licenses/by-nc-sa/2.5/'><img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/attrib.gif' height='20'/><img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/nc.gif' height='20'/> <img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/sa.gif' height='20'/></a>""",
    2: """ <a href= 'http://creativecommons.org/licenses/by-nc/2.5/'><img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/attrib.gif' height='20'/><img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/nc.gif' height='20'/> </a>""",
    3: """ <a href= 'http://creativecommons.org/licenses/by-nc-nd/2.5/'><img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/attrib.gif' height='20'/><img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/nc.gif' height='20'/><img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/deriv.gif' height='20'/> </a>""",
    4: """ <a href= 'http://creativecommons.org/licenses/by/2.5/'><img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/attrib.gif' height='20'/></a>""",
    5: """ <a href= 'http://creativecommons.org/licenses/by-sa/2.5/'><img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/attrib.gif' height='20'/><img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/sa.gif' height='20'/></a>""",
    6: """ <a href= 'http://creativecommons.org/licenses/by-nd/2.5/'><img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/attrib.gif' height='20'/><img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/deriv.gif' height='20'/></a>""",
}


def _escape_html(value):
    """Return *value* as text that is safe inside HTML content and
    single- or double-quoted attribute values."""
    text = '%s' % (value,)
    # '&' must be replaced first so the entities added below stay intact.
    for raw, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
                        ('"', '&quot;'), ("'", '&#39;')):
        text = text.replace(raw, entity)
    return text


def print_table(list):
    """Print an HTML table with one row per image to standard output.

    Each row shows a thumbnail, a "By <link>" attribution and either
    "All Rights Reserved" (license code 0) or the Creative Commons icons
    for the image's license.

    Args:
        list: mapping whose keys are used as the image ``src`` and whose
            values are sequences with item 0 = attribution link text,
            item 1 = attribution link target and item 2 = numeric license
            code (anything ``int()`` accepts). The parameter name shadows
            the builtin but is kept so existing callers are unaffected.

    Codes without an entry in ``_LICENSE_MARKUP`` (other than 0) get only
    the generic CC icon.
    """
    # Single-argument print(...) is also valid under Python 2's print
    # statement, so this block runs unchanged on either major version.
    print("<table style='border: 0'>")
    for val in list:
        entry = list[val]
        label, link, license_code = entry[0], entry[1], int(entry[2])

        print("""<tr>""")
        # Values originate from crawled pages, so escape them before
        # placing them in markup.
        print("""<td><img src='%s' """ % (_escape_html(val)))
        print("""height='100'/></td>""")
        print("""<td>By <a href='%s'>""" % (_escape_html(link)))
        print("""%s </a><br>""" % (_escape_html(label)))

        if license_code == 0:
            print("All Rights Reserved")
        else:
            # Generic CC icon first, then the icons specific to the license.
            print("""Has License <img src='http://dig.csail.mit.edu/2008/WSRI-Exchange/images/cc.jpg' height='20'/> """)
            icons = _LICENSE_MARKUP.get(license_code)
            if icons is not None:
                print(icons)

        # Close the cell and row for every image, licensed or not.
        print("""</td></tr>""")
    print("""</table>""")

def main():
    """Handle one CGI request for the license validator page.

    Emits the HTTP header and page skeleton, reads the ``uri`` form field
    and, when it is non-empty, crawls the site via ``crawl`` and prints
    three result tables (images without attribution, images with
    attribution, images with no recorded owner).

    Any ``Exception`` raised while building the page is reported inline
    with ``cgi.print_exception()``; the document is closed with
    ``</body></html>`` in every case.
    """
    # Single-argument print(...) is also valid under Python 2's print
    # statement; print("") emits the blank line that ends the CGI header.
    try:
        print("Content-type: text/html")
        print("")
        print("""
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
      "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xml:lang="en">
  <head>
      <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
      <link rel="stylesheet" type="text/css" href="http://www.w3.org/StyleSheets/TR/base.css" />
      <title>Creative Commons Validator for Flickr Images</title>
   </head>
   
   <body>
      <h1>Creative Commons License Validator for Flickr Images</h1>
""")

        form = cgi.FieldStorage()
        uri = form.getvalue("uri" , "")

        # NOTE(review): this form posts to cc_validator.cgi while the
        # "Clear Results" form below posts to cc_validator.py -- confirm
        # which name the script is deployed under.
        print("""
  <p>
      <form method="post" action="cc_validator.cgi">
         <p>URI: 
            <input type="text" size="50" name="uri"/>
            <input type="submit" value="Validate!"/>
         </p>
      </form>
<hr/>
</p>
""")

        if uri != "": #somebody has put in a value for the blog URI
            #TODO: Check whether it's a valid URI
            result = crawl(validate_input(uri), '/')
            attr_given = result[0]
            attr_not_given = result[1]
            nameless = result[2]

            n_attr_given = len(attr_given)
            n_attr_not_given = len(attr_not_given)
            n_nameless = len(nameless)

            print("""
      <form method="post" action="cc_validator.py">
         <p><input type="submit" value="Clear Results"/></p>
      </form>
""")
            # The URI is user input: escape it before echoing it back.
            # cgi.escape(..., True) escapes &, <, > and the double quote,
            # so the attribute below is double-quoted.
            lowered = uri.lower()
            if lowered.startswith('http://') or lowered.startswith('https://'):
                link = uri
            else:
                # Without a scheme the browser would treat the link as
                # relative to this script (or as another scheme entirely).
                link = 'http://' + uri
            print("""<b>Results for the site <a href="%s">""" % (cgi.escape(link, True)))
            print("""%s</a>.</b><br/>""" % (cgi.escape(uri, True)))

            if n_attr_not_given > 0:
                print("""<br/><p><font color='red'>Found %s license violations for the following images.</font></p>""" % (n_attr_not_given))
                print_table(attr_not_given)
            else:
                print("<p>No license violations detected.</p>")

            if n_attr_given > 0:
                print("""<br/><p><font color='green'>These %s Flickr images were embedded in the site with proper attribution.</font></p>""" % (n_attr_given))
                print_table(attr_given)

            if n_nameless > 0:
                print("""<br/><p>The following images were found in the site, but Flickr does not seem to have a record of the owners of the images.</p>""")
                print_table(nameless)

    except Exception:
        # Report the failure in the page; SystemExit and KeyboardInterrupt
        # are deliberately not intercepted.
        print("<hr/><p>Oops. An error occurred!</p>")
        cgi.print_exception()

    # Close the document whether or not a URI was submitted or an error
    # was reported.
    print("""</body></html>""")
        
# Serve the request only when this file is executed as a script (as the CGI
# handler), not when it is imported as a module.
if __name__ == "__main__":
  main()
  
