import pickle
import string
import flickrcc
# Pickled experiment results to read and the report file to generate.
DATA_PATH = 'experiment_data/data_3.bak'
REPORT_PATH = "../results_3.html"

# Text emitted once at the top of the report.
REPORT_HEADER = """
Creative Commons License Violations
Creative Commons License Violations - Experimental Result 2
Statistics
"""

# Display text for each license, indexed by the integer license id found in
# img[1].  Built once here instead of once per image.
LICENSE_LABELS = [
    "All Rights Reserved",
    " ",
    " ",
    " ",
    " ",
    " ",
    " ",
]

# NOTE(review): `website`, `image_uri` and `owner_uri` are supplied to these
# templates below, but no matching $placeholder appears in the template text;
# confirm that no markup has been lost from these literals.
SITE_TABLE_HEADER = string.Template(
    "\nNon-Attributed Flickr Image Owner License ")
IMAGE_ROW = string.Template("""
$owner_name
$license
""")
SITE_TABLE_FOOTER = "\n"
REPORT_FOOTER = ""


def summarize(data):
    """Count the images recorded across all tested websites.

    Each element of *data* is indexed as ``datum[1]`` (properly attributed
    images), ``datum[2]`` (non-attributed images) and ``datum[3]`` (images
    that produced an error); only the lengths of those sequences are used.

    Returns a tuple ``(n_attributed, n_not_attributed, n_error, total_imgs)``.
    """
    n_attributed = sum(len(datum[1]) for datum in data)
    n_not_attributed = sum(len(datum[2]) for datum in data)
    n_error = sum(len(datum[3]) for datum in data)
    total_imgs = n_attributed + n_not_attributed + n_error
    return n_attributed, n_not_attributed, n_error, total_imgs


def render_report(data):
    """Build the complete report text for *data* and return it as a string.

    The report consists of the header, the overall statistics, and one
    section per website that has at least one non-attributed image.  Within
    a section, each image whose URI (``img[0]``) is not ``None`` contributes
    one row showing the owner name (``img[2]``) and the license label looked
    up from the license id (``img[1]``).

    Values are inserted into the text verbatim; no escaping is applied.

    Raises IndexError if a license id has no entry in LICENSE_LABELS, and
    ValueError if a license id is not a valid integer.
    """
    n_attributed, n_not_attributed, n_error, total_imgs = summarize(data)

    # Guard against an empty data set: with no images at all the percentage
    # is reported as 0 rather than failing with ZeroDivisionError.
    if total_imgs:
        misattribution_pct = n_not_attributed * 100 // total_imgs
    else:
        misattribution_pct = 0

    # Collect the pieces in a list and join once at the end.
    parts = [
        REPORT_HEADER,
        "Total number of websites tested = %d " % len(data),
        "Total number of images in all of the websites = %d " % total_imgs,
        "Total number of properly attributed images in all of the websites"
        " = %d " % n_attributed,
        "Total number of Non-Attributed Images = %d " % n_not_attributed,
        "Total number of images that had an error (Due to bad HTML, parsing"
        " errors, Flickr errors) = %d " % n_error,
        "Misattribution Percentage = %d percent " % misattribution_pct,
        "License Violations Detected in Each Individual Sites ",
    ]

    for datum in data:
        non_attributed = datum[2]
        if not non_attributed:
            # Websites without violations get no section at all.
            continue

        parts.append(SITE_TABLE_HEADER.substitute(website=datum[0]))
        for img in non_attributed:
            if img[0] is None:
                # No image URI recorded for this entry; skip the row.
                continue
            # int() parses the textual license id directly, so no
            # intermediate byte-encoding step is required.
            license_label = LICENSE_LABELS[int(img[1])]
            parts.append(IMAGE_ROW.substitute(
                image_uri=img[0],
                owner_uri=img[3],
                owner_name=img[2],
                license=license_label,
            ))
        parts.append(SITE_TABLE_FOOTER)

    parts.append(REPORT_FOOTER)
    return "".join(parts)


def main():
    """Load the pickled experiment data and write the report file."""
    # NOTE: pickle.load can execute arbitrary code while unpickling; only run
    # this script against data files produced by a trusted experiment run.
    with open(DATA_PATH, 'rb') as data_file:
        data = pickle.load(data_file)

    # Render before opening the output so a failure while building the report
    # does not leave a truncated file behind.
    content = render_report(data)

    # The content is written as UTF-8 encoded bytes, so the file is opened in
    # binary mode; the context manager closes it even if the write fails.
    with open(REPORT_PATH, "wb") as outfile:
        outfile.write(content.encode("utf-8"))


if __name__ == "__main__":
    main()