"""Render the pickled experiment data as an HTML license-violation report.

The script reads ``experiment_data/data_3.bak`` (a pickle), computes summary
counts, and writes ``../results_3.html``.

Each record ``datum`` in the pickled sequence is indexed as follows:

* ``datum[0]`` -- the website, shown as the heading of its section
* ``datum[1]`` -- images counted as properly attributed
* ``datum[2]`` -- images counted as non-attributed; each ``img`` is indexed
  as ``img[0]`` image URI, ``img[1]`` license index, ``img[2]`` owner name,
  ``img[3]`` owner URI
* ``datum[3]`` -- images counted as errors

NOTE(review): the copy of this file under review had the HTML markup stripped
out of its string literals.  The visible text of every literal is kept; the
tags around it are a minimal reconstruction -- confirm against the original
report layout.
NOTE(review): the page heading says "Experimental Result 2" while the input
and output files are numbered 3 -- confirm which is intended.
"""
import pickle
import string

try:  # Python 3
    from html import escape as _escape_html
except ImportError:  # Python 2
    from cgi import escape as _escape_html

DATA_PATH = 'experiment_data/data_3.bak'
REPORT_PATH = "../results_3.html"

# Labels indexed by the integer value of img[1].
# NOTE(review): entries 1 and 2 contained only markup in the reviewed copy;
# the labels below follow Flickr's published license ids -- confirm.
LICENSE_LABELS = [
    "All Rights Reserved",
    "Attribution-NonCommercial-ShareAlike",
    "Attribution-NonCommercial",
]

_PAGE_HEAD = (
    "<html>\n"
    "<head><title>Creative Commons License Violations</title></head>\n"
    "<body>\n"
    "<h1>Creative Commons License Violations - Experimental Result 2</h1>\n"
    "<h2>Statistics</h2>\n"
)

# Six placeholders, filled in order by the tuple returned from summarize().
_STATS = (
    "<p>\n"
    "Total number of websites tested = %d<br/>\n"
    "Total number of images in all of the websites = %d<br/>\n"
    "Total number of properly attributed images in all of the websites"
    " = %d<br/>\n"
    "Total number of Non-Attributed Images = %d<br/>\n"
    "Total number of images that had an error (Due to bad HTML, parsing"
    " errors, Flickr errors) = %d<br/>\n"
    "Misattribution Percentage = %d percent\n"
    "</p>\n"
)

_SITES_HEAD = (
    "<h2>License Violations Detected in Each Individual Sites</h2>\n"
)

_SITE_HEAD = string.Template(
    "<h3>$website</h3>\n"
    "<table>\n"
    "<tr><th>Non-Attributed Flickr Image</th><th>Owner</th>"
    "<th>License</th></tr>\n"
)

_ROW = string.Template(
    '<tr><td><img src="$image_uri"/></td>'
    '<td><a href="$owner_uri">$owner_name</a></td>'
    '<td>$license</td></tr>\n'
)

_SITE_TAIL = "</table>\n"
_PAGE_TAIL = "</body>\n</html>\n"


def _esc(value):
    """Return *value* formatted as text and escaped for HTML, quotes included."""
    return _escape_html("%s" % (value,), True)


def license_label(license_id):
    """Return the display label for a license index.

    *license_id* is converted with ``int()``, so digit strings and integers
    are both accepted.  An index outside ``LICENSE_LABELS`` yields an
    "Unknown license" label instead of raising ``IndexError``.
    """
    index = int(license_id)
    if 0 <= index < len(LICENSE_LABELS):
        return LICENSE_LABELS[index]
    return "Unknown license (id %d)" % index


def summarize(data):
    """Return the six report statistics for *data* as a tuple.

    The tuple is ``(websites, total_images, attributed, not_attributed,
    errors, misattribution_percent)``.  The percentage uses integer division
    and is 0 when there are no images, so empty data cannot divide by zero.
    """
    n_attributed = sum(len(datum[1]) for datum in data)
    n_not_attributed = sum(len(datum[2]) for datum in data)
    n_error = sum(len(datum[3]) for datum in data)
    total_imgs = n_attributed + n_not_attributed + n_error
    percent = n_not_attributed * 100 // total_imgs if total_imgs else 0
    # NOTE(review): "websites tested" is taken to be the number of records.
    return (len(data), total_imgs, n_attributed, n_not_attributed, n_error,
            percent)


def render_site(datum):
    """Return the HTML section listing one site's non-attributed images.

    Images whose URI (``img[0]``) is ``None`` are skipped.
    """
    parts = [_SITE_HEAD.substitute(website=_esc(datum[0]))]
    for img in datum[2]:
        if img[0] is None:
            continue
        parts.append(_ROW.substitute(
            image_uri=_esc(img[0]),
            owner_uri=_esc(img[3]),
            owner_name=_esc(img[2]),
            license=_esc(license_label(img[1])),
        ))
    parts.append(_SITE_TAIL)
    return "".join(parts)


def render_report(data):
    """Return the complete HTML report for *data* as one string.

    Only sites with at least one non-attributed image get a section.
    """
    parts = [_PAGE_HEAD, _STATS % summarize(data), _SITES_HEAD]
    parts.extend(render_site(datum) for datum in data if len(datum[2]) > 0)
    parts.append(_PAGE_TAIL)
    return "".join(parts)


def main():
    """Load the pickled data and write the HTML report."""
    # The original script imported flickrcc before loading the pickle; it is
    # imported here rather than at module level so the rendering helpers
    # stay importable without that package.
    import flickrcc  # noqa: F401

    # SECURITY: pickle.load can execute arbitrary code; only run this on
    # data files produced by the experiment itself.
    with open(DATA_PATH, 'rb') as data_file:
        data = pickle.load(data_file)

    # Build the whole page first so a rendering error cannot leave a
    # truncated report behind, then write the encoded bytes in binary mode.
    report = render_report(data)
    with open(REPORT_PATH, "wb") as outfile:
        outfile.write(report.encode("utf-8"))


if __name__ == "__main__":
    main()