/* SocialUnderstanding Page Interpreter
 * Copyright 2008  Social Actions  (email : peter@socialactions.com)
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 * 
 * 
 * @author      Social Actions <peter[at]socialactions[dot]com>
 * @author      E. Cooper <smirkingsisyphus[at]gmail[dot]com>
 * @copyright   2008 Social Actions
 * @license     http://www.gnu.org/licenses/gpl-3.0.html
 * @link        http://www.socialactions.com/labs/wordpress-donorschoose-plugin
 * 
 */

/*
 * Constructor for keywords class. Allows optional setting of stop and hot lists
 *	 
 * @param ignore array or string of words to add to ignore list
 * @param hot array or string of words to add to hot list
 * @param hotWt value of weighting to apply to hot list words
 * @returns bool
 */

/*function su( ignore, hot, hotWt ) {
	this.doc	= document;
	this.keywords = [];
	this.keywordsSorted = [];
	this.ignore = su._makeWordArray( ignore );
	this.hot = su._makeWordArray( hot );
	this.hotWt = su._makeValidWt( hotWt );
}*/

// Create the shared `su` namespace on the global object (or reuse one that
// is already there) so the rest of the file is a bit easier to read.
if ( !this.su ) {
	this.su = {};
}

// Document the keywords are read from.
su.doc = document;

// Word filters: lookup tables of ignored / boosted words, plus the boost
// that is added for words found in the hot table.
su.ignore = {};
su.hot = {};
su.hotWt = 1;

// Results: per-section keyword tallies and the latest ranked word list.
su.keywords = {};
su.keywordsSorted = [];

/*
 * Sets the mapping for finding special sections of the page, like links,
 * alts for images, headings, etc. This can be modified on the fly through
 * the code depending on your page.
 *
 * Fields read by su.addKeywords for every entry:
 *   name   - bucket under su.keywords[section] that receives the words
 *   tag    - tag names looked up with getElementsByTagName
 *   value  - where the text comes from: 'innerHTML' or an attribute name
 *   weight - multiplied with the weight passed to su.addKeywords
 *
 * The original draft also sketched per-entry `class` and `id` filters; no
 * code in this file reads them, so they are left out here.
 */
su.keywordMap = [
	// Anchor text.
	{ name: 'links', tag: ['a'], value: ['innerHTML'], weight: 1.2 },

	// Disabled: page title.
	// { name: 'title', tag: ['title'], value: ['innerHTML'], weight: 1.1 },

	// Emphasised text and headings.
	{ name: 'bold', tag: ['b', 'strong', 'h1', 'h2', 'h3'], value: ['innerHTML'], weight: 1.3 },

	// Alternative text of images.
	{ name: 'img', tag: ['img'], value: ['alt'], weight: 1.1 }
];

/*
 * Elements that are stripped from the markup before keywords are extracted
 * (su.addKeywords hands this object to su._killScript):
 *   tag - lower-case tag names to drop, used as a lookup table
 *   id  - patterns tested against an element's id
 */
su.ignoreTags = {
	tag: {
		script: 1,
		style: 1,
		meta: 1,
		object: 1,
		param: 1,
		embed: 1,
		link: 1,
		form: 1
	},
	id: [
		/ads/i,
		/comment/i,
		/advert/i,
		/sphere/i
	]
};
	
	
/*
 * Adds words to the ignore (stop word) list.
 *
 * The new words are merged into su.ignore, so the function can be called
 * several times; earlier entries are kept. (It used to overwrite the whole
 * list on every call.)
 *
 * @param list words to ignore, in the form accepted by su._makeWordArray
 */
su.addIgnore = function( list ) {
	var added = su._makeWordArray( list );
	var current = su.ignore || {};

	for ( var word in added ) {
		// Own keys only, in case Object.prototype has been extended.
		if ( Object.prototype.hasOwnProperty.call( added, word ) )
			current[word] = added[word];
	}

	su.ignore = current;
};
		

/*
 * Adds keywords to the current pool from a given chunk of text/HTML.
 *
 * The markup is parsed into a detached <div>, cleaned with su._killScript
 * and then harvested in two passes:
 *   1. every element matched by an entry of su.keywordMap is read, weighted
 *      with ( entry weight * wt ), tallied under
 *      su.keywords[section][entry name] and removed from the working copy;
 *   2. the text that is left over is tallied under
 *      su.keywords[section]['default'] with the plain weight wt.
 *
 * Calling the function again with the same section adds to the existing
 * tallies.
 *
 * @param text a string of text (may contain HTML) to extract keywords from.
 * @param wt base weight for every word found; defaults to 1 when omitted.
 * @param section optional name of the pool to add to; defaults to 'default'.
 * @returns bool true once the text has been processed
 */
su.addKeywords =  function( text, wt, section ) {
	// Safety cap on the number of elements harvested per tag name; anything
	// beyond it is simply left for the 'default' pass.
	var MAX_NODES_PER_TAG = 1000;

	// Turns escaped angle brackets back into real ones, then drops
	// everything that looks like a tag.
	function stripMarkup( str ) {
		str = str.replace(/&(lt|gt);/g, function (strMatch, p1){ return (p1 == "lt")? "<" : ">"; });
		return str.replace( /<[\/\!]*?[^<>]*?>/gi, "");
	}

	if ( !section )
		section = 'default';

	// Without a weight every score would end up as NaN.
	if ( wt === undefined || wt === null )
		wt = 1;

	var textNode = su.doc.createElement('div');
	textNode.innerHTML = text;
	textNode = su._killScript( textNode, su.ignoreTags );

	// Plain objects, not arrays: an array used as a dictionary throws a
	// RangeError as soon as the word "length" gets a fractional weight.
	if ( !su.keywords[section] )
		su.keywords[section] = {};
	var pool = su.keywords[section];

	for ( var x = 0; x < su.keywordMap.length; x++ ) {
		var entry = su.keywordMap[x];
		if ( !entry || !entry.tag || !entry.value )
			continue;

		for ( var i = 0; i < entry.tag.length; i++ ) {
			var nodes = textNode.getElementsByTagName( entry.tag[i] );
			var handled = 0;

			// `nodes` is live: removing the first match shifts the next one
			// into slot 0, so keep taking the head until none are left.
			while ( nodes.length > 0 && handled < MAX_NODES_PER_TAG ) {
				var node = nodes[0];

				if ( !pool[entry.name] )
					pool[entry.name] = {};

				for ( var y = 0; y < entry.value.length; y++ ) {
					var attr = entry.value[y];
					var raw = ( attr == "innerHTML" ) ? node.innerHTML : node.getAttribute( attr );

					if ( raw )
						pool[entry.name] = su._updateKeywords( pool[entry.name], stripMarkup( raw ), entry.weight * wt );
				}

				// Harvested elements must not be counted again by the
				// 'default' pass below.
				node.parentNode.removeChild( node );
				handled++;
			}
		}
	}

	if ( !pool['default'] )
		pool['default'] = {};

	var rest = stripMarkup( textNode.innerHTML );
	rest = rest.replace( /<[^>]+>/g, "");

	pool['default'] = su._updateKeywords( pool['default'], rest, wt );

	return true;
};

/*
 * Builds a delimited string of the highest ranked keywords.
 *
 * Re-ranks the section with su._sortKeywords first, so su.keywordsSorted
 * holds the complete ranking afterwards.
 *
 * @param delim string placed between keywords; defaults to ", "
 * @param limit maximum number of keywords to return; defaults to 1
 * @param section pool to rank; defaults to 'default'
 * @returns string the top keywords joined with delim, best first
 */
su.makeList = function ( delim, limit, section ) {
	delim = delim || ", ";
	limit = limit || 1;
	section = section || 'default';

	su._sortKeywords( section );

	// A negative or non-numeric limit yields an empty list, as before.
	var count = ( limit > 0 ) ? limit : 0;

	// slice() copies. The previous splice() removed the returned words from
	// su.keywordsSorted, leaving the shared ranking without its top entries.
	return su.keywordsSorted.slice( 0, count ).join( delim );
};

/*
 * Splits a chunk of text into words and adds every usable word to a tally.
 *
 * A word is skipped when it is on the ignore list (as written, or after a
 * crude suffix strip), is three characters or shorter, consists of digits
 * only, or contains anything other than a-z / 0-9. Words on the hot list
 * get su.hotWt added to their weight. Singular and plural spellings are
 * counted under whichever of the two is already in the tally.
 *
 * @param keywords object mapping word => accumulated weight. An array is
 *                 accepted for backward compatibility and replaced by a
 *                 plain object.
 * @param str text to scan
 * @param wt weight added per occurrence
 * @returns object the updated tally; always continue with the return value
 */
su._updateKeywords = function ( keywords, str, wt ) {
	var hasOwn = Object.prototype.hasOwnProperty;

	// True only for a map's own, truthy entries. A bare map[key] would also
	// find inherited members such as "constructor".
	function has( map, key ) {
		return !!map && hasOwn.call( map, key ) && !!map[key];
	}

	// Arrays make poor dictionaries: assigning a weight to the word "length"
	// resizes the array or throws a RangeError. Move the entries to an object.
	if ( !keywords || keywords instanceof Array ) {
		var tally = {};
		if ( keywords ) {
			for ( var key in keywords ) {
				if ( hasOwn.call( keywords, key ) )
					tally[key] = keywords[key];
			}
		}
		keywords = tally;
	}

	str = su._trim( String( ( str === undefined || str === null ) ? "" : str ).toLowerCase() );

	// Whitespace and punctuation separate words. The previous pattern wrote
	// its alternatives with "|" inside a character class, which also made
	// the digits of "&#0215;" and "&#187;" (0, 1, 2, 5, 7, 8) separators and
	// tore words such as "web2" apart. The characters those two entities
	// stand for are listed directly instead.
	var words = str.split( /\s*[\s+.|?,\[\]!()\-"'=;&#$\/:{}\u00d7\u00bb]\s*/ );
	var ignore = su.ignore;
	var hot = su.hot;
	var hotWt = su.hotWt;

	for ( var x = 0; x < words.length; x++ ) {
		var word = words[x];
		var sWord = word.replace(/^(.+)(d|ed|ly|s|es|ing)$/gi, "$1");

		if ( word == "" || word.length <= 3 || /^[0-9]+$/.test( word ) || /[^a-z0-9]/.test( word ) )
			continue;

		if ( has( ignore, word ) || has( ignore, sWord ) )
			continue;

		var tmpHotWt = ( has( hot, word ) || has( hot, sWord ) ) ? hotWt : 0;

		if ( su._isSuffixed( word ) ) {
			// Plural: count it under a singular that is already known.
			// "-es" is tried as well; the old /(.+)(e?s)/ could never
			// capture the "e" because (.+) is greedy.
			if ( has( keywords, word.slice( 0, -1 ) ) ) {
				word = word.slice( 0, -1 );
			} else if ( /es$/.test( word ) && has( keywords, word.slice( 0, -2 ) ) ) {
				word = word.slice( 0, -2 );
			}
		} else if ( has( keywords, word + "s" ) ) {
			// Singular: count it under a plural that is already known.
			word = word + "s";
		} else if ( has( keywords, word + "es" ) ) {
			word = word + "es";
		}

		if ( has( keywords, word ) ) {
			keywords[word] += ( wt + tmpHotWt );
		} else {
			keywords[word] = ( wt + tmpHotWt );
		}
	}

	return keywords;
};

/*
 * Ranks the keywords of one section and stores the result in
 * su.keywordsSorted.
 *
 * The weights of all buckets of the section are summed per word, a plural
 * is folded into its singular when the singular was met first, and the
 * words are ordered by total weight, heaviest first. Words of equal weight
 * keep the order in which they were first met. Words whose total is not a
 * real number are left out.
 *
 * @param section name of the pool in su.keywords; 'default' is used when
 *                there is no such pool
 * @returns bool always true
 */
su._sortKeywords = function( section ) {
	var hasOwn = Object.prototype.hasOwnProperty;
	var keywords = su.keywords;
	var totals = {};	// word => record; an object, so "length" is just a word
	var records = [];	// the same records in first-seen order
	var kSorted = [];

	if ( !keywords[section] )
		section = 'default';

	var pool = keywords[section];

	for ( var i in pool ) {
		if ( !hasOwn.call( pool, i ) )
			continue;

		var subSection = pool[i];
		for ( var k in subSection ) {
			if ( !hasOwn.call( subSection, k ) )
				continue;

			var word = k;
			if ( su._isSuffixed( word ) ) {
				// Try the "-s" and then the "-es" singular; the old
				// /(.+)(e?s)/ never captured the "e".
				if ( hasOwn.call( totals, word.slice( 0, -1 ) ) ) {
					word = word.slice( 0, -1 );
				} else if ( /es$/.test( word ) && hasOwn.call( totals, word.slice( 0, -2 ) ) ) {
					word = word.slice( 0, -2 );
				}
			}

			if ( hasOwn.call( totals, word ) ) {
				totals[word].weight += subSection[k];
			} else {
				totals[word] = { word: word, weight: subSection[k], order: records.length };
				records.push( totals[word] );
			}
		}
	}

	// Only real numbers can be ranked.
	var ranked = [];
	for ( var r = 0; r < records.length; r++ ) {
		if ( typeof records[r].weight == "number" && !isNaN( records[r].weight ) )
			ranked.push( records[r] );
	}

	// Heaviest first; the first-seen position breaks ties so the outcome
	// does not depend on whether the engine's sort is stable. This replaces
	// the previous quadratic "find the word that has this weight" pass.
	ranked.sort( function( a, b ) {
		return ( b.weight - a.weight ) || ( a.order - b.order );
	} );

	for ( var n = 0; n < ranked.length; n++ ) {
		kSorted.push( ranked[n].word );
	}

	su.keywordsSorted = kSorted;

	return true;
};
		
		
/*
 * Builds a lookup table ( word => 1 ) from a list of words.
 *
 * @param list comma-separated string of words, or an array of words.
 *             Anything else (including undefined) gives an empty table.
 * @returns object with one lower-cased, trimmed key per word
 */
su._makeWordArray = function ( list ) {
	var table = {};
	var items;

	if ( typeof list == "string" ) {
		// Split on the bare comma and trim afterwards, so "a,b", "a, b"
		// and lists wrapped over several lines all behave the same.
		items = list.split( "," );
	} else if ( list instanceof Array ) {
		items = list;
	} else {
		items = [];
	}

	for ( var i = 0; i < items.length; i++ ) {
		var word = String( items[i] ).toLowerCase().replace( /^\s+|\s+$/g, "" );

		if ( word != "" )
			table[word] = 1;
	}

	return table;
};

/*
 * Normalizes a weight to a positive number no larger than 5.
 *
 * @param wt requested weight
 * @returns number 1 when wt is missing, zero or not numeric; otherwise the
 *          absolute value of wt, capped at 5
 */
su._makeValidWt = function ( wt ) {
	// Take the absolute value before clamping: negative input used to be
	// negated and returned as is, so -10 came back as 10.
	var value = Math.abs( Number( wt ) );

	// Covers 0 and NaN (undefined, null, "", non-numeric strings).
	if ( !value )
		return 1;

	return Math.min( value, 5 );
};

/*
 * Tells whether a word looks like a plural: it ends in "s" (either case)
 * and has at least one character in front of that "s".
 *
 * @param word string to test
 * @returns bool
 */
su._isSuffixed = function ( word ) {
	return word.match( /(.+)(e?)s$/i ) !== null;
};

/*
 * Removes leading and trailing whitespace from a string.
 *
 * @param str string to trim
 * @returns string the trimmed copy
 */
su._trim = function (str) {
	var whitespace = /\s/;
	var trimmed = str.replace(/^\s\s*/, '');
	var end = trimmed.length;

	// Walk back from the end for as long as the last kept character is
	// whitespace.
	while ( end > 0 && whitespace.test( trimmed.charAt( end - 1 ) ) ) {
		end--;
	}

	return trimmed.slice( 0, end );
};

/*
 * Removes unwanted elements from a DOM subtree, in place.
 *
 * An element is dropped together with its children when its lower-cased
 * tag name is a key of tags.tag, or when its id matches one of the patterns
 * in tags.id.
 *
 * @param node root element to clean; the root itself is never removed
 * @param tags object of the shape { tag: { name: 1, ... }, id: [ RegExp, ... ] }
 * @returns the node that was passed in
 */
su._killScript = function ( node, tags ) {
	var e = node.getElementsByTagName("*");

	// `e` is live and in document order. Walking it backwards means a
	// removal only shifts entries that were already visited; the old forward
	// for..in skipped the element that followed every removal.
	for ( var i = e.length - 1; i >= 0; i-- ) {
		var el = e[i];
		if ( !el || !el.parentNode )
			continue;

		var unwanted = !!( el.tagName && tags.tag[el.tagName.toLowerCase()] );

		if ( !unwanted && el.id && typeof el.id == "string" ) {
			for ( var t = 0; t < tags.id.length; t++ ) {
				if ( el.id.match( tags.id[t] ) ) {
					unwanted = true;
					break;
				}
			}
		}

		if ( unwanted )
			el.parentNode.removeChild( el );
	}

	return node;
};

// Default stop words for the widget: a ", "-separated list that is handed to
// su.addIgnore further down. The literal is concatenated from several pieces
// because a quoted JavaScript string must not contain a raw line break (the
// previous three-line literal was a syntax error). The words are unchanged.
var ignore = "\", ”, ″, ', count, action, round, september, member, peter, deitz, address, posted, friend, favorite, person, email, views, remove, mdash, src, var, minewidget, ct, week, posted, copy, data, 4data, expr, moneybox, document, write, hubspot, html, apache, any, page, google, feed, test, download, readt. a, company, del, ic, ous, raquo, begin, unit, sphere, user, companies, event, pdt, site, product, about, edit, reply, service, http, www, url, px, address, link, wikipedia, demand, dark, discussion, admin, log, rss, agree, work, push, recognize, should, professional, shift, above, across, after, afterwards, again, against, all, almost, alone, along, already, also, although, always, am, among, amongst, amoungst, amount, an, and, another, any, anyhow, anyone, anything, anyway, anywhere, are, around, as, at, back, be, became, because, become, becomes, becoming, been, before, beforehand, behind, being, below, beside, besides, between, beyond, bill, both, bottom, but, by, call, can, cannot, cant, co, computer, con, could, couldnt, cry, de, describe, detail, do, done, down, due, during, each, eg, eight, either, eleven, else, elsewhere, empty, enough, etc, even, ever, every, everyone, everything, everywhere, except, few, fifteen, fify, fill, find, fire, first, five, for, former, formerly, forty, found, four, from, front, full, further, get, give, go, had, has, hasnt, have, he, hence, her, here, hereafter, hereby, herein, hereupon, hers, herself, him, himself, his, how, however, hundred, i, ie, if, in, inc, indeed, interest, into, is, it, its, itself, keep, last, latter, latterly, least, less, ltd, made, many, may, me, meanwhile, might, mill, mine, more, moreover, most, mostly, move, much, must, my, myself, name, namely, neither, never, nevertheless, next, nine, no, nobody, none, noone, nor, not, nothing, now, nowhere, of, off, often, on, once, one, only, onto, or, other, others, otherwise, our, ours, ourselves, out, over, own, part, per, perhaps, please, put, " +
	"rather, re, same, see, seem, seemed, seeming, seems, serious, several, she, should, show, side, since, sincere, six, sixty, so, some, somehow, someone, something, sometime, sometimes, somewhere, still, such, system, take, ten, than, that, the, their, them, themselves, then, thence, there, thereafter, thereby, therefore, therein, thereupon, these, they, thick, thin, third, this, those, though, three, through, throughout, thru, thus, to, together, too, top, toward, towards, twelve, twenty, two, un, under, until, up, upon, us, very, via, was, we, well, were, what, whatever, when, whence, whenever, where, whereafter, whereas, whereby, wherein, whereupon, wherever, whether, which, while, whither, who, whoever, whole, whom, whose, why, will, with, within, without, would, yet, you, your, yours, yourself, yourselves, post, social, don’t, sort, able, page, am, pm, comment, comments, students, classroom, projects, challenge, children, shop, speak, it's, education, its, ning, session, message, conference, own, flat, face, district, build, net, com, org, ms, miss, mr, sir, north, south, said, loss, give, &nbsp;, his, people, always, ever, —, -, vs, nor, %, percent, poll, illicit, add, ad, oh, must, like, liking, liked, trying, never, i’ve, learning, i, information, comment, before, more, less, thanks, before, night, day, monday, tuesday, thursday, friday, saturday, sunday, april, may, june, july, december, january, november, august, current, wordpress, wp, key, a, an, the, and, of, i, to, is, in, with, for, as, that, on, at, this, my, was, our, it, you, we, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 10, about, after, all, almost, along, also, amp, another, any, are, area, around, available, back, be, because, been, being, best, better, big, bit, both, but, by, c, came, can, capable, control, could, course, d, dan, day, decided, did, didn, different, div, do, doesn, don, down, drive, e, each, easily, easy, edition, end, enough, even, every, example, few, find, first, found, from, get, go, " +
	"going, good, got, gt, had, hard, has, have, he, her, here, how, if, into, isn, just, know, last, left, li, like, little, ll, long, look, lot, lt, m, made, make, many, mb, me, menu, might, mm, more, most, much, name, nbsp, need, new, no, not, now, number, off, old, one, only, or, original, other, out, over, part, place, point, pretty, probably, problem, put, quite, quot, r, re, really, results, right, s, same, saw, see, set, several, she, sherree, should, since, size, small, so, some, something, special, still, stuff, such, sure, system, t, take, than, their, them, then, there, these, they, thing, things, think, those, though, through, time, today, together, too, took, two, up, us, use, used, using, ve, very, want, way, well, went, were, what, when, where, which, while, white, who, will, would, your";


// Take the markup of the page body as the text to analyse.
var text = su.doc.getElementsByTagName('body')[0].innerHTML;

// Load the stop words, then harvest the page with a base weight of 1.
su.addIgnore( ignore );
su.addKeywords( text, 1 );

// p: the three best keywords joined with "|"; h: the host without "www.".
var p = su.makeList( "|", 3 );
var h = window.location.host.replace(/^www\./, "" );

// Write the related-actions iframe into the page at the script's position.
document.write( [
	"<iframe id='relatedActions' align='center' src='http://www.socialactions.com/related-ways-to-take-action/ra.php?k=",
	p,
	"&h=",
	h,
	"' frameborder='0' scrolling='no' height='335px' width='170px' ></iframe>"
].join( "" ) );