<?php
/**
 * Groups rows of the `rss_pool` table that have similar (stemmed) titles and,
 * for every group of look-alikes, marks one row as `to_send` and flags all rows
 * of the group as `ignored`.
 *
 * Usage: `$indexer = new Indexer($site_id); $indexer->index();`
 *
 * NOTE(review): this class uses the legacy mysql_* API on an implicit connection
 * that must be opened elsewhere. That extension was removed in PHP 7.0; migrating
 * to mysqli/PDO needs access to the connection handle, which is not visible here.
 */
class Indexer{
	private $site_id = 0;
	private $data = array();		// pool rows keyed by row id; each row gets a ->stem array
	private $words = array();		// stemmed word => list of row ids whose title contains it
	private $published = array();	// row id => '1' for rows selected to be sent during this run
	private $ignored = array();		// row id => '1' for rows to be flagged as ignored
	private $default_match = 75;	// fallback match percentage
	private $match = 0;				// match percentage, read from setting 'rss_acc' in the constructor

	/**
	 * @param int $site_id id of the site whose pool is indexed; values <= 0 keep the default 0
	 */
	public function __construct($site_id = 0){
		if ($site_id > 0){
			// normalise to an integer: the value is interpolated into SQL later on
			$this->site_id = (int)$site_id;
		}
		$setting = new SettingCreator();
		$this->match = $setting->getSetting('rss_acc');
		// the setting is a percentage; anything outside (0, 100] falls back to the default
		if ($this->match <= 0 || $this->match > 100){
			$this->match = $this->default_match;
		}
	}//

	/**
	 * Runs the whole pipeline: load rows, compute the publish/ignore sets in memory,
	 * then write both sets back to the database.
	 */
	public function index(){
		$this->getData();
		$this->process();
		$this->publishAndIgnore();
	}//

	/**
	 * Loads the candidate rows of this site, stems every title and builds the
	 * word => row ids index used by process().
	 */
	private function getData(){
		$dataQ = mysql_query("select * from rss_pool where site_id='".(int)$this->site_id."' and (sent = 1 or ignored = 0 or to_send = 1) and bypassCompare=0");
		if ($dataQ === false){
			// query failed: nothing to index (avoids fetching from / freeing a non-resource)
			return;
		}
		while ($data = mysql_fetch_object($dataQ)){
			$data->stem = stem($data->title); // array of stemmed words
			foreach ($data->stem as $word){
				$this->words[$word][] = $data->id;
			}
			$this->data[$data->id] = $data;
		}//
		mysql_free_result($dataQ);
	}//end get data

	/**
	 * Walks over all loaded rows and, for each row not yet ignored, finds the rows
	 * sharing enough stemmed words with it, filters them by the group's length
	 * rules, selects one row to send (unless a match is already published) and
	 * marks the whole set as ignored. Only the in-memory sets are changed here.
	 */
	private function process(){
		$acceptance = $this->match / 100;
		$defaultGroup = $this->loadDefaultGroup();
		$groups = array();	// cache: group id => group settings object

		foreach ($this->data as $data){
			if (isset($this->ignored[$data->id])){
				continue;
			}

			// pick the settings of the row's group, or the defaults for group 0
			if ($data->group_id == 0){
				$thisGroup = $defaultGroup;
			}else{
				if (!isset($groups[$data->group_id])){
					$groups[$data->group_id] = $this->loadGroup($data->group_id, $defaultGroup);
				}
				$thisGroup = $groups[$data->group_id];
			}

			// first filter: rows sharing at least 'match' percent of this row's words
			$filter_1 = $this->findSimilar($data, $acceptance);
			if (count($filter_1) == 0){
				continue;
			}

			// second filter: keep the rows whose word count is acceptable according to the group settings
			$filter_2 = array();
			$is_published = false;
			foreach ($filter_1 as $newsId){
				$stemCount = count($this->data[$newsId]->stem);
				$stemCountDef = $stemCount - count($data->stem);
				$withinRange = ($stemCountDef <= $thisGroup->max_count && $stemCountDef >= (-1 * $thisGroup->min_count));
				$exactLength = ($thisGroup->exact > 0 && $stemCount == $thisGroup->exact);
				if ($withinRange || $exactLength || $stemCountDef == 0){
					$filter_2[$newsId] = $newsId;
					// a match that is already marked to_send, or was selected earlier in
					// this run, means nothing new is published for this set
					if ($this->data[$newsId]->to_send == '1' ||
						(isset($this->published[$newsId]) && $this->published[$newsId] == '1')){
						$is_published = true;
					}
				}
			}//

			if (!$is_published){
				$selected = $this->pickEarliest($filter_2);
				// no row with a usable title: publish nothing instead of a bogus id 0
				if ($selected !== null){
					$this->toSend($selected);
				}
			}

			// every row of the second filter is ignored from now on (including the selected one)
			$this->ignore($filter_2);
		}//end foreach
	}//

	/**
	 * Builds the default group settings from the 'rss_max', 'rss_min' and 'rss_exact' settings.
	 *
	 * @return stdClass object with max_count, min_count and exact
	 */
	private function loadDefaultGroup(){
		$setting = new SettingCreator();
		// explicit object: assigning properties on an undefined variable is an Error on PHP 8
		$defaultGroup = new stdClass();
		$defaultGroup->max_count = $setting->getSetting('rss_max');
		$defaultGroup->min_count = $setting->getSetting('rss_min');
		$defaultGroup->exact = $setting->getSetting('rss_exact');
		return $defaultGroup;
	}//

	/**
	 * Loads one row of rss_groups.
	 *
	 * @param int    $group_id id of the group
	 * @param object $fallback settings returned when the group cannot be loaded
	 * @return object the group row, or $fallback when the query fails or finds nothing
	 */
	private function loadGroup($group_id, $fallback){
		$groupQ = mysql_query("select * from rss_groups where id = '".(int)$group_id."'");
		if ($groupQ === false){
			return $fallback;
		}
		$group = mysql_fetch_object($groupQ);
		mysql_free_result($groupQ);
		return is_object($group) ? $group : $fallback;
	}//

	/**
	 * Finds the rows sharing enough stemmed words with the given row.
	 *
	 * @param object $data       row with a ->stem array
	 * @param float  $acceptance required share (0..1) of the row's words
	 * @return array row id => row id of the accepted rows
	 */
	private function findSimilar($data, $acceptance){
		// count, per row id, how many of this row's words it contains
		$counts = array();
		foreach ($data->stem as $word){
			foreach ($this->words[$word] as $newsId){
				if (!isset($counts[$newsId])){
					$counts[$newsId] = 0;
				}
				$counts[$newsId]++;
			}
		}

		$min_words = ceil(count($data->stem) * $acceptance);
		$accepted = array();
		foreach ($counts as $newsId => $shared){
			if ($shared >= $min_words){
				$accepted[$newsId] = $newsId;
			}
		}
		return $accepted;
	}//

	/**
	 * Selects the row to publish among the given ids: rows with an empty title
	 * are skipped, and the row with the smallest positive original_publish_ts wins.
	 * A row without a positive timestamp is only chosen while no other candidate exists.
	 *
	 * @param array $ids row ids to choose from
	 * @return mixed the selected row id, or null when no row has a non-empty title
	 */
	private function pickEarliest($ids){
		$selected = null;
		$leastPubDate = 0;	// smallest positive timestamp seen so far, 0 = none yet
		foreach ($ids as $filteredId){
			$toCheck = $this->data[$filteredId];
			if (trim($toCheck->title) == ''){
				continue;
			}
			$ts = $toCheck->original_publish_ts;
			if ($selected === null){
				$selected = $filteredId;
				$leastPubDate = ($ts > 0) ? $ts : 0;
			}elseif ($ts > 0 && ($leastPubDate == 0 || $ts < $leastPubDate)){
				$selected = $filteredId;
				$leastPubDate = $ts;
			}
		}//
		return $selected;
	}//

	/**
	 * Remembers a row id as selected for sending; written to the db by publishAndIgnore().
	 */
	private function toSend($id){
		$this->published[$id] = '1';
	}//

	/**
	 * Remembers the given row ids as ignored; written to the db by publishAndIgnore().
	 */
	private function ignore($ids){
		foreach ($ids as $filteredId){
			$this->ignored[$filteredId] = '1';
		}//
	}

	/**
	 * Writes the in-memory sets back to rss_pool: `to_send`='1' for the published
	 * ids and `ignored`='1' for the ignored ids. Empty sets issue no query.
	 */
	private function publishAndIgnore(){
		// ids are forced to integers before being placed in the IN (...) list
		$publish_ids = implode(',', array_map('intval', array_keys($this->published)));
		if ($publish_ids != ''){
			mysql_query("update rss_pool set `to_send`='1' where id in ($publish_ids)");
		}

		$ignore_ids = implode(',', array_map('intval', array_keys($this->ignored)));
		if ($ignore_ids != ''){
			mysql_query("update rss_pool set `ignored`='1' where id in ($ignore_ids)");
		}
	}
}//end class