<?php
	/**
	 * ClassGrepSearch: searches files for one or more search strings and highlights the matches
	 * Author: Garai Tams, 2015 April
	 * MIT licence: https://opensource.org/licenses/MIT
	**/
	class ClassGrepSearch {
		/**
		 * Class properties (static configuration and per-instance search state)
		**/
		public $classGrepSearch;
		public static $extensions=array();
		private static $instance=0;
		private	$searchType="";
		private	$caseSensitive=false;
		private	$searchArray=array();
		private	$globalResult=true;
		private	$globalCount=0;
		private	$fileCounter=0;
		private	$arrayOfFilenames=array();
		private	$dirFile=array();
		private	$dirArr=array();
		private	$searchString="";
		private	$newArrayOfExtentions=array();
		private	$scanDir="";
		
		/**
		 * Returns the shared ClassGrepSearch object, creating it on first use.
		 *
		 * The static $instance slot holds the integer 0 until the first call;
		 * afterwards it holds the object that every later call hands back.
		 *
		 * @access public
		 * @static
		 *
		 * @return ClassGrepSearch
		**/
		public static function getInstance() {
			if (self::$instance === 0) {
				self::$instance = new ClassGrepSearch();
			}

			return self::$instance;
		}
		
		
		/**
		 * Creates a new ClassGrepSearch object.
		 *
		 * The constructor is public, so instances can be created directly with
		 * `new` as well as through getInstance(). All state starts from the
		 * property defaults; nothing is initialised here.
		 *
		 * @access public
		**/
		public function __construct() {
			// Intentionally empty: PHP discards whatever a constructor returns
			// when it is invoked through `new`, so returning a property from
			// here had no effect.
		}
		
		/**
		 * Builds the list of file extensions that readDir()/readFiles() accept.
		 *
		 * The input is split on $separator and every piece is trimmed, so
		 * "php, txt ,html" with "," yields array("php","txt","html"). The list
		 * is stored on the instance and also returned to the caller.
		 *
		 * @param $separator string/char delimiter between the extensions (must not be empty)
		 * @param $filesWithExtentionsToBeSearched string delimited list of extensions
		 *
		 * @return array the trimmed extensions, in input order
		**/
		public function createArrayOfExtentions($separator,$filesWithExtentionsToBeSearched) {
			// Start from an explicit empty list so the property is always
			// assigned a real array, never an undefined local.
			$newArrayOfExtentions=array();

			foreach (explode($separator,$filesWithExtentionsToBeSearched) as $item) {
				$newArrayOfExtentions[]=trim($item);
			}

			$this->newArrayOfExtentions=$newArrayOfExtentions;

			return $newArrayOfExtentions;
		}
		
		/**
		 * Formats a duration given in seconds as a short human-readable string.
		 *
		 * Durations below one minute are rendered as "N sec(s)"; anything longer
		 * as "M min(s) S sec(s)". The plural form is used for values above 1.
		 *
		 * @param $timeInSec int duration in seconds
		 *
		 * @return string e.g. "45 secs", "1 min 30 secs", "2 mins 0 sec"
		**/
		public function convertSecToMins($timeInSec) {
			if ($timeInSec<60) {
				$secString=($timeInSec>1) ? "secs" : "sec";

				return "$timeInSec $secString";
			}

			// Whole minutes must be truncated, not rounded: 119 seconds is
			// 1 minute 59 seconds, not 2 minutes 59 seconds.
			$minutes=(int) floor($timeInSec/60);
			$seconds=((int) $timeInSec)%60;

			$minString=($minutes>1) ? "mins" : "min";
			$secString=($seconds>1) ? "secs" : "sec";

			return "$minutes $minString $seconds $secString";
		}
		
		/**
		 * Replaces the last occurrence of $search in $subject with $replace.
		 *
		 * A single space is inserted between the text before the match and the
		 * replacement, so lastStrReplace(',', 'and', 'a, b, c') gives
		 * "a, b and c". When $search is empty or does not occur, $subject is
		 * returned unchanged.
		 *
		 * @param $search string text to look for
		 * @param $replace string text to put in its place
		 * @param $subject string text to search in
		 *
		 * @return string
		**/
		public function lastStrReplace($search,$replace,$subject) {
			$search=(string) $search;

			if ($search==="")
				return $subject;

			$positionOfLastString=strrpos($subject,$search);

			// No match: without this guard `false` would be used as an offset
			// and the subject would come back mangled.
			if ($positionOfLastString===false)
				return $subject;

			$stringBeforeSearchString=substr($subject,0,$positionOfLastString)." ";
			// Skip the whole needle, not just its first byte, so that
			// multi-character search strings are removed completely.
			$stringAfterSearchString=substr($subject,$positionOfLastString+strlen($search));

			return $stringBeforeSearchString.$replace.$stringAfterSearchString;
		}
		
		/**
		 * Replaces every occurrence of the current search terms in $subject
		 * with $replace.
		 *
		 * Matches are located by searchMultipleStrPositions(), so the instance's
		 * search array, search type and case-sensitivity setting all apply.
		 * Side effects: the number of matches is added to the global count, and
		 * the global result flag is set to true when at least one match was
		 * replaced and to false otherwise.
		 *
		 * @param $subject string text to search in
		 * @param $replace string text that replaces every match
		 *
		 * @return string the rewritten text, or $subject unchanged when nothing matched
		**/
		public function allStrReplace($subject,$replace) {
			$strPositions=$this->searchMultipleStrPositions($subject);

			if (count($strPositions)===0) {
				$this->globalResult=false;

				return $subject;
			}

			$this->globalCount+=count($strPositions);

			// Matches arrive in ascending order and never overlap, so the
			// subject can be stitched together in one left-to-right pass.
			$result="";
			$cursor=0;

			foreach ($strPositions as $pos) {
				$result.=substr($subject,$cursor,$pos[0]-$cursor).$replace;
				$cursor=$pos[0]+$pos[1];
			}

			$result.=substr($subject,$cursor);
			$this->globalResult=true;

			return $result;
		}
		
		/**
		 * Neutralises markup characters: every "<" and every ">" in $subject is
		 * swapped for a backtick. The result has exactly the same length as the
		 * input, so byte offsets computed on the original still line up.
		 *
		 * @param $subject string
		 *
		 * @return string
		 **/
		public function changePHPKeyString($subject) {
			// One-for-one byte translation: '<' -> '`' and '>' -> '`'.
			return strtr($subject,'<>','``');
		}
		
		/**
		 * Wraps every occurrence of the current search terms in $subject
		 * between $startTag and $endTag.
		 *
		 * Matches are located by searchMultipleStrPositions(). When there is at
		 * least one match, the whole line (matches included) is passed through
		 * changePHPKeyString(), so "<" and ">" from the input come out as
		 * backticks and only $startTag/$endTag contribute markup.
		 * Side effects: the number of matches is added to the global count, and
		 * the global result flag is set to true when something matched and to
		 * false otherwise.
		 *
		 * @param $subject string text to search in
		 * @param $startTag string markup placed before each match
		 * @param $endTag string markup placed after each match
		 *
		 * @return string the tagged text, or $subject unchanged when nothing matched
		**/
		public function allStrReplaceTag($subject,$startTag,$endTag) {
			$strPositions=$this->searchMultipleStrPositions($subject);

			if (count($strPositions)===0) {
				$this->globalResult=false;

				return $subject;
			}

			$this->globalCount+=count($strPositions);

			// changePHPKeyString() swaps characters one for one, so offsets
			// found in $subject are valid in the sanitised copy as well.
			$safeSubject=$this->changePHPKeyString($subject);
			$result="";
			$cursor=0;

			foreach ($strPositions as $pos) {
				// The match itself is taken from the sanitised copy too;
				// otherwise a hit containing "<" or ">" would be emitted raw.
				$result.=substr($safeSubject,$cursor,$pos[0]-$cursor)
					.$startTag.substr($safeSubject,$pos[0],$pos[1]).$endTag;
				$cursor=$pos[0]+$pos[1];
			}

			$result.=substr($safeSubject,$cursor);
			$this->globalResult=true;

			return $result;
		}
		
		/**
		 * Finds every non-overlapping occurrence of $search in $subject.
		 *
		 * The comparison is case-sensitive. Each reported position is a byte
		 * offset measured from the start of $subject, and a match at offset 0
		 * is reported like any other.
		 *
		 * @param $search string text to look for
		 * @param $subject string text to search in
		 *
		 * @return array list of integer offsets in ascending order (empty when
		 *               there is no match or $search is empty)
		**/
		public function searchStrPositions($search,$subject) {
			$searchOccurrences=array();
			$searchLength=strlen($search);

			// An empty needle can never advance the scan.
			if ($searchLength===0)
				return $searchOccurrences;

			$stringOffset=0;

			// Compare against false explicitly: offset 0 is a valid match.
			while (($position=strpos($subject,$search,$stringOffset))!==false) {
				$searchOccurrences[]=$position;
				$stringOffset=$position+$searchLength;
			}

			return $searchOccurrences;
		}
		
		/**
		 * Finds every occurrence of any current search term in $subject.
		 *
		 * The terms come from the instance's search array; each is trimmed and
		 * empty ones are ignored. The subject is scanned left to right: at each
		 * step the term that matches earliest wins (on a tie, the term listed
		 * first), and scanning resumes right after that match, so reported
		 * matches never overlap. Matching honours the case-sensitivity setting.
		 *
		 * When the search type is "all", $subject must contain every term;
		 * if any term is missing an empty array is returned.
		 *
		 * @param $subject string text to search in
		 *
		 * @return array list of array(offset, length) pairs in ascending order
		**/
		public function searchMultipleStrPositions($subject) {
			$searchOccurrences=array();

			// Normalise the terms once instead of on every scan iteration.
			$terms=array();

			foreach ($this->searchArray as $search) {
				$search=trim($search);

				if ($search!=="")
					$terms[]=$search;
			}

			if (count($terms)===0)
				return $searchOccurrences;

			if ($this->searchType=="all") {
				// Test each term against the subject itself. Probing the
				// concatenation of matched fragments can invent terms that
				// only exist across fragment borders and can miss terms
				// that overlap an earlier match.
				foreach ($terms as $term) {
					if ($this->caseSensitive)
						$present=strpos($subject,$term);
					else
						$present=stripos($subject,$term);

					if ($present===false)
						return $searchOccurrences;
				}
			}

			$stringOffset=0;

			while (true) {
				$newPosition=false;
				$currentSearchTerm="";

				foreach ($terms as $term) {
					if ($this->caseSensitive)
						$positionOfCurrentString=strpos($subject,$term,$stringOffset);
					else
						$positionOfCurrentString=stripos($subject,$term,$stringOffset);

					if ($positionOfCurrentString!==false &&
							($newPosition===false || $positionOfCurrentString<$newPosition)) {
						$newPosition=$positionOfCurrentString;
						$currentSearchTerm=$term;
					}
				}

				if ($newPosition===false)
					break;

				$matchLength=strlen($currentSearchTerm);
				$searchOccurrences[]=array($newPosition,$matchLength);
				// Continue after the match so results never overlap.
				$stringOffset=$newPosition+$matchLength;
			}

			return $searchOccurrences;
		}
		
		/**
		 * Counts how often the current search terms occur in one file.
		 *
		 * Each non-empty term of the search array is trimmed (as the matcher in
		 * searchMultipleStrPositions() does) and its non-overlapping
		 * occurrences in the file are added up. Case is ignored unless the
		 * instance is set to case-sensitive.
		 *
		 * @param $filePath string path of the file to inspect
		 *
		 * @return int total number of occurrences; 0 when the file cannot be read
		**/
		public function getSearchCount($filePath) {
			$fileContents=file_get_contents($filePath);

			if ($fileContents===false)
				return 0;

			// Fold the file once up front rather than once per search term.
			if (!$this->caseSensitive)
				$fileContents=strtoupper($fileContents);

			$searchCount=0;

			foreach ($this->searchArray as $searchStr) {
				$searchStr=trim($searchStr);

				if ($searchStr==="")
					continue;

				if (!$this->caseSensitive)
					$searchStr=strtoupper($searchStr);

				$searchCount+=substr_count($fileContents,$searchStr);
			}

			return $searchCount;
		}
		
		/**
		 * Splits a search string into individual search terms and stores them
		 * as the instance's search array.
		 *
		 * Backslashes are stripped and the string is trimmed first. Terms are
		 * separated by whitespace and/or commas; text between double quotes is
		 * kept together as one term. When the search type is "phrase" the
		 * whole cleaned string is treated as a single quoted term.
		 *
		 * Empty pieces are dropped so that element 0 is always the first real
		 * term. If no term remains, the array holds a single empty string so
		 * that element 0 still exists for code that indexes it directly.
		 *
		 * @param $searchString string raw search input
		 *
		**/
		public function createSearchArray($searchString) {
			$newSearchString=trim(stripslashes($searchString));

			// Quote the cleaned string, not the raw input, so that the
			// trimming and unslashing above also apply in phrase mode.
			if ($this->searchType=="phrase")
				$newSearchString="\"".$newSearchString."\"";

			$terms=preg_split("/[\s,]*\\\"([^\\\"]+)\\\"[\s,]*|[\s,]+/",$newSearchString,0,PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

			if ($terms===false || count($terms)===0)
				$terms=array("");

			$this->searchArray=$terms;
		}
		
		/**
		 * Scans $path and all of its sub-directories for files that contain
		 * the current search string.
		 *
		 * Only files whose extension is in the list built by
		 * createArrayOfExtentions() are inspected. Every matching file is
		 * appended to the list of file names (with the scan directory replaced
		 * by "."), every inspected file name is appended to the directory-file
		 * list, and every visited sub-directory name to the directory list.
		 * The search array is rebuilt from the search string before scanning.
		 *
		 * @param $path string directory to scan
		 * @param $explode_sample string separator used to split a file name
		 *                               into name and extension
		 *
		 * @return int running number of matching files found by this instance
		**/
		public function readDir($path,$explode_sample=".") {
			$this->createSearchArray($this->searchString);

			return $this->scanDirectory($path,$explode_sample,true);
		}

		/**
		 * Scans the files directly inside $path (sub-directories are ignored)
		 * for the current search string. Apart from not recursing it behaves
		 * exactly like readDir().
		 *
		 * @param $path string directory to scan
		 * @param $explode_sample string separator used to split a file name
		 *                               into name and extension
		 *
		 * @return int running number of matching files found by this instance
		**/
		public function readFiles($path,$explode_sample=".") {
			$this->createSearchArray($this->searchString);

			return $this->scanDirectory($path,$explode_sample,false);
		}

		/**
		 * Walks one directory, optionally descending into sub-directories,
		 * and hands every regular file to examineFile().
		 *
		 * @param $path string directory to walk
		 * @param $explode_sample string extension separator
		 * @param $recursive bool whether to descend into sub-directories
		 *
		 * @return int running number of matching files
		**/
		private function scanDirectory($path,$explode_sample,$recursive) {
			$handle=@opendir($path);

			// An unreadable or missing directory contributes nothing; passing
			// a failed handle on to readdir()/closedir() is an error.
			if ($handle===false)
				return $this->fileCounter;

			// Compare with false explicitly so that an entry named "0"
			// does not end the loop early.
			while (($file=readdir($handle))!==false) {
				if ($file==="." || $file==="..")
					continue;

				$fullPath=$path.'/'.$file;

				if (is_dir($fullPath)) {
					if ($recursive) {
						array_push($this->dirArr,$file);
						$this->scanDirectory($fullPath,$explode_sample,true);
					}
				}
				else if (is_file($fullPath)) {
					$this->examineFile($fullPath,$file,$explode_sample);
				}
			}

			closedir($handle);

			return $this->fileCounter;
		}

		/**
		 * Records one file if its extension is accepted, and counts it as a
		 * hit when its content matches the search.
		 *
		 * @param $filePath string full path of the file
		 * @param $file string bare file name
		 * @param $explode_sample string extension separator
		**/
		private function examineFile($filePath,$file,$explode_sample) {
			// Resolved afresh for every file, so nothing leaks over from the
			// previously examined one.
			$ext=$this->resolveExtension($file,$explode_sample);

			if ($ext===null || !in_array(trim($ext),$this->newArrayOfExtentions,true))
				return;

			if ($this->fileMatchesSearch($filePath)) {
				$displayFilename=str_replace($this->scanDir,'.',$filePath);
				array_push($this->arrayOfFilenames,$displayFilename);
				$this->fileCounter++;
			}

			array_push($this->dirFile,$file);
		}

		/**
		 * Determines the extension under which a file is filtered.
		 *
		 * A name without any "." borrows the first entry of the static
		 * $extensions list; when that list is empty the file is skipped. The
		 * script is terminated when that first entry is the '&nbsp;'
		 * placeholder. Otherwise the extension is the last non-empty piece of
		 * the name split on $explode_sample.
		 *
		 * @param $file string bare file name
		 * @param $explode_sample string extension separator
		 *
		 * @return string|null the extension, or null when none can be determined
		**/
		private function resolveExtension($file,$explode_sample) {
			if (strpos($file,'.')===false) {
				if (!isset(self::$extensions[0]))
					return null;

				if (self::$extensions[0]==='&nbsp;')
					die("<b><u>TO USE AN EMPTY FILE EXTENSION AS THE FIRST ONE IS FORBIDDEN!</u></b>");

				return (string) self::$extensions[0];
			}

			// 'strlen' keeps a literal "0" piece, which the default
			// array_filter() would discard as falsy.
			$pieces=array_filter(explode($explode_sample,$file),'strlen');
			$ext=end($pieces);

			return ($ext===false) ? null : $ext;
		}

		/**
		 * Tells whether a file's content satisfies the current search.
		 *
		 * With search type "allInFile" every non-empty term must occur in the
		 * file; with any other search type one occurring term is enough.
		 * Terms are trimmed and compared according to the case-sensitivity
		 * setting.
		 *
		 * @param $filePath string full path of the file
		 *
		 * @return bool
		**/
		private function fileMatchesSearch($filePath) {
			// Read through the real path; the display path is only valid
			// relative to the scan directory.
			$contents=is_readable($filePath) ? file_get_contents($filePath) : false;

			if ($contents===false || $contents==="")
				return false;

			$requireAll=($this->searchType=="allInFile");
			$sawTerm=false;

			foreach ($this->searchArray as $searchStr) {
				$searchStr=trim($searchStr);

				if ($searchStr==="")
					continue;

				$sawTerm=true;
				$found=$this->containsWord($contents,$searchStr,$this->caseSensitive);

				if ($found && !$requireAll)
					return true;

				if (!$found && $requireAll)
					return false;
			}

			// Reaching this point means "every term found" in allInFile mode
			// and "no term found" otherwise.
			return $sawTerm && $requireAll;
		}
		
		/**
		 * Reports whether $word occurs anywhere inside $str.
		 *
		 * This is a plain substring test: despite the name, no word boundaries
		 * are checked.
		 *
		 * @param $str string text to search in
		 * @param $word string text to look for
		 * @param $caseSens bool true for a case-sensitive comparison
		 *
		 * @return bool
		**/
		public function containsWord($str,$word,$caseSens=false) {
			$position=$caseSens ? strpos($str,$word) : stripos($str,$word);

			return $position!==false;
		}
		
		/**
		 * Builds a term index over a set of documents.
		 *
		 * Every document is split on $separator. The result holds, per
		 * document id, the number of terms it was split into ('doc_count') and,
		 * per term, the number of documents containing it
		 * ('document_frequency') together with how often it occurs in each of
		 * those documents ('postings' => doc id => 'term_frequency').
		 * Approach taken from http://phpir.com/simple-search-the-vector-space-model/
		 *
		 * @param $corpus array documents keyed by document id
		 * @param $separator string delimiter between the terms of a document
		 *
		 * @return array array('doc_count' => array, 'dictionary' => array)
		**/
		public function get_corpus_index($corpus=array(),$separator=' ') {
			$lengths=array();
			$index=array();

			foreach ($corpus as $doc_id => $doc) {
				$tokens=explode($separator,$doc);
				$lengths[$doc_id]=count($tokens);

				foreach ($tokens as $token) {
					if (!isset($index[$token])) {
						$index[$token]=array('document_frequency' => 0, 'postings' => array());
					}

					// Repeat sighting within the same document: bump its counter.
					if (isset($index[$token]['postings'][$doc_id])) {
						$index[$token]['postings'][$doc_id]['term_frequency']++;
						continue;
					}

					// First sighting in this document: one more document
					// contains the term, and its counter starts at one.
					$index[$token]['document_frequency']++;
					$index[$token]['postings'][$doc_id]=array('term_frequency' => 1);
				}
			}

			return array('doc_count' => $lengths, 'dictionary' => $index);
		}
		
		/**
		 * Ranks the documents of a corpus by how well they match a query.
		 *
		 * The query is split on $separator; for every query term found in the
		 * index from get_corpus_index(), each document containing it receives
		 * term_frequency * weight. Scores are then divided by the document's
		 * term count and sorted from high to low, keeping the document ids as
		 * keys. Documents containing none of the query terms are left out.
		 *
		 * @param $query string terms to look for
		 * @param $corpus array documents keyed by document id
		 * @param $separator string delimiter between terms
		 *
		 * @return array document id => score, best match first (empty for an
		 *               empty query or corpus)
		**/
		public function get_similar_documents($query='',$corpus=array(),$separator=' ') {
			$scores=array();

			if ($query=='' || empty($corpus))
				return $scores;

			$index=$this->get_corpus_index($corpus,$separator);
			$totalDocs=count($index['doc_count']);

			foreach (explode($separator,$query) as $word) {
				if (!isset($index['dictionary'][$word]))
					continue;

				$entry=$index['dictionary'][$word];
				// NOTE(review): by operator precedence this evaluates as
				// log2(totalDocs + (1 / document_frequency) + 1). A smoothed
				// idf would be log2((totalDocs + 1) / (document_frequency + 1));
				// confirm which one is intended before relying on the scores.
				$weight=log($totalDocs+1/$entry['document_frequency']+1,2);

				foreach ($entry['postings'] as $doc_id => $posting) {
					$contribution=$posting['term_frequency']*$weight;

					if (isset($scores[$doc_id]))
						$scores[$doc_id]+=$contribution;
					else
						$scores[$doc_id]=$contribution;
				}
			}

			// Length normalisation: divide by the document's term count.
			foreach ($scores as $doc_id => $rawScore)
				$scores[$doc_id]=$rawScore/$index['doc_count'][$doc_id];

			// Highest score first, document ids preserved as keys.
			arsort($scores);

			return $scores;
		}
		
		/**
		 * Renders the lines of a file that contain the search terms as HTML,
		 * with every match highlighted.
		 *
		 * Each line is passed to $classGrepSearch->allStrReplaceTag(); only
		 * lines for which a match is reported are emitted, prefixed with
		 * "line N. " (N is 1-based) and followed by "<br />". The global match
		 * count of $classGrepSearch is reset to 0 first, so after the call it
		 * holds the number of matches in this file. A file that cannot be read
		 * is treated like an empty file.
		 *
		 * @param $filePath string path of the file to render
		 * @param $classGrepSearch ClassGrepSearch instance that performs the matching
		 * @param $fontcolor string colour for highlighted matches (HTML-escaped
		 *                          before it is placed in the font tag)
		 * @param $usetable bool wrap the output in a table of CSS class "bbb",
		 *                       one row per line
		 *
		 * @return string the generated HTML
		**/
		public function createLinesFromFile($filePath,$classGrepSearch,$fontcolor,$usetable=false) {
			$linesArray=is_readable($filePath) ? file($filePath) : false;

			// file() yields false on failure, which must not be iterated.
			if ($linesArray===false)
				$linesArray=array();

			$classGrepSearch->setGlobalCount(0);

			// Escape the colour so it cannot break out of the quoted attribute.
			$startTag="<b><font color='".htmlspecialchars($fontcolor,ENT_QUOTES)."'>";
			$endTag="</font></b>";

			/** in a CSS file
			.bbb {
				border-collapse: collapse;
				border: 0px solid black;
			}
			**/
			$htmlLines=$usetable ? "<table class=\"bbb\">" : "";

			foreach ($linesArray as $index => $line) {
				$newLine=$classGrepSearch->allStrReplaceTag($line,$startTag,$endTag)."<br />";

				// allStrReplaceTag() flags whether this line had any match.
				if (!$classGrepSearch->getGlobalResult())
					continue;

				$lineNumber=$index+1;

				if ($usetable)
					$htmlLines.="<tr><td class=\"bbb\">line ".$lineNumber.". ".$newLine."</td></tr>";
				else
					$htmlLines.='line '.$lineNumber.'. '.$newLine;
			}

			if ($usetable)
				$htmlLines.="</table>";

			return $htmlLines;
		}
		
		/**
		 *
		 * Miscellaneous setters and getters
		 *
		**/
		/** Overwrites the running match counter (createLinesFromFile() resets it to 0 per file). */
		public function setGlobalCount($aCount) {
			$this->globalCount=$aCount;
		}
		
		/** Returns the match counter that allStrReplace()/allStrReplaceTag() add to on every hit. */
		public function getGlobalCount() {
			return $this->globalCount;
		}
		
		/**
		 * Sets the search mode. The values this class tests for are "all",
		 * "phrase" and "allInFile"; any other value takes none of those
		 * branches.
		 */
		public function setSearchType($aSearchType) {
			$this->searchType=$aSearchType;
		}
		
		/** Sets the raw search string that readDir()/readFiles() turn into the search array. */
		public function setSearchString($aSearchString) {
			$this->searchString=$aSearchString;
		}
		
		/** Returns the search mode last set with setSearchType() ("" by default). */
		public function getSearchType() {
			return $this->searchType;
		}
		
		/** Returns the raw search string last set with setSearchString(). */
		public function getSearchString() {
			return $this->searchString;
		}
		
		/** Returns the display names of all files recorded as matches by readDir()/readFiles(). */
		public function getarrayOfFilenames() {
			return $this->arrayOfFilenames;
		}
		
		/** Sets the base directory; it is replaced by "." when display file names are built. */
		public function setScanDir($aScanDir) {
			$this->scanDir=$aScanDir;
		}
		
		/** Returns the base directory last set with setScanDir(). */
		public function getScanDir() {
			return $this->scanDir;
		}
		
		/** Chooses between case-sensitive (true) and case-insensitive (false) matching. */
		public function setCaseSensitive($aCaseSensitive) {
			$this->caseSensitive=$aCaseSensitive;
		}
		
		/** Returns the bare names of all files whose extension was accepted during scanning. */
		public function getDirFile() {
			return $this->dirFile;
		}
		
		/** Returns the names of the sub-directories that readDir() descended into. */
		public function getDirArray() {
			return $this->dirArr;
		}
		
		/** Overwrites the flag that reports whether the last replace call found a match. */
		public function setGlobalResult($aResult) {
			$this->globalResult=$aResult;
		}
		
		/** Returns true if the last allStrReplace()/allStrReplaceTag() call found at least one match. */
		public function getGlobalResult() {
			return $this->globalResult;
		}
	}
?>