<?php
###############################################################################
# HTTP.php
#
# @author Anil Kumar <akumar@codepunch.com>
# @link   https://codepunch.com
#
############################################################################### 

namespace 	CodePunch\LU;
use 		CodePunch\Base\Util as UTIL;
use			CodePunch\Base\Text as TEXT;

###############################################################################

class HTTP  {
	
	// Upper bound, measured with strlen(), for the JSON encoded response headers.
	const MAX_HEADER_BYTES = 2047;
	
	// Upper bound, measured with strlen(), for the stored page title.
	const MAX_TITLE_BYTES = 254;
	
	private $lookupManager = null;
	
	###########################################################################
	
	/**
	 * @param mixed $lum Lookup manager; Lookup() and getPage() call
	 *                   getAuthentication(), getProxy(), emailHttpAlerts()
	 *                   and updateDomainTable() on it.
	 */
	public function __construct($lum=null) {
		$this->lookupManager = $lum;
	}
	
	###############################################################################
	
	/**
	 * Fetch the home page of the domain identified by $ludata['sid'], extract
	 * the summary fields, optionally send alerts and update the domain table.
	 *
	 * $ludata['status'] is always set to LookupManager::LUQ_COMPLETE, even when
	 * the sid is missing or invalid.
	 *
	 * @param array $ludata Lookup queue entry (passed by reference); 'sid' is read, 'status' is written.
	 * @return int 1 when the domain table update did not return false, otherwise 0.
	 */
	public function Lookup(&$ludata)
	{
		$lookupcount = 0;
		// Validate the id before touching the database.
		$did = isset($ludata['sid']) ? $ludata['sid'] : false;
		if($did !== false && $did > 0) {
			$db = $this->lookupManager->getAuthentication()->getDatabase();
			$domain = $db->getDomainName($did);
			$subdomain = "";
			$url = "http://" . UTIL::idn_convert_to_host_name($domain, $subdomain);
			$pagetoken = trim($db->findOneOf($db->getDomainTableName(), "domain", $domain, "home_page_token"));
			if($pagetoken != "") {
				// pagetoken could be in the format path;;;;token
				$pos = strpos($pagetoken, ";;;;");
				if($pos !== false) {
					// Strip leading slashes so that "/about;;;;token" does not
					// produce "http://host//about".
					$path = ltrim(substr($pagetoken, 0, $pos), "/");
					$pagetoken = substr($pagetoken, $pos+4);
					$url .= "/" . $path;
				}
			}
			$pagedata = $this->getPage($url, $pagetoken, 10, 10);
			$hpdata = self::extractData($pagedata);

			if(method_exists("\CodePunch\LU\Hooks", "onHttpDataLookup")) {
				$hooked = call_user_func(array("\CodePunch\LU\Hooks", "onHttpDataLookup"), $this->lookupManager->getAuthentication(), $domain, $pagedata, $hpdata);
				// A hook that returns nothing (or a non-array) must not wipe out
				// the extracted data; count() below would also fail on it.
				if(is_array($hooked))
					$hpdata = $hooked;
			}

			$disableAlerts = $db->findOneOf($db->getDomainTableName(), "sid", $did, "disable_alerts");
			if($disableAlerts !== false && intval($disableAlerts) == 1)
				UTIL::debug_cli_print("Ignoring HTTP Alerts for " . $domain);
			else
				$this->lookupManager->emailHttpAlerts($did, $pagedata, $hpdata);
			if(count($hpdata)) {
				$hpdata['sid'] = $did;
				$status = $this->lookupManager->updateDomainTable(\CodePunch\LU\LookupManager::HTTP_WEBSITE, $hpdata);
				if($status !== false)
					$lookupcount++;
			}
		}
		$ludata['status'] = \CodePunch\LU\LookupManager::LUQ_COMPLETE;
		return $lookupcount;
	}
	
	###############################################################################
	
	/**
	 * Convert the page data produced by getPage() into the home page fields
	 * that Lookup() passes to the domain table update.
	 *
	 * Keys read from $pagedata: 'url', 'title', 'furl', 'status', 'error',
	 * 'body', 'token' and 'header' (with 'location', 'http', 'last-modified').
	 * Missing optional keys fall back to null / empty values instead of
	 * raising undefined index warnings.
	 *
	 * @param array $pagedata Page data array.
	 * @return array Keys: home_page_url, home_page_title, redirect_last_url,
	 *               home_page_checked_at, home_page_status, home_page_size,
	 *               page_token_found, home_page_header and, when available,
	 *               redirect_urls and home_page_modified.
	 */
	public static function extractData($pagedata)
	{
		$header = isset($pagedata['header']) ? $pagedata['header'] : array();
		
		$hpdata = array();
		$hpdata['home_page_url'] = isset($pagedata['url']) ? $pagedata['url'] : null;
		$hpdata['home_page_title'] = isset($pagedata['title']) ? $pagedata['title'] : null;
		$hpdata['redirect_last_url'] = isset($pagedata['furl']) ? $pagedata['furl'] : null;
		$hpdata['home_page_checked_at'] = date("Y-m-d H:i:s");
		
		if(isset($header['location'])) {
			$location = $header['location'];
			if(is_array($location))
				$location = implode(";", $location);
			$hpdata['redirect_urls'] = $location;
		}
		
		if(isset($header['http'])) {
			// Same array-or-scalar handling as 'location' above.
			$http = $header['http'];
			$hpdata['home_page_status'] = is_array($http) ? implode(";", $http) : $http;
		}
		else {
			$errorstr = isset($pagedata['error']) ? $pagedata['error'] : "Error";
			$status = isset($pagedata['status']) ? $pagedata['status'] : 0;
			$hpdata['home_page_status'] = $status == 0 ? $errorstr : $status;
		}
		
		$body = isset($pagedata['body']) ? $pagedata['body'] : "";
		$hpdata['home_page_size'] = mb_strlen((string)$body);
		$hpdata['page_token_found'] = isset($pagedata['token']) ? $pagedata['token'] : null;
		
		if(isset($header['last-modified'])) {
			$lastmod = $header['last-modified'];
			// With several values (one per redirect hop) the last one is used.
			if(is_array($lastmod))
				$lastmod = end($lastmod);
			// strtotime() returns false for unparsable input; storing
			// date(..., false) would record 1970-01-01 as the modification date.
			$modtime = is_string($lastmod) ? strtotime($lastmod) : false;
			if($modtime !== false)
				$hpdata['home_page_modified'] = date("Y-m-d H:i:s", $modtime);
		}
		
		$hpdata['home_page_header'] = self::encodeHeader($header);
		
		// Cut the title on a character boundary; a plain substr() can split a
		// multibyte character and leave an invalid byte sequence behind.
		$title = $hpdata['home_page_title'];
		if(is_string($title) && strlen($title) > self::MAX_TITLE_BYTES)
			$hpdata['home_page_title'] = mb_strcut($title, 0, self::MAX_TITLE_BYTES);
		return $hpdata;
	}
	
	###############################################################################
	
	/**
	 * JSON encode the response headers so that the result fits into
	 * MAX_HEADER_BYTES.
	 *
	 * If the combined headers from all redirects are too big, a fixed list of
	 * low value entries is removed first. If that is not enough, the largest
	 * remaining entries are removed one at a time so the result stays valid
	 * JSON. Plain truncation is only the last resort.
	 *
	 * @param mixed $header Header array (any other value is encoded as is).
	 * @return string|false Result of json_encode(), possibly shortened.
	 */
	private static function encodeHeader($header)
	{
		$encoded = json_encode($header);
		if(is_array($header) && strlen($encoded) > self::MAX_HEADER_BYTES) {
			$unsetkeys = array('set-cookie', 'connection', 'transfer-encoding', 
			'retry-after', 'cache-control', 'accept-ranges', 'date', 'x-cache',
			'x-served-by', 'content-type', 'pragma');
			foreach($unsetkeys as $key) {
				// Nothing to remove means nothing to re-encode.
				if(!isset($header[$key]))
					continue;
				unset($header[$key]);
				$encoded = json_encode($header);
				if(strlen($encoded) <= self::MAX_HEADER_BYTES)
					break;
			}
			while(strlen($encoded) > self::MAX_HEADER_BYTES && count($header) > 0) {
				$largestKey = null;
				$largestSize = -1;
				foreach($header as $key => $value) {
					$size = strlen(json_encode($value));
					if($size > $largestSize) {
						$largestSize = $size;
						$largestKey = $key;
					}
				}
				unset($header[$largestKey]);
				$encoded = json_encode($header);
			}
		}
		if(strlen($encoded) > self::MAX_HEADER_BYTES)
			$encoded = substr($encoded, 0, self::MAX_HEADER_BYTES);
		return $encoded;
	}
	
	###############################################################################

	/**
	 * Download $url through UTIL::curl_get_url_data() (using the 'http' proxy
	 * from the lookup manager) and add the 'url', 'title', 'size' and 'token'
	 * entries to the returned data.
	 *
	 * 'token' is NULL when no page token was requested, 1 when the token
	 * occurs in the body (case-insensitive) and 0 when it does not or when
	 * the returned 'status' is 0.
	 *
	 * @param string $url       URL to fetch.
	 * @param string $pagetoken Text to look for in the body; "" disables the check.
	 * @param int    $timeout   Passed through to UTIL::curl_get_url_data().
	 * @param int    $redirs    Passed through to UTIL::curl_get_url_data().
	 * @param bool   $sslverify Passed through to UTIL::curl_get_url_data().
	 * @param string $useragent Passed through to UTIL::curl_get_url_data().
	 * @return array Data from UTIL::curl_get_url_data() plus the entries above.
	 */
	private function getPage($url, $pagetoken="", $timeout=10, $redirs=10, $sslverify=true, $useragent='?')
	{
		$ext_proxy = $this->lookupManager->getProxy('http');
		$pagedata = UTIL::curl_get_url_data($url, $timeout, false, false, $redirs, $sslverify, $useragent, $ext_proxy);
		$pagedata['url'] = $url;
		if($pagedata['status'] != 0) {
			// NOTE(review): mb_substr(..., 0) looks like a plain copy of the body;
			// kept until it is confirmed that getDOMTitle() does not rely on it.
			$pagedata['title'] = UTIL::getDOMTitle(mb_substr($pagedata['body'], 0));
			$pagedata['size'] = mb_strlen($pagedata['body']);
			$pagedata['token'] = NULL;
			if($pagetoken != "") {
				// Only presence matters, so locate the token instead of
				// copying the remainder of the body.
				$pagedata['token'] = (mb_stripos($pagedata['body'], $pagetoken) !== false) ? 1 : 0;
			}
		}
		else {
			$pagedata['title'] = "";
			$pagedata['size'] = "";
			$pagedata['token'] = 0;
		}
		return $pagedata;
	}
}

###############################################################################
