* @copyright 2009 Zoltán Köteles
* @license   http://www.opensource.org/licenses/bsd-license.php new BSD
* @version   SVN:
* @link      http://metatags.sourceforge.net/
*/

/**
 * Text Meta Tags
 *
 * Text_MetaTags generates meta keywords, a meta description and a title
 * from a piece of text (set directly or fetched from a URL).
 *
 * @category Text
 * @package  Text_MetaTags
 * @author   Zoltán Köteles
 * @license  http://www.opensource.org/licenses/bsd-license.php new BSD
 * @link     http://metatags.sourceforge.net/
 */
class Text_MetaTags
{
    /**
     * Meta description max. length
     *
     * @access private
     *
     * @var int
     */
    private $_maxDescriptionLength = 150;

    /**
     * The min. length of the words in keywords
     *
     * @access private
     *
     * @var int
     */
    private $_minWordLength = 3;

    /**
     * Max. number of the keywords
     *
     * @access private
     *
     * @var int
     */
    private $_maxKeywords = 15;

    /**
     * Banned words in the keywords (stored trimmed and lower-cased)
     *
     * @access private
     *
     * @var array
     */
    private $_bannedWords = array();

    /**
     * Normalised text the meta data is generated from
     *
     * @access private
     *
     * @var string
     */
    private $_text = '';

    /**
     * Marks at the end of the sentences
     *
     * @access private
     *
     * @var string
     */
    private $_marks = '.!?';

    /**
     * Cached result of the last getKeywords() call ('' means "not computed")
     *
     * @access private
     *
     * @var string
     */
    private $_keywords = '';

    /**
     * Max. length of title
     *
     * @access private
     *
     * @var int
     */
    private $_maxTitleLength = 70;

    /**
     * Array of the words in title
     *
     * @access private
     *
     * @var array
     */
    private $_titleArr = array();

    /**
     * Title
     *
     * @access private
     *
     * @var string
     */
    private $_title = '';

    /**
     * Construct
     *
     * Available options:
     * - maxDescriptionLength
     * - minWordLength
     * - maxKeywords
     * - bannedWords
     * - marks
     * - maxTitleLength
     *
     * You can set it:
     *
     * $metaOptions = array(
     *     'maxDescriptionLength' => 70,
     *     'minWordLength'        => 3,
     *     'maxKeywords'          => 15,
     *     'bannedWords'          => 'foo, bar', //or array('foo', 'bar')
     *     'marks'                => '.?!',
     *     'maxTitleLength'       => 30
     * );
     *
     * $meta = new Text_MetaTags($metaOptions);
     *
     * Every option is validated by the matching setter, so invalid values
     * are ignored and the defaults stay in place.
     *
     * @param array $options parameter list
     *
     * @access public
     */
    public function __construct($options = array())
    {
        // max. length of the meta description
        if (isset($options['maxDescriptionLength'])) {
            $this->setDescriptionLength($options['maxDescriptionLength']);
        }

        // min. length of the words in keywords
        if (isset($options['minWordLength'])) {
            $this->setMinWordLength($options['minWordLength']);
        }

        // max. number of the keywords
        if (isset($options['maxKeywords'])) {
            $this->setMaxKeywords($options['maxKeywords']);
        }

        // banned words in the keywords
        if (isset($options['bannedWords'])) {
            $this->setBannedWords($options['bannedWords']);
        }

        // marks at the end of the sentences
        if (isset($options['marks']) && is_string($options['marks'])) {
            $this->_marks = $options['marks'];
        }

        // max. length of title
        if (isset($options['maxTitleLength'])) {
            $this->setMaxTitleLength($options['maxTitleLength']);
        }
    }

    /**
     * Sets the text
     *
     * The text is normalised by _stripText() and the cached keywords are
     * dropped so that they are recomputed for the new text.
     *
     * @param string $text Text
     *
     * @access public
     *
     * @return bool always true
     */
    public function setText($text)
    {
        $this->_text     = $this->_stripText($text);
        $this->_keywords = '';

        return true;
    }

    /**
     * Get the text
     *
     * @access public
     *
     * @return string the normalised text
     */
    public function getText()
    {
        return $this->_text;
    }

    /**
     * Sets the url
     *
     * Fetches the url with HTTP_Request2 and, on a 200 response, uses the
     * content between <body> and </body> as text and the content between
     * <title> and </title> as title. Failures are echoed, as before.
     *
     * NOTE(review): the tag literals were restored from context (the calls
     * had lost their delimiter arguments) - confirm against upstream.
     *
     * @param string $url Url what we want to check
     *
     * @access public
     *
     * @return bool true when the page was fetched, false otherwise
     */
    public function setUrl($url)
    {
        require_once 'HTTP/Request2.php';

        $request = new HTTP_Request2($url, HTTP_Request2::METHOD_GET);

        try {
            $response = $request->send();

            if (200 == $response->getStatus()) {
                $body = $response->getBody();

                //sets the text
                $this->_text = $this->_stripText(
                    $this->_extractString($body, '<body>', '</body>')
                );
                $this->_keywords = '';

                //sets the title
                $this->setTitle(
                    $this->_extractString($body, '<title>', '</title>')
                );

                return true;
            }

            echo 'Unexpected HTTP status: '.$response->getStatus().' '.
                $response->getReasonPhrase();
        } catch (HTTP_Request2_Exception $e) {
            echo 'Error: '.$e->getMessage();
        }

        return false;
    }

    /**
     * Get all tags (title, description, keywords)
     *
     * The title is read last because getDescription() fills in a missing
     * title as a side effect.
     *
     * @access public
     *
     * @return array with the keys 'keywords', 'description' and 'title'
     */
    public function getMetaTags()
    {
        $keywords    = $this->getKeywords();
        $description = $this->getDescription();
        $title       = $this->getTitle();

        return array(
            'keywords'    => $keywords,
            'description' => $description,
            'title'       => $title
        );
    }

    /**
     * Sets the max. keywords
     *
     * Non-numeric values are ignored.
     *
     * @param int $max Max. keywords
     *
     * @access public
     *
     * @return bool always true
     */
    public function setMaxKeywords($max)
    {
        if (is_numeric($max)) {
            $this->_maxKeywords = (int)$max;
            $this->_keywords    = '';
        }

        return true;
    }

    /**
     * Get max. keywords
     *
     * @access public
     *
     * @return int
     */
    public function getMaxKeywords()
    {
        return (int)$this->_maxKeywords;
    }

    /**
     * Sets the min. word's length in the keywords
     *
     * Non-numeric values are ignored.
     *
     * @param int $len Min. length
     *
     * @access public
     *
     * @return bool always true
     */
    public function setMinWordLength($len)
    {
        if (is_numeric($len)) {
            $this->_minWordLength = (int)$len;
            $this->_keywords      = '';
        }

        return true;
    }

    /**
     * Get min. word's length
     *
     * @access public
     *
     * @return int
     */
    public function getMinWordLength()
    {
        return (int)$this->_minWordLength;
    }

    /**
     * Sets the banned words
     *
     * Accepts an array or a comma separated string. Each word is trimmed
     * and lower-cased (the analysed text is always lower-case) and empty
     * entries are dropped, so 'foo, Bar' bans "foo" and "bar". Any other
     * type is ignored.
     *
     * @param array|string $words Banned words
     *
     * @access public
     *
     * @return bool always true
     */
    public function setBannedWords($words)
    {
        if (is_string($words)) {
            $words = explode(',', $words);
        }

        if (is_array($words)) {
            $normalized = array();
            foreach ($words as $word) {
                if (!is_scalar($word)) {
                    continue;
                }
                $word = $this->_stringToLower(trim((string)$word));
                if ($word !== '') {
                    $normalized[] = $word;
                }
            }

            $this->_bannedWords = $normalized;
            $this->_keywords    = '';
        }

        return true;
    }

    /**
     * Get banned words
     *
     * @access public
     *
     * @return array the normalised banned words
     */
    public function getBannedWords()
    {
        return $this->_bannedWords;
    }

    /**
     * Get the keywords
     *
     * Splits the text into words, counts single words and two/three word
     * phrases, and returns the most frequent ones (at most maxKeywords)
     * joined with ', '.
     *
     * @access public
     *
     * @return string comma separated keywords, '' when there are none
     */
    public function getKeywords()
    {
        $text   = str_replace($this->_createArrayFromMarks(), ' ', $this->getText());
        $tokens = explode(',', str_replace(array(' ', '.'), ',', $text));

        // Array union instead of array_merge(): the phrase is the array key
        // and array_merge() would renumber purely numeric words ("2009").
        // The three lists cannot share keys because their phrases contain
        // a different number of spaces.
        $ranked = $this->_getOneWords($tokens)
            + $this->_getTwoWords($tokens)
            + $this->_getThreeWords($tokens);

        uasort($ranked, array($this, '_compare'));

        $this->_keywords = $this->_countKeywords($ranked);

        return $this->_keywords;
    }

    /**
     * Sets the description max. length
     *
     * Non-numeric values are ignored.
     *
     * @param int $len Description max. length
     *
     * @access public
     *
     * @return bool always true
     */
    public function setDescriptionLength($len)
    {
        if (is_numeric($len)) {
            $this->_maxDescriptionLength = (int)$len;
        }

        return true;
    }

    /**
     * Get the description's max. length
     *
     * @access public
     *
     * @return int
     */
    public function getDescriptionLength()
    {
        return (int)$this->_maxDescriptionLength;
    }

    /**
     * Get meta description
     *
     * Splits the text into sentences, orders them by the number of keyword
     * occurrences (highest first), joins them with '. ' and truncates the
     * result to the max. description length. When no title has been set,
     * the title is derived from the same text as a side effect.
     *
     * @access public
     *
     * @return string
     */
    public function getDescription()
    {
        $sentences = $this->_implodeByMarks();

        if (!is_array($sentences)) {
            // preg_split() failed; fall back to the plain text
            return $this->_truncateText(
                $this->getText(), $this->_maxDescriptionLength
            );
        }

        $needles = $this->_keywordNeedles();
        $scores  = array();
        foreach ($sentences as $key => $sentence) {
            // a mark at the very end of the text leaves an empty fragment
            if (trim($sentence) === '') {
                continue;
            }
            $scores[$key] = $this->_countStringInSentence($sentence, $needles);
        }

        uasort($scores, array($this, '_compare'));

        $parts = array();
        foreach (array_keys($scores) as $key) {
            $parts[] = ucfirst(trim($sentences[$key])).'.';
        }
        $description = implode(' ', $parts);

        //check title
        if ($this->_title == '') {
            $this->_title = $this->_truncateText(
                $description, $this->_maxTitleLength
            );
        }

        return $this->_truncateText($description, $this->_maxDescriptionLength);
    }

    /**
     * Sets the max. length of the title
     *
     * Non-numeric values are ignored.
     *
     * @param int $len Max. length of the title
     *
     * @access public
     *
     * @return bool always true
     */
    public function setMaxTitleLength($len)
    {
        if (is_numeric($len)) {
            $this->_maxTitleLength = (int)$len;
        }

        return true;
    }

    /**
     * Get max. length of the title
     *
     * @access public
     *
     * @return int
     */
    public function getMaxTitleLength()
    {
        return (int)$this->_maxTitleLength;
    }

    /**
     * Sets the title
     *
     * Stores the title truncated to the max. title length and keeps its
     * normalised words, which give matching keywords a higher rank.
     *
     * @param string $title Title
     *
     * @access public
     *
     * @return bool always true
     */
    public function setTitle($title)
    {
        //create an array from title
        $this->_titleArr = explode(' ', $this->_stripText($title));
        $this->_title    = $this->_truncateText($title, $this->_maxTitleLength);
        $this->_keywords = '';

        return true;
    }

    /**
     * Get title
     *
     * @access public
     *
     * @return string
     */
    public function getTitle()
    {
        return $this->_title;
    }

    /**
     * Split the text into sentences at the marks
     *
     * A mark only ends a sentence when it is followed by whitespace or the
     * end of the text. Every mark is quoted, so characters that are special
     * in a regular expression ('^', '\', ...) are matched literally.
     *
     * @access private
     *
     * @return array|false list of sentences, false when the split fails
     */
    private function _implodeByMarks()
    {
        $marks = $this->_createArrayFromMarks();
        if (count($marks) == 0) {
            // without marks the whole text is one sentence
            return array($this->getText());
        }

        $alternatives = array();
        foreach ($marks as $mark) {
            $alternatives[] = preg_quote($mark, '/');
        }

        return preg_split(
            '/(?:'.implode('|', $alternatives).')(?:\s|$)/',
            $this->getText()
        );
    }

    /**
     * Creates an array from the marks
     *
     * The marks string is split with the class's string helpers, so a
     * multibyte mark stays in one piece when mbstring is available.
     *
     * @access private
     *
     * @return array one entry per mark
     */
    private function _createArrayFromMarks()
    {
        $marksArray = array();
        $countMarks = $this->_stringLength($this->_marks);

        for ($i = 0; $i < $countMarks; $i++) {
            $marksArray[] = $this->_stringSubstring($this->_marks, $i, 1);
        }

        return $marksArray;
    }

    /**
     * Compare callback that sorts in descending order
     *
     * @param int $a Expression 1
     * @param int $b Expression 2
     *
     * @access private
     *
     * @return int 0 when equal, 1 when $a is smaller, -1 otherwise
     */
    private function _compare($a, $b)
    {
        if ($a == $b) {
            return 0;
        }

        return ($a < $b) ? 1 : -1;
    }

    /**
     * Get the current keywords as a list of search needles
     *
     * Computes the keywords when they are not cached. Every keyword is
     * trimmed and lower-cased; empty entries are dropped because an empty
     * needle is an error for the substring count functions.
     *
     * @access private
     *
     * @return array
     */
    private function _keywordNeedles()
    {
        if ($this->_keywords == '') {
            $this->getKeywords();
        }

        $needles = array();
        foreach (explode(',', $this->_keywords) as $keyword) {
            $keyword = $this->_stringToLower(trim($keyword));
            if ($keyword !== '') {
                $needles[] = $keyword;
            }
        }

        return $needles;
    }

    /**
     * Count keyword occurrences in a sentence
     *
     * @param string     $sentence Sentence
     * @param array|null $needles  Keywords to look for; taken from the
     *                             current keywords when omitted
     *
     * @access private
     *
     * @return int total number of occurrences of all keywords
     */
    private function _countStringInSentence($sentence, $needles = null)
    {
        if (!is_array($needles)) {
            $needles = $this->_keywordNeedles();
        }

        $haystack = $this->_stringToLower($sentence);
        $wordNum  = 0;
        foreach ($needles as $needle) {
            $wordNum += $this->_stringSubstringCount($haystack, $needle);
        }

        return $wordNum;
    }

    /**
     * Strip unwanted character from text
     *
     * Removes tags, replaces the listed punctuation and control characters
     * with a space, collapses runs of spaces and lower-cases the result.
     * Sentence marks ('.', '!', '?') are kept.
     *
     * @param string $text Text
     *
     * @access private
     *
     * @return string
     */
    private function _stripText($text)
    {
        $punctuations = array(
            ',', ')', '(', "'", '"', '<', '>', '/', '-', '_', '[', ']', ':',
            '+', '=', '#', '$', '©', '™', '®', ';', '*',
            // literal backslash sequences (single quoted), kept as before
            '\r', '\n',
            // non-breaking space (UTF-8), line feed, carriage return, tab
            "\xC2\xA0", chr(10), chr(13), chr(9)
        );

        $text = strip_tags((string)$text);
        $text = str_replace($punctuations, ' ', $text);
        $text = preg_replace('/ {2,}/', ' ', $text);

        return $this->_stringToLower($text);
    }

    /**
     * Truncate text
     *
     * A text longer than $length is cut at the last whitespace that keeps
     * it within the limit, so words are not split; a text without any
     * whitespace is cut hard at $length.
     *
     * @param string $text   Text
     * @param int    $length Max. length of the text
     *
     * @access private
     *
     * @return string
     */
    private function _truncateText($text, $length)
    {
        if ($this->_stringLength($text) > $length) {
            // keep one extra character so that a word ending exactly at
            // the limit is recognised as complete
            $text = preg_replace(
                '/\s+?(\S+)?$/',
                '',
                $this->_stringSubstring($text, 0, $length + 1)
            );
        }

        return $this->_stringSubstring($text, 0, $length);
    }

    /**
     * Count phrases of $size consecutive words
     *
     * A phrase is counted only when each of its words is at least
     * minWordLength long and not banned; with a positive minWordLength the
     * empty tokens left by sentence marks therefore also break a phrase.
     * Every occurrence of a phrase that also appears in the title counts
     * twice.
     *
     * @param array $tokens Words of the text, in order
     * @param int   $size   Number of words per phrase (1 or more)
     *
     * @access private
     *
     * @return array phrase => count, highest first, at most maxKeywords
     */
    private function _collectPhrases($tokens, $size)
    {
        $tokens = array_map('trim', array_values($tokens));
        $banned = array_flip($this->_bannedWords);

        // decide once per token whether it may be part of a keyword
        $eligible = array();
        foreach ($tokens as $index => $token) {
            $eligible[$index]
                = $this->_stringLength($token) >= $this->_minWordLength
                && !isset($banned[$token]);
        }

        // space-padded title, so whole words/phrases can be searched for
        $title = '';
        if (is_array($this->_titleArr) && count($this->_titleArr) > 0) {
            $title = ' '.implode(' ', $this->_titleArr).' ';
        }

        $wordCounter = array();
        $last        = count($tokens) - $size;
        for ($start = 0; $start <= $last; $start++) {
            $usable = true;
            for ($offset = 0; $offset < $size; $offset++) {
                if (!$eligible[$start + $offset]) {
                    $usable = false;
                    break;
                }
            }
            if (!$usable) {
                continue;
            }

            $phrase = implode(' ', array_slice($tokens, $start, $size));
            $hits   = isset($wordCounter[$phrase]) ? $wordCounter[$phrase] : 0;
            $hits++;

            //check title
            if ($title !== '' && strpos($title, ' '.$phrase.' ') !== false) {
                $hits++;
            }

            $wordCounter[$phrase] = $hits;
        }

        uasort($wordCounter, array($this, '_compare'));

        return array_slice(
            $wordCounter, 0, max(0, (int)$this->_maxKeywords), true
        );
    }

    /**
     * Collect one words
     *
     * @param array $text Words of the text
     *
     * @access private
     *
     * @return array word => count
     */
    private function _getOneWords($text)
    {
        return $this->_collectPhrases($text, 1);
    }

    /**
     * Collect two words
     *
     * @param array $text Words of the text
     *
     * @access private
     *
     * @return array phrase => count
     */
    private function _getTwoWords($text)
    {
        return $this->_collectPhrases($text, 2);
    }

    /**
     * Collect three words
     *
     * @param array $text Words of the text
     *
     * @access private
     *
     * @return array phrase => count
     */
    private function _getThreeWords($text)
    {
        return $this->_collectPhrases($text, 3);
    }

    /**
     * Join the first maxKeywords keys of the ranked list
     *
     * @param array $words Collected one, two, three keywords (phrase => count)
     *
     * @access private
     *
     * @return string keywords separated by ', ', '' when there are none
     */
    private function _countKeywords($words)
    {
        $limit = max(0, (int)$this->_maxKeywords);

        return implode(', ', array_slice(array_keys($words), 0, $limit));
    }

    /**
     * Extract string from a string
     *
     * Returns the part of $str between the first $start and the next $end
     * after it. The markers are matched case-insensitively.
     *
     * @param string $str   The whole string
     * @param string $start Start string
     * @param string $end   End string
     *
     * @access private
     *
     * @return string the enclosed text, '' when a marker is empty or missing
     */
    private function _extractString($str, $start, $end)
    {
        $str   = (string)$str;
        $start = $this->_stringToLower((string)$start);
        $end   = $this->_stringToLower((string)$end);

        if ($start === '' || $end === '') {
            return '';
        }

        $strLower = $this->_stringToLower($str);

        $posStart = $this->_stringPosition($strLower, $start);
        if ($posStart === false) {
            return '';
        }

        $pos1   = $posStart + $this->_stringLength($start);
        $posEnd = $this->_stringPosition($strLower, $end, $pos1);
        if ($posEnd === false) {
            return '';
        }

        return $this->_stringSubstring($str, $pos1, $posEnd - $pos1);
    }

    /**
     * Converts the string to lowercase
     *
     * Uses mbstring when it is available.
     *
     * @param string $string The input string.
     *
     * @access private
     *
     * @return string
     */
    private function _stringToLower($string)
    {
        if (function_exists('mb_strtolower')) {
            return mb_strtolower($string);
        }

        return strtolower($string);
    }

    /**
     * Get string length
     *
     * Uses mbstring when it is available.
     *
     * @param string $string The string being measured for length.
     *
     * @access private
     *
     * @return int
     */
    private function _stringLength($string)
    {
        if (function_exists('mb_strlen')) {
            return mb_strlen($string);
        }

        return strlen($string);
    }

    /**
     * Count the number of substring occurrences
     *
     * Uses mbstring when it is available. $search must not be empty.
     *
     * @param string $string The string to search in
     * @param string $search The substring to search for
     *
     * @access private
     *
     * @return int
     */
    private function _stringSubstringCount($string, $search)
    {
        if (function_exists('mb_substr_count')) {
            return mb_substr_count($string, $search);
        }

        return substr_count($string, $search);
    }

    /**
     * Find position of first occurrence of a string
     *
     * Uses mbstring when it is available.
     *
     * @param string $string The string to search in
     * @param string $needle The string what we search
     * @param int    $offset Which character in $string to start searching.
     *
     * @access private
     *
     * @return int|false the position, false when $needle is not found
     */
    private function _stringPosition($string, $needle, $offset = 0)
    {
        if (function_exists('mb_strpos')) {
            return mb_strpos($string, $needle, $offset);
        }

        return strpos($string, $needle, $offset);
    }

    /**
     * Get part of string
     *
     * Uses mbstring when it is available.
     *
     * @param string $string The string being checked.
     * @param int    $start  The first position used in $string.
     * @param int    $length The maximum length of the returned string.
     *
     * @access private
     *
     * @return string
     */
    private function _stringSubstring($string, $start, $length)
    {
        if (function_exists('mb_substr')) {
            return mb_substr($string, $start, $length);
        }

        // substr() returns false instead of '' on older PHP versions
        return (string)substr($string, $start, $length);
    }
}